// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 ARM Ltd.
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/uaccess.h>

#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_nested.h>

#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>

#include "trace.h"

static struct timecounter *timecounter;
static unsigned int host_vtimer_irq;
static unsigned int host_ptimer_irq;
static u32 host_vtimer_irq_flags;
static u32 host_ptimer_irq_flags;

static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);

static const u8 default_ppi[] = {
	[TIMER_PTIMER]  = 30,
	[TIMER_VTIMER]  = 27,
	[TIMER_HPTIMER] = 26,
	[TIMER_HVTIMER] = 28,
};

static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx);
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val);
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg);
static bool kvm_arch_timer_get_input_level(int vintid);

static struct irq_ops arch_timer_irq_ops = {
	.get_input_level = kvm_arch_timer_get_input_level,
};

static int nr_timers(struct kvm_vcpu *vcpu)
{
	if (!vcpu_has_nv(vcpu))
		return NR_KVM_EL0_TIMERS;

	return NR_KVM_TIMERS;
}

u32 timer_get_ctl(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
	case TIMER_PTIMER:
		return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
	case TIMER_HVTIMER:
		return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2);
	case TIMER_HPTIMER:
		return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2);
	default:
		WARN_ON(1);
		return 0;
	}
}

u64 timer_get_cval(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
	case TIMER_PTIMER:
		return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
	case TIMER_HVTIMER:
		return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2);
	case TIMER_HPTIMER:
		return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
	default:
		WARN_ON(1);
		return 0;
	}
}

static u64 timer_get_offset(struct arch_timer_context *ctxt)
{
	u64 offset = 0;

	if (!ctxt)
		return 0;

	if (ctxt->offset.vm_offset)
		offset += *ctxt->offset.vm_offset;
	if (ctxt->offset.vcpu_offset)
		offset += *ctxt->offset.vcpu_offset;

	return offset;
}

static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl;
		break;
	case TIMER_PTIMER:
		__vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl;
		break;
	case TIMER_HVTIMER:
		__vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl;
		break;
	case TIMER_HPTIMER:
		__vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl;
		break;
	default:
		WARN_ON(1);
	}
}

static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval;
		break;
	case TIMER_PTIMER:
		__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval;
		break;
	case TIMER_HVTIMER:
		__vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval;
		break;
	case TIMER_HPTIMER:
		__vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval;
		break;
	default:
		WARN_ON(1);
	}
}

static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset)
{
	if (!ctxt->offset.vm_offset) {
		WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt));
		return;
	}

	WRITE_ONCE(*ctxt->offset.vm_offset, offset);
}

u64 kvm_phys_timer_read(void)
{
	return timecounter->cc->read(timecounter->cc);
}

void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
{
	if (vcpu_has_nv(vcpu)) {
		if (is_hyp_ctxt(vcpu)) {
			map->direct_vtimer = vcpu_hvtimer(vcpu);
			map->direct_ptimer = vcpu_hptimer(vcpu);
			map->emul_vtimer = vcpu_vtimer(vcpu);
			map->emul_ptimer = vcpu_ptimer(vcpu);
		} else {
			map->direct_vtimer = vcpu_vtimer(vcpu);
			map->direct_ptimer = vcpu_ptimer(vcpu);
			map->emul_vtimer = vcpu_hvtimer(vcpu);
			map->emul_ptimer = vcpu_hptimer(vcpu);
		}
	} else if (has_vhe()) {
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = vcpu_ptimer(vcpu);
		map->emul_vtimer = NULL;
		map->emul_ptimer = NULL;
	} else {
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = NULL;
		map->emul_vtimer = NULL;
		map->emul_ptimer = vcpu_ptimer(vcpu);
	}

	trace_kvm_get_timer_map(vcpu->vcpu_id, map);
}

static inline bool userspace_irqchip(struct kvm *kvm)
{
	return unlikely(!irqchip_in_kernel(kvm));
}

static void soft_timer_start(struct hrtimer *hrt, u64 ns)
{
	hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
		      HRTIMER_MODE_ABS_HARD);
}

static void soft_timer_cancel(struct hrtimer *hrt)
{
	hrtimer_cancel(hrt);
}

static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
	struct arch_timer_context *ctx;
	struct timer_map map;

	/*
	 * We may see a timer interrupt after vcpu_put() has been called which
	 * sets the CPU's vcpu pointer to NULL, because even though the timer
	 * has been disabled in timer_save_state(), the hardware interrupt
	 * signal may not have been retired from the interrupt controller yet.
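	 * In that case there is nothing left to do, so simply report the
	 * interrupt as handled.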
	 */
	if (!vcpu)
		return IRQ_HANDLED;

	get_timer_map(vcpu, &map);

	if (irq == host_vtimer_irq)
		ctx = map.direct_vtimer;
	else
		ctx = map.direct_ptimer;

	if (kvm_timer_should_fire(ctx))
		kvm_timer_update_irq(vcpu, true, ctx);

	if (userspace_irqchip(vcpu->kvm) &&
	    !static_branch_unlikely(&has_gic_active_state))
		disable_percpu_irq(host_vtimer_irq);

	return IRQ_HANDLED;
}

static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
				     u64 val)
{
	u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	if (now < val) {
		u64 ns;

		ns = cyclecounter_cyc2ns(timecounter->cc,
					 val - now,
					 timecounter->mask,
					 &timer_ctx->ns_frac);
		return ns;
	}

	return 0;
}

static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
{
	return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx));
}

static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
{
	WARN_ON(timer_ctx && timer_ctx->loaded);
	return timer_ctx &&
		((timer_get_ctl(timer_ctx) &
		  (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
}

static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
{
	return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
		vcpu_get_flag(vcpu, IN_WFIT));
}

static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
{
	u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
	struct arch_timer_context *ctx;

	ctx = (vcpu_has_nv(vcpu) && is_hyp_ctxt(vcpu)) ? vcpu_hvtimer(vcpu)
						       : vcpu_vtimer(vcpu);

	return kvm_counter_compute_delta(ctx, val);
}

/*
 * Returns the earliest expiration time in ns among guest timers.
 * Note that it will return 0 if none of the timers can fire.
 */
static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
{
	u64 min_delta = ULLONG_MAX;
	int i;

	for (i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];

		WARN(ctx->loaded, "timer %d loaded\n", i);
		if (kvm_timer_irq_can_fire(ctx))
			min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
	}

	if (vcpu_has_wfit_active(vcpu))
		min_delta = min(min_delta, wfit_delay_ns(vcpu));

	/* If none of the timers can fire, then return 0 */
	if (min_delta == ULLONG_MAX)
		return 0;

	return min_delta;
}

static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
{
	struct arch_timer_cpu *timer;
	struct kvm_vcpu *vcpu;
	u64 ns;

	timer = container_of(hrt, struct arch_timer_cpu, bg_timer);
	vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If we should have slept longer, restart it.
	 */
	ns = kvm_timer_earliest_exp(vcpu);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_vcpu_wake_up(vcpu);
	return HRTIMER_NORESTART;
}

static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
{
	struct arch_timer_context *ctx;
	struct kvm_vcpu *vcpu;
	u64 ns;

	ctx = container_of(hrt, struct arch_timer_context, hrtimer);
	vcpu = ctx->vcpu;

	trace_kvm_timer_hrtimer_expire(ctx);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If not ready, schedule for a later time.
	 */
	ns = kvm_timer_compute_delta(ctx);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_timer_update_irq(vcpu, true, ctx);
	return HRTIMER_NORESTART;
}

static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
{
	enum kvm_arch_timers index;
	u64 cval, now;

	if (!timer_ctx)
		return false;

	index = arch_timer_ctx_index(timer_ctx);

	if (timer_ctx->loaded) {
		u32 cnt_ctl = 0;

		switch (index) {
		case TIMER_VTIMER:
		case TIMER_HVTIMER:
			cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
			break;
		case TIMER_PTIMER:
		case TIMER_HPTIMER:
			cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
			break;
		case NR_KVM_TIMERS:
			/* GCC is braindead */
			cnt_ctl = 0;
			break;
		}

		return  (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
			(cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
		       !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
	}

	if (!kvm_timer_irq_can_fire(timer_ctx))
		return false;

	cval = timer_get_cval(timer_ctx);
	now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	return cval <= now;
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0;
}

/*
 * Reflect the timer output level into the kvm_run structure
 */
void kvm_timer_update_run(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the device bitmap with the timer states */
	regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
				    KVM_ARM_DEV_EL1_PTIMER);
	if (kvm_timer_should_fire(vtimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
	if (kvm_timer_should_fire(ptimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
}

static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx)
{
	int ret;

	timer_ctx->irq.level = new_level;
	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
				   timer_ctx->irq.level);

	if (!userspace_irqchip(vcpu->kvm)) {
		ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
					  timer_irq(timer_ctx),
					  timer_ctx->irq.level,
					  timer_ctx);
		WARN_ON(ret);
	}
}

/* Only called for a fully emulated timer */
static void timer_emulate(struct arch_timer_context *ctx)
{
	bool should_fire = kvm_timer_should_fire(ctx);

	trace_kvm_timer_emulate(ctx, should_fire);

	if (should_fire != ctx->irq.level)
		kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);

	/*
	 * If the timer can fire now, we don't need to have a soft timer
	 * scheduled for the future. If the timer cannot fire at all,
	 * then we also don't need a soft timer.
	 */
	if (should_fire || !kvm_timer_irq_can_fire(ctx))
		return;

	soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
}

static void set_cntvoff(u64 cntvoff)
{
	kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
}

static void set_cntpoff(u64 cntpoff)
{
	if (has_cntpoff())
		write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
}

static void timer_save_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

	if (!timer->enabled)
		return;

	local_irq_save(flags);

	if (!ctx->loaded)
		goto out;

	switch (index) {
		u64 cval;

	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
		timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL));

		/* Disable the timer */
		write_sysreg_el0(0, SYS_CNTV_CTL);
		isb();

		/*
		 * The kernel may decide to run userspace after
		 * calling vcpu_put, so we reset cntvoff to 0 to
		 * ensure a consistent read between user accesses to
		 * the virtual counter and kernel access to the
		 * physical counter in the non-VHE case.
		 *
		 * For VHE, the virtual counter uses a fixed virtual
		 * offset of zero, so no need to zero the CNTVOFF_EL2
		 * register, but this is actually useful when switching
		 * between EL1/vEL2 with NV.
		 *
		 * Do it unconditionally, as this is either unavoidable
		 * or dirt cheap.
		 */
		set_cntvoff(0);
		break;
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
		cval = read_sysreg_el0(SYS_CNTP_CVAL);

		cval -= timer_get_offset(ctx);

		timer_set_cval(ctx, cval);

		/* Disable the timer */
		write_sysreg_el0(0, SYS_CNTP_CTL);
		isb();

		set_cntpoff(0);
		break;
	case NR_KVM_TIMERS:
		BUG();
	}

	trace_kvm_timer_save_state(ctx);

	ctx->loaded = false;
out:
	local_irq_restore(flags);
}

/*
 * Schedule the background timer before calling kvm_vcpu_halt, so that this
 * thread is removed from its waitqueue and made runnable when there's a timer
 * interrupt to handle.
 */
static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	get_timer_map(vcpu, &map);

	/*
	 * If no timers are capable of raising interrupts (disabled or
	 * masked), then there's no more work for us to do.
	 */
	if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
	    !kvm_timer_irq_can_fire(map.direct_ptimer) &&
	    !kvm_timer_irq_can_fire(map.emul_vtimer) &&
	    !kvm_timer_irq_can_fire(map.emul_ptimer) &&
	    !vcpu_has_wfit_active(vcpu))
		return;

	/*
	 * At least one guest timer will expire. Schedule a background timer.
	 * Set the earliest expiration time among the guest timers.
	 */
	soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
}

static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	soft_timer_cancel(&timer->bg_timer);
}

static void timer_restore_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

	if (!timer->enabled)
		return;

	local_irq_save(flags);

	if (ctx->loaded)
		goto out;

	switch (index) {
		u64 cval, offset;

	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		set_cntvoff(timer_get_offset(ctx));
		write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
		isb();
		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
		break;
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		cval = timer_get_cval(ctx);
		offset = timer_get_offset(ctx);
		set_cntpoff(offset);
		cval += offset;
		write_sysreg_el0(cval, SYS_CNTP_CVAL);
		isb();
		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
		break;
	case NR_KVM_TIMERS:
		BUG();
	}

	trace_kvm_timer_restore_state(ctx);

	ctx->loaded = true;
out:
	local_irq_restore(flags);
}

static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
{
	int r;
	r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active);
	WARN_ON(r);
}

static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
{
	struct kvm_vcpu *vcpu = ctx->vcpu;
	bool phys_active = false;

	/*
	 * Update the timer output so that it is likely to match the
	 * state we're about to restore. If the timer expires between
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);

	if (irqchip_in_kernel(vcpu->kvm))
		phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));

	phys_active |= ctx->irq.level;

	set_timer_irq_phys_active(ctx, phys_active);
}

static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	/*
	 * Update the timer output so that it is likely to match the
	 * state we're about to restore. If the timer expires between
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer);

	/*
	 * When using a userspace irqchip with the architected timers and a
	 * host interrupt controller that doesn't support an active state, we
	 * must still prevent continuously exiting from the guest, and
	 * therefore mask the physical interrupt by disabling it on the host
	 * interrupt controller when the virtual level is high, such that the
	 * guest can make forward progress. Once we detect the output level
	 * being de-asserted, we unmask the interrupt again so that we exit
	 * from the guest when the timer fires.
	 */
	if (vtimer->irq.level)
		disable_percpu_irq(host_vtimer_irq);
	else
		enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}

/* If _pred is true, set bit in _set, otherwise set it in _clr */
#define assign_clear_set_bit(_pred, _bit, _clr, _set)			\
	do {								\
		if (_pred)						\
			(_set) |= (_bit);				\
		else							\
			(_clr) |= (_bit);				\
	} while (0)

static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
					      struct timer_map *map)
{
	int hw, ret;

	if (!irqchip_in_kernel(vcpu->kvm))
		return;

	/*
	 * We only ever unmap the vtimer irq on a VHE system that runs nested
	 * virtualization, in which case we have a valid emul_vtimer,
	 * emul_ptimer, direct_vtimer, and direct_ptimer.
	 *
	 * Since this is called from kvm_timer_vcpu_load(), a change between
	 * vEL2 and vEL1/0 will have just happened, and the timer_map will
	 * represent this, and therefore we switch the emul/direct mappings
	 * below.
	 */
	hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer));
	if (hw < 0) {
		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer));
		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer));

		ret = kvm_vgic_map_phys_irq(vcpu,
					    map->direct_vtimer->host_timer_irq,
					    timer_irq(map->direct_vtimer),
					    &arch_timer_irq_ops);
		WARN_ON_ONCE(ret);
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map->direct_ptimer->host_timer_irq,
					    timer_irq(map->direct_ptimer),
					    &arch_timer_irq_ops);
		WARN_ON_ONCE(ret);

		/*
		 * The virtual offset behaviour is "interesting", as it
		 * always applies when HCR_EL2.E2H==0, but only when
		 * accessed from EL1 when HCR_EL2.E2H==1. So make sure we
		 * track E2H when putting the HV timer in "direct" mode.
		 */
		if (map->direct_vtimer == vcpu_hvtimer(vcpu)) {
			struct arch_timer_offset *offs = &map->direct_vtimer->offset;

			if (vcpu_el2_e2h_is_set(vcpu))
				offs->vcpu_offset = NULL;
			else
				offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
		}
	}
}

static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
{
	bool tpt, tpc;
	u64 clr, set;

	/*
	 * No trapping gets configured here with nVHE. See
	 * __timer_enable_traps(), which is where the stuff happens.
	 */
	if (!has_vhe())
		return;

	/*
	 * Our default policy is not to trap anything. As we progress
	 * within this function, reality kicks in and we start adding
	 * traps based on emulation requirements.
	 */
	tpt = tpc = false;

	/*
	 * We have two possibilities to deal with a physical offset:
	 *
	 * - Either we have CNTPOFF (yay!) or the offset is 0:
	 *   we let the guest freely access the HW
	 *
	 * - or neither of these conditions apply:
	 *   we trap accesses to the HW, but still use it
	 *   after correcting the physical offset
	 */
	if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
		tpt = tpc = true;

	/*
	 * Apply the enable bits that the guest hypervisor has requested for
	 * its own guest. We can only add traps that wouldn't have been set
	 * above.
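	 *
	 * The E2H==0 view of CNTHCTL_EL2 keeps EL1PCTEN/EL1PCEN in bits
	 * [1:0]; they get shifted up by 10 below so that the checks can use
	 * the E2H==1 bit positions throughout.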
	 */
	if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
		u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);

		/* Use the VHE format for mental sanity */
		if (!vcpu_el2_e2h_is_set(vcpu))
			val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;

		tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
		tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));
	}

	/*
	 * Now that we have collected our requirements, compute the
	 * trap and enable bits.
	 */
	set = 0;
	clr = 0;

	assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
	assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);

	/* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
	sysreg_clear_set(cnthctl_el2, clr, set);
}

void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	if (unlikely(!timer->enabled))
		return;

	get_timer_map(vcpu, &map);

	if (static_branch_likely(&has_gic_active_state)) {
		if (vcpu_has_nv(vcpu))
			kvm_timer_vcpu_load_nested_switch(vcpu, &map);

		kvm_timer_vcpu_load_gic(map.direct_vtimer);
		if (map.direct_ptimer)
			kvm_timer_vcpu_load_gic(map.direct_ptimer);
	} else {
		kvm_timer_vcpu_load_nogic(vcpu);
	}

	kvm_timer_unblocking(vcpu);

	timer_restore_state(map.direct_vtimer);
	if (map.direct_ptimer)
		timer_restore_state(map.direct_ptimer);
	if (map.emul_vtimer)
		timer_emulate(map.emul_vtimer);
	if (map.emul_ptimer)
		timer_emulate(map.emul_ptimer);

	timer_set_traps(vcpu, &map);
}

bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool vlevel, plevel;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
	plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;

	return kvm_timer_should_fire(vtimer) != vlevel ||
	       kvm_timer_should_fire(ptimer) != plevel;
}

void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	if (unlikely(!timer->enabled))
		return;

	get_timer_map(vcpu, &map);

	timer_save_state(map.direct_vtimer);
	if (map.direct_ptimer)
		timer_save_state(map.direct_ptimer);

	/*
	 * Cancel soft timer emulation, because the only case where we
	 * need it after a vcpu_put is in the context of a sleeping VCPU, and
	 * in that case we already factor in the deadline for the physical
	 * timer when scheduling the bg_timer.
	 *
	 * In any case, we re-schedule the hrtimer for the physical timer when
	 * coming back to the VCPU thread in kvm_timer_vcpu_load().
	 */
	if (map.emul_vtimer)
		soft_timer_cancel(&map.emul_vtimer->hrtimer);
	if (map.emul_ptimer)
		soft_timer_cancel(&map.emul_ptimer->hrtimer);

	if (kvm_vcpu_is_blocking(vcpu))
		kvm_timer_blocking(vcpu);
}

/*
 * With a userspace irqchip we have to check if the guest de-asserted the
 * timer and if so, unmask the timer irq signal on the host interrupt
 * controller to ensure that we see future timer signals.
 */
static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	if (!kvm_timer_should_fire(vtimer)) {
		kvm_timer_update_irq(vcpu, false, vtimer);
		if (static_branch_likely(&has_gic_active_state))
			set_timer_irq_phys_active(vtimer, false);
		else
			enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	}
}

void kvm_timer_sync_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	if (unlikely(!timer->enabled))
		return;

	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
		unmask_vtimer_irq_user(vcpu);
}

int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	get_timer_map(vcpu, &map);

	/*
	 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
	 * and to 0 for ARMv7. We provide an implementation that always
	 * resets the timer to be disabled and unmasked and is compliant with
	 * the ARMv7 architecture.
	 */
	for (int i = 0; i < nr_timers(vcpu); i++)
		timer_set_ctl(vcpu_get_timer(vcpu, i), 0);

	/*
	 * A vcpu running at EL2 is in charge of the offset applied to
	 * the virtual timer, so use the physical VM offset, and point
	 * the vcpu offset to CNTVOFF_EL2.
	 */
	if (vcpu_has_nv(vcpu)) {
		struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;

		offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
		offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
	}

	if (timer->enabled) {
		for (int i = 0; i < nr_timers(vcpu); i++)
			kvm_timer_update_irq(vcpu, false,
					     vcpu_get_timer(vcpu, i));

		if (irqchip_in_kernel(vcpu->kvm)) {
			kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer));
			if (map.direct_ptimer)
				kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer));
		}
	}

	if (map.emul_vtimer)
		soft_timer_cancel(&map.emul_vtimer->hrtimer);
	if (map.emul_ptimer)
		soft_timer_cancel(&map.emul_ptimer->hrtimer);

	return 0;
}

static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
{
	struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
	struct kvm *kvm = vcpu->kvm;

	ctxt->vcpu = vcpu;

	if (timerid == TIMER_VTIMER)
		ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
	else
		ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;

	hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
	ctxt->hrtimer.function = kvm_hrtimer_expire;

	switch (timerid) {
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		ctxt->host_timer_irq = host_ptimer_irq;
		break;
	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		ctxt->host_timer_irq = host_vtimer_irq;
		break;
	}
}

void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	for (int i = 0; i < NR_KVM_TIMERS; i++)
		timer_context_init(vcpu, i);

	/* Synchronize offsets across timers of a VM if not already provided */
	if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
		timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
		timer_set_offset(vcpu_ptimer(vcpu), 0);
	}

	hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
	timer->bg_timer.function = kvm_bg_timer_expire;
}

void kvm_timer_init_vm(struct kvm *kvm)
{
	for (int i = 0; i < NR_KVM_TIMERS; i++)
		kvm->arch.timer_data.ppi[i] = default_ppi[i];
}

void kvm_timer_cpu_up(void)
{
	enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	if (host_ptimer_irq)
		enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags);
}

void kvm_timer_cpu_down(void)
{
	disable_percpu_irq(host_vtimer_irq);
	if (host_ptimer_irq)
		disable_percpu_irq(host_ptimer_irq);
}

int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
{
	struct arch_timer_context *timer;

	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
		timer = vcpu_vtimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
		break;
	case KVM_REG_ARM_TIMER_CNT:
		if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
			      &vcpu->kvm->arch.flags)) {
			timer = vcpu_vtimer(vcpu);
			timer_set_offset(timer, kvm_phys_timer_read() - value);
		}
		break;
	case KVM_REG_ARM_TIMER_CVAL:
		timer = vcpu_vtimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
		break;
	case KVM_REG_ARM_PTIMER_CTL:
		timer = vcpu_ptimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
		break;
	case KVM_REG_ARM_PTIMER_CNT:
		if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
			      &vcpu->kvm->arch.flags)) {
			timer = vcpu_ptimer(vcpu);
			timer_set_offset(timer, kvm_phys_timer_read() - value);
		}
		break;
	case KVM_REG_ARM_PTIMER_CVAL:
		timer = vcpu_ptimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
		break;

	default:
		return -1;
	}

	return 0;
}

static u64 read_timer_ctl(struct arch_timer_context *timer)
{
	/*
	 * Set ISTATUS bit if it's expired.
	 * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is
	 * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
	 * regardless of ENABLE bit for our implementation convenience.
	 */
	u32 ctl = timer_get_ctl(timer);

	if (!kvm_timer_compute_delta(timer))
		ctl |= ARCH_TIMER_CTRL_IT_STAT;

	return ctl;
}

u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
{
	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_vtimer(vcpu), TIMER_REG_CTL);
	case KVM_REG_ARM_TIMER_CNT:
		return kvm_arm_timer_read(vcpu,
					  vcpu_vtimer(vcpu), TIMER_REG_CNT);
	case KVM_REG_ARM_TIMER_CVAL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_vtimer(vcpu), TIMER_REG_CVAL);
	case KVM_REG_ARM_PTIMER_CTL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_ptimer(vcpu), TIMER_REG_CTL);
	case KVM_REG_ARM_PTIMER_CNT:
		return kvm_arm_timer_read(vcpu,
					  vcpu_ptimer(vcpu), TIMER_REG_CNT);
	case KVM_REG_ARM_PTIMER_CVAL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_ptimer(vcpu), TIMER_REG_CVAL);
	}
	return (u64)-1;
}

static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg)
{
	u64 val;

	switch (treg) {
	case TIMER_REG_TVAL:
		val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer);
		val = lower_32_bits(val);
		break;

	case TIMER_REG_CTL:
		val = read_timer_ctl(timer);
		break;

	case TIMER_REG_CVAL:
		val = timer_get_cval(timer);
		break;

	case TIMER_REG_CNT:
		val = kvm_phys_timer_read() - timer_get_offset(timer);
		break;

	case TIMER_REG_VOFF:
		val = *timer->offset.vcpu_offset;
		break;

	default:
		BUG();
	}

	return val;
}

u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
			      enum kvm_arch_timers tmr,
			      enum kvm_arch_timer_regs treg)
{
	struct arch_timer_context *timer;
	struct timer_map map;
	u64 val;

	get_timer_map(vcpu, &map);
	timer = vcpu_get_timer(vcpu, tmr);

	if (timer == map.emul_vtimer || timer == map.emul_ptimer)
		return kvm_arm_timer_read(vcpu, timer, treg);

	preempt_disable();
	timer_save_state(timer);

	val = kvm_arm_timer_read(vcpu, timer, treg);

	timer_restore_state(timer);
	preempt_enable();

	return val;
}

static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	switch (treg) {
	case TIMER_REG_TVAL:
		timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val);
		break;

	case TIMER_REG_CTL:
		timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT);
		break;

	case TIMER_REG_CVAL:
		timer_set_cval(timer, val);
		break;

	case TIMER_REG_VOFF:
		*timer->offset.vcpu_offset = val;
		break;

	default:
		BUG();
	}
}

void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
				enum kvm_arch_timers tmr,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	struct arch_timer_context *timer;
	struct timer_map map;

	get_timer_map(vcpu, &map);
	timer = vcpu_get_timer(vcpu, tmr);
	if (timer == map.emul_vtimer || timer == map.emul_ptimer) {
		soft_timer_cancel(&timer->hrtimer);
		kvm_arm_timer_write(vcpu, timer, treg, val);
		timer_emulate(timer);
	} else {
		preempt_disable();
		timer_save_state(timer);
		kvm_arm_timer_write(vcpu, timer, treg, val);
		timer_restore_state(timer);
		preempt_enable();
	}
}

static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
	if (vcpu)
		irqd_set_forwarded_to_vcpu(d);
	else
		irqd_clr_forwarded_to_vcpu(d);

	return 0;
}

static int timer_irq_set_irqchip_state(struct irq_data *d,
				       enum irqchip_irq_state which, bool val)
{
	if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
		return irq_chip_set_parent_state(d, which, val);

	if (val)
		irq_chip_mask_parent(d);
	else
		irq_chip_unmask_parent(d);

	return 0;
}

static void timer_irq_eoi(struct irq_data *d)
{
	if (!irqd_is_forwarded_to_vcpu(d))
		irq_chip_eoi_parent(d);
}

static void timer_irq_ack(struct irq_data *d)
{
	d = d->parent_data;
	if (d->chip->irq_ack)
		d->chip->irq_ack(d);
}

static struct irq_chip timer_chip = {
	.name			= "KVM",
	.irq_ack		= timer_irq_ack,
	.irq_mask		= irq_chip_mask_parent,
	.irq_unmask		= irq_chip_unmask_parent,
	.irq_eoi		= timer_irq_eoi,
	.irq_set_type		= irq_chip_set_type_parent,
	.irq_set_vcpu_affinity	= timer_irq_set_vcpu_affinity,
	.irq_set_irqchip_state	= timer_irq_set_irqchip_state,
};

static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs, void *arg)
{
	irq_hw_number_t hwirq = (uintptr_t)arg;

	return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
					     &timer_chip, NULL);
}

static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs)
{
}

static const struct irq_domain_ops timer_domain_ops = {
	.alloc	= timer_irq_domain_alloc,
	.free	= timer_irq_domain_free,
};

static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
{
	*flags = irq_get_trigger_type(virq);
	if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) {
		kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
			virq);
		*flags = IRQF_TRIGGER_LOW;
	}
}

static int kvm_irq_init(struct arch_timer_kvm_info *info)
{
	struct irq_domain *domain = NULL;

	if (info->virtual_irq <= 0) {
		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
			info->virtual_irq);
		return -ENODEV;
	}

	host_vtimer_irq = info->virtual_irq;
	kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);

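	/*
	 * If the vgic cannot rely on HW deactivation of the timer interrupt,
	 * interpose a software irq_chip on the timer PPIs so that the active
	 * state can be emulated and the line resampled in software
	 * (VGIC_IRQ_SW_RESAMPLE).
	 */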
	if (kvm_vgic_global_state.no_hw_deactivation) {
		struct fwnode_handle *fwnode;
		struct irq_data *data;

		fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
		if (!fwnode)
			return -ENOMEM;

		/* Assume both vtimer and ptimer in the same parent */
		data = irq_get_irq_data(host_vtimer_irq);
		domain = irq_domain_create_hierarchy(data->domain, 0,
						     NR_KVM_TIMERS, fwnode,
						     &timer_domain_ops, NULL);
		if (!domain) {
			irq_domain_free_fwnode(fwnode);
			return -ENOMEM;
		}

		arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
		WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
					    (void *)TIMER_VTIMER));
	}

	if (info->physical_irq > 0) {
		host_ptimer_irq = info->physical_irq;
		kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);

		if (domain)
			WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
						    (void *)TIMER_PTIMER));
	}

	return 0;
}

int __init kvm_timer_hyp_init(bool has_gic)
{
	struct arch_timer_kvm_info *info;
	int err;

	info = arch_timer_get_kvm_info();
	timecounter = &info->timecounter;

	if (!timecounter->cc) {
		kvm_err("kvm_arch_timer: uninitialized timecounter\n");
		return -ENODEV;
	}

	err = kvm_irq_init(info);
	if (err)
		return err;

	/* First, do the virtual EL1 timer irq */

	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
				 "kvm guest vtimer", kvm_get_running_vcpus());
	if (err) {
		kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
			host_vtimer_irq, err);
		return err;
	}

	if (has_gic) {
		err = irq_set_vcpu_affinity(host_vtimer_irq,
					    kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
			goto out_free_vtimer_irq;
		}

		static_branch_enable(&has_gic_active_state);
	}

	kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);

	/* Now let's do the physical EL1 timer irq */

	if (info->physical_irq > 0) {
		err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
					 "kvm guest ptimer", kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
				host_ptimer_irq, err);
			goto out_free_vtimer_irq;
		}

		if (has_gic) {
			err = irq_set_vcpu_affinity(host_ptimer_irq,
						    kvm_get_running_vcpus());
			if (err) {
				kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
				goto out_free_ptimer_irq;
			}
		}

		kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
	} else if (has_vhe()) {
		kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
			info->physical_irq);
		err = -ENODEV;
		goto out_free_vtimer_irq;
	}

	return 0;

out_free_ptimer_irq:
	if (info->physical_irq > 0)
		free_percpu_irq(host_ptimer_irq, kvm_get_running_vcpus());
out_free_vtimer_irq:
	free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
	return err;
}

void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	soft_timer_cancel(&timer->bg_timer);
}

static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
{
	u32 ppis = 0;
	bool valid;
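	/*
	 * Claim ownership of each timer PPI; the configuration is only valid
	 * if every timer of this vcpu uses a distinct PPI.
	 */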

	mutex_lock(&vcpu->kvm->arch.config_lock);

	for (int i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx;
		int irq;

		ctx = vcpu_get_timer(vcpu, i);
		irq = timer_irq(ctx);
		if (kvm_vgic_set_owner(vcpu, irq, ctx))
			break;

		/*
		 * We know by construction that we only have PPIs, so
		 * all values are less than 32.
		 */
		ppis |= BIT(irq);
	}

	valid = hweight32(ppis) == nr_timers(vcpu);

	if (valid)
		set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags);

	mutex_unlock(&vcpu->kvm->arch.config_lock);

	return valid;
}

static bool kvm_arch_timer_get_input_level(int vintid)
{
	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();

	if (WARN(!vcpu, "No vcpu context!\n"))
		return false;

	for (int i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx;

		ctx = vcpu_get_timer(vcpu, i);
		if (timer_irq(ctx) == vintid)
			return kvm_timer_should_fire(ctx);
	}

	/* A timer IRQ has fired, but no matching timer was found? */
	WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid);

	return false;
}

int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;
	int ret;

	if (timer->enabled)
		return 0;

	/* Without a VGIC we do not map virtual IRQs to physical IRQs */
	if (!irqchip_in_kernel(vcpu->kvm))
		goto no_vgic;

	/*
	 * At this stage, we have the guarantee that the vgic is both
	 * available and initialized.
	 */
	if (!timer_irqs_are_valid(vcpu)) {
		kvm_debug("incorrectly configured timer irqs\n");
		return -EINVAL;
	}

	get_timer_map(vcpu, &map);

	ret = kvm_vgic_map_phys_irq(vcpu,
				    map.direct_vtimer->host_timer_irq,
				    timer_irq(map.direct_vtimer),
				    &arch_timer_irq_ops);
	if (ret)
		return ret;

	if (map.direct_ptimer) {
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map.direct_ptimer->host_timer_irq,
					    timer_irq(map.direct_ptimer),
					    &arch_timer_irq_ops);
	}

	if (ret)
		return ret;

no_vgic:
	timer->enabled = 1;
	return 0;
}

/* If we have CNTPOFF, permanently set ECV to enable it */
void kvm_timer_init_vhe(void)
{
	if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
		sysreg_clear_set(cnthctl_el2, 0, CNTHCTL_ECV);
}

int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	int irq, idx, ret = 0;

	if (!irqchip_in_kernel(vcpu->kvm))
		return -EINVAL;

	if (get_user(irq, uaddr))
		return -EFAULT;

	if (!(irq_is_ppi(irq)))
		return -EINVAL;

	mutex_lock(&vcpu->kvm->arch.config_lock);

	if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
		     &vcpu->kvm->arch.flags)) {
		ret = -EBUSY;
		goto out;
	}

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		idx = TIMER_VTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		idx = TIMER_PTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
		idx = TIMER_HVTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		idx = TIMER_HPTIMER;
		break;
	default:
		ret = -ENXIO;
		goto out;
	}

	/*
	 * We cannot validate the IRQ unicity before we run, so take it at
	 * face value. The verdict will be given on first vcpu run, for each
	 * vcpu. Yes this is late. Blame it on the stupid API.
	 */
	vcpu->kvm->arch.timer_data.ppi[idx] = irq;

out:
	mutex_unlock(&vcpu->kvm->arch.config_lock);
	return ret;
}

int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	struct arch_timer_context *timer;
	int irq;

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		timer = vcpu_vtimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		timer = vcpu_ptimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
		timer = vcpu_hvtimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		timer = vcpu_hptimer(vcpu);
		break;
	default:
		return -ENXIO;
	}

	irq = timer_irq(timer);
	return put_user(irq, uaddr);
}

int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		return 0;
	}

	return -ENXIO;
}

int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
				    struct kvm_arm_counter_offset *offset)
{
	int ret = 0;

	if (offset->reserved)
		return -EINVAL;

	mutex_lock(&kvm->lock);

	if (lock_all_vcpus(kvm)) {
		set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags);

		/*
		 * If userspace decides to set the offset using this
		 * API rather than merely restoring the counter
		 * values, the offset applies to both the virtual and
		 * physical views.
		 */
		kvm->arch.timer_data.voffset = offset->counter_offset;
		kvm->arch.timer_data.poffset = offset->counter_offset;

		unlock_all_vcpus(kvm);
	} else {
		ret = -EBUSY;
	}

	mutex_unlock(&kvm->lock);

	return ret;
}