1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Clocksource driver for the synthetic counter and timers 5 * provided by the Hyper-V hypervisor to guest VMs, as described 6 * in the Hyper-V Top Level Functional Spec (TLFS). This driver 7 * is instruction set architecture independent. 8 * 9 * Copyright (C) 2019, Microsoft, Inc. 10 * 11 * Author: Michael Kelley <mikelley@microsoft.com> 12 */ 13 14 #include <linux/percpu.h> 15 #include <linux/cpumask.h> 16 #include <linux/clockchips.h> 17 #include <linux/clocksource.h> 18 #include <linux/sched_clock.h> 19 #include <linux/mm.h> 20 #include <linux/cpuhotplug.h> 21 #include <clocksource/hyperv_timer.h> 22 #include <asm/hyperv-tlfs.h> 23 #include <asm/mshyperv.h> 24 25 static struct clock_event_device __percpu *hv_clock_event; 26 static u64 hv_sched_clock_offset __ro_after_init; 27 28 /* 29 * If false, we're using the old mechanism for stimer0 interrupts 30 * where it sends a VMbus message when it expires. The old 31 * mechanism is used when running on older versions of Hyper-V 32 * that don't support Direct Mode. While Hyper-V provides 33 * four stimer's per CPU, Linux uses only stimer0. 34 * 35 * Because Direct Mode does not require processing a VMbus 36 * message, stimer interrupts can be enabled earlier in the 37 * process of booting a CPU, and consistent with when timer 38 * interrupts are enabled for other clocksource drivers. 39 * However, for legacy versions of Hyper-V when Direct Mode 40 * is not enabled, setting up stimer interrupts must be 41 * delayed until VMbus is initialized and can process the 42 * interrupt message. 43 */ 44 static bool direct_mode_enabled; 45 46 static int stimer0_irq; 47 static int stimer0_vector; 48 static int stimer0_message_sint; 49 50 /* 51 * ISR for when stimer0 is operating in Direct Mode. Direct Mode 52 * does not use VMbus or any VMbus messages, so process here and not 53 * in the VMbus driver code. 54 */ 55 void hv_stimer0_isr(void) 56 { 57 struct clock_event_device *ce; 58 59 ce = this_cpu_ptr(hv_clock_event); 60 ce->event_handler(ce); 61 } 62 EXPORT_SYMBOL_GPL(hv_stimer0_isr); 63 64 static int hv_ce_set_next_event(unsigned long delta, 65 struct clock_event_device *evt) 66 { 67 u64 current_tick; 68 69 current_tick = hyperv_cs->read(NULL); 70 current_tick += delta; 71 hv_init_timer(0, current_tick); 72 return 0; 73 } 74 75 static int hv_ce_shutdown(struct clock_event_device *evt) 76 { 77 hv_init_timer(0, 0); 78 hv_init_timer_config(0, 0); 79 if (direct_mode_enabled) 80 hv_disable_stimer0_percpu_irq(stimer0_irq); 81 82 return 0; 83 } 84 85 static int hv_ce_set_oneshot(struct clock_event_device *evt) 86 { 87 union hv_stimer_config timer_cfg; 88 89 timer_cfg.as_uint64 = 0; 90 timer_cfg.enable = 1; 91 timer_cfg.auto_enable = 1; 92 if (direct_mode_enabled) { 93 /* 94 * When it expires, the timer will directly interrupt 95 * on the specified hardware vector/IRQ. 96 */ 97 timer_cfg.direct_mode = 1; 98 timer_cfg.apic_vector = stimer0_vector; 99 hv_enable_stimer0_percpu_irq(stimer0_irq); 100 } else { 101 /* 102 * When it expires, the timer will generate a VMbus message, 103 * to be handled by the normal VMbus interrupt handler. 104 */ 105 timer_cfg.direct_mode = 0; 106 timer_cfg.sintx = stimer0_message_sint; 107 } 108 hv_init_timer_config(0, timer_cfg.as_uint64); 109 return 0; 110 } 111 112 /* 113 * hv_stimer_init - Per-cpu initialization of the clockevent 114 */ 115 static int hv_stimer_init(unsigned int cpu) 116 { 117 struct clock_event_device *ce; 118 119 if (!hv_clock_event) 120 return 0; 121 122 ce = per_cpu_ptr(hv_clock_event, cpu); 123 ce->name = "Hyper-V clockevent"; 124 ce->features = CLOCK_EVT_FEAT_ONESHOT; 125 ce->cpumask = cpumask_of(cpu); 126 ce->rating = 1000; 127 ce->set_state_shutdown = hv_ce_shutdown; 128 ce->set_state_oneshot = hv_ce_set_oneshot; 129 ce->set_next_event = hv_ce_set_next_event; 130 131 clockevents_config_and_register(ce, 132 HV_CLOCK_HZ, 133 HV_MIN_DELTA_TICKS, 134 HV_MAX_MAX_DELTA_TICKS); 135 return 0; 136 } 137 138 /* 139 * hv_stimer_cleanup - Per-cpu cleanup of the clockevent 140 */ 141 int hv_stimer_cleanup(unsigned int cpu) 142 { 143 struct clock_event_device *ce; 144 145 if (!hv_clock_event) 146 return 0; 147 148 /* 149 * In the legacy case where Direct Mode is not enabled 150 * (which can only be on x86/64), stimer cleanup happens 151 * relatively early in the CPU offlining process. We 152 * must unbind the stimer-based clockevent device so 153 * that the LAPIC timer can take over until clockevents 154 * are no longer needed in the offlining process. Note 155 * that clockevents_unbind_device() eventually calls 156 * hv_ce_shutdown(). 157 * 158 * The unbind should not be done when Direct Mode is 159 * enabled because we may be on an architecture where 160 * there are no other clockevent devices to fallback to. 161 */ 162 ce = per_cpu_ptr(hv_clock_event, cpu); 163 if (direct_mode_enabled) 164 hv_ce_shutdown(ce); 165 else 166 clockevents_unbind_device(ce, cpu); 167 168 return 0; 169 } 170 EXPORT_SYMBOL_GPL(hv_stimer_cleanup); 171 172 /* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */ 173 int hv_stimer_alloc(void) 174 { 175 int ret = 0; 176 177 /* 178 * Synthetic timers are always available except on old versions of 179 * Hyper-V on x86. In that case, return as error as Linux will use a 180 * clockevent based on emulated LAPIC timer hardware. 181 */ 182 if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE)) 183 return -EINVAL; 184 185 hv_clock_event = alloc_percpu(struct clock_event_device); 186 if (!hv_clock_event) 187 return -ENOMEM; 188 189 direct_mode_enabled = ms_hyperv.misc_features & 190 HV_STIMER_DIRECT_MODE_AVAILABLE; 191 if (direct_mode_enabled) { 192 ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector, 193 hv_stimer0_isr); 194 if (ret) 195 goto free_percpu; 196 197 /* 198 * Since we are in Direct Mode, stimer initialization 199 * can be done now with a CPUHP value in the same range 200 * as other clockevent devices. 201 */ 202 ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING, 203 "clockevents/hyperv/stimer:starting", 204 hv_stimer_init, hv_stimer_cleanup); 205 if (ret < 0) 206 goto free_stimer0_irq; 207 } 208 return ret; 209 210 free_stimer0_irq: 211 hv_remove_stimer0_irq(stimer0_irq); 212 stimer0_irq = 0; 213 free_percpu: 214 free_percpu(hv_clock_event); 215 hv_clock_event = NULL; 216 return ret; 217 } 218 EXPORT_SYMBOL_GPL(hv_stimer_alloc); 219 220 /* 221 * hv_stimer_legacy_init -- Called from the VMbus driver to handle 222 * the case when Direct Mode is not enabled, and the stimer 223 * must be initialized late in the CPU onlining process. 224 * 225 */ 226 void hv_stimer_legacy_init(unsigned int cpu, int sint) 227 { 228 if (direct_mode_enabled) 229 return; 230 231 /* 232 * This function gets called by each vCPU, so setting the 233 * global stimer_message_sint value each time is conceptually 234 * not ideal, but the value passed in is always the same and 235 * it avoids introducing yet another interface into this 236 * clocksource driver just to set the sint in the legacy case. 237 */ 238 stimer0_message_sint = sint; 239 (void)hv_stimer_init(cpu); 240 } 241 EXPORT_SYMBOL_GPL(hv_stimer_legacy_init); 242 243 /* 244 * hv_stimer_legacy_cleanup -- Called from the VMbus driver to 245 * handle the case when Direct Mode is not enabled, and the 246 * stimer must be cleaned up early in the CPU offlining 247 * process. 248 */ 249 void hv_stimer_legacy_cleanup(unsigned int cpu) 250 { 251 if (direct_mode_enabled) 252 return; 253 (void)hv_stimer_cleanup(cpu); 254 } 255 EXPORT_SYMBOL_GPL(hv_stimer_legacy_cleanup); 256 257 258 /* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */ 259 void hv_stimer_free(void) 260 { 261 if (!hv_clock_event) 262 return; 263 264 if (direct_mode_enabled) { 265 cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING); 266 hv_remove_stimer0_irq(stimer0_irq); 267 stimer0_irq = 0; 268 } 269 free_percpu(hv_clock_event); 270 hv_clock_event = NULL; 271 } 272 EXPORT_SYMBOL_GPL(hv_stimer_free); 273 274 /* 275 * Do a global cleanup of clockevents for the cases of kexec and 276 * vmbus exit 277 */ 278 void hv_stimer_global_cleanup(void) 279 { 280 int cpu; 281 282 /* 283 * hv_stime_legacy_cleanup() will stop the stimer if Direct 284 * Mode is not enabled, and fallback to the LAPIC timer. 285 */ 286 for_each_present_cpu(cpu) { 287 hv_stimer_legacy_cleanup(cpu); 288 } 289 290 /* 291 * If Direct Mode is enabled, the cpuhp teardown callback 292 * (hv_stimer_cleanup) will be run on all CPUs to stop the 293 * stimers. 294 */ 295 hv_stimer_free(); 296 } 297 EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup); 298 299 /* 300 * Code and definitions for the Hyper-V clocksources. Two 301 * clocksources are defined: one that reads the Hyper-V defined MSR, and 302 * the other that uses the TSC reference page feature as defined in the 303 * TLFS. The MSR version is for compatibility with old versions of 304 * Hyper-V and 32-bit x86. The TSC reference page version is preferred. 305 */ 306 307 struct clocksource *hyperv_cs; 308 EXPORT_SYMBOL_GPL(hyperv_cs); 309 310 static struct ms_hyperv_tsc_page tsc_pg __aligned(PAGE_SIZE); 311 312 struct ms_hyperv_tsc_page *hv_get_tsc_page(void) 313 { 314 return &tsc_pg; 315 } 316 EXPORT_SYMBOL_GPL(hv_get_tsc_page); 317 318 static u64 notrace read_hv_clock_tsc(struct clocksource *arg) 319 { 320 u64 current_tick = hv_read_tsc_page(&tsc_pg); 321 322 if (current_tick == U64_MAX) 323 hv_get_time_ref_count(current_tick); 324 325 return current_tick; 326 } 327 328 static u64 read_hv_sched_clock_tsc(void) 329 { 330 return read_hv_clock_tsc(NULL) - hv_sched_clock_offset; 331 } 332 333 static struct clocksource hyperv_cs_tsc = { 334 .name = "hyperv_clocksource_tsc_page", 335 .rating = 400, 336 .read = read_hv_clock_tsc, 337 .mask = CLOCKSOURCE_MASK(64), 338 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 339 }; 340 341 static u64 notrace read_hv_clock_msr(struct clocksource *arg) 342 { 343 u64 current_tick; 344 /* 345 * Read the partition counter to get the current tick count. This count 346 * is set to 0 when the partition is created and is incremented in 347 * 100 nanosecond units. 348 */ 349 hv_get_time_ref_count(current_tick); 350 return current_tick; 351 } 352 353 static u64 read_hv_sched_clock_msr(void) 354 { 355 return read_hv_clock_msr(NULL) - hv_sched_clock_offset; 356 } 357 358 static struct clocksource hyperv_cs_msr = { 359 .name = "hyperv_clocksource_msr", 360 .rating = 400, 361 .read = read_hv_clock_msr, 362 .mask = CLOCKSOURCE_MASK(64), 363 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 364 }; 365 366 static bool __init hv_init_tsc_clocksource(void) 367 { 368 u64 tsc_msr; 369 phys_addr_t phys_addr; 370 371 if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) 372 return false; 373 374 hyperv_cs = &hyperv_cs_tsc; 375 phys_addr = virt_to_phys(&tsc_pg); 376 377 /* 378 * The Hyper-V TLFS specifies to preserve the value of reserved 379 * bits in registers. So read the existing value, preserve the 380 * low order 12 bits, and add in the guest physical address 381 * (which already has at least the low 12 bits set to zero since 382 * it is page aligned). Also set the "enable" bit, which is bit 0. 383 */ 384 hv_get_reference_tsc(tsc_msr); 385 tsc_msr &= GENMASK_ULL(11, 0); 386 tsc_msr = tsc_msr | 0x1 | (u64)phys_addr; 387 hv_set_reference_tsc(tsc_msr); 388 389 hv_set_clocksource_vdso(hyperv_cs_tsc); 390 clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); 391 392 hv_sched_clock_offset = hyperv_cs->read(hyperv_cs); 393 hv_setup_sched_clock(read_hv_sched_clock_tsc); 394 395 return true; 396 } 397 398 void __init hv_init_clocksource(void) 399 { 400 /* 401 * Try to set up the TSC page clocksource. If it succeeds, we're 402 * done. Otherwise, set up the MSR clocksoruce. At least one of 403 * these will always be available except on very old versions of 404 * Hyper-V on x86. In that case we won't have a Hyper-V 405 * clocksource, but Linux will still run with a clocksource based 406 * on the emulated PIT or LAPIC timer. 407 */ 408 if (hv_init_tsc_clocksource()) 409 return; 410 411 if (!(ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE)) 412 return; 413 414 hyperv_cs = &hyperv_cs_msr; 415 clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); 416 417 hv_sched_clock_offset = hyperv_cs->read(hyperv_cs); 418 hv_setup_sched_clock(read_hv_sched_clock_msr); 419 } 420 EXPORT_SYMBOL_GPL(hv_init_clocksource); 421