xref: /openbmc/linux/arch/x86/xen/time.c (revision f3a8b664)
/*
 * Xen time implementation.
 *
 * This is implemented in terms of a clocksource driver which uses
 * the hypervisor clock as a nanosecond timebase, and a clockevent
 * driver which uses the hypervisor's timer mechanism.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/clocksource.h>
#include <linux/clockchips.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/pvclock_gtod.h>
#include <linux/timekeeper_internal.h>

#include <asm/pvclock.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/events.h>
#include <xen/features.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include "xen-ops.h"

/* Xen may fire a timer up to this many ns early */
#define TIMER_SLOP	100000

/* Get the TSC speed from Xen */
static unsigned long xen_tsc_khz(void)
{
	struct pvclock_vcpu_time_info *info =
		&HYPERVISOR_shared_info->vcpu_info[0].time;

	return pvclock_tsc_khz(info);
}

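/*
 * Read the hypervisor's notion of system time.  Preemption is disabled
 * (with the notrace variants, since this also serves as the pv
 * sched_clock) so the per-vcpu pvclock area we read belongs to the cpu
 * we are actually running on.
 */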
cycle_t xen_clocksource_read(void)
{
	struct pvclock_vcpu_time_info *src;
	cycle_t ret;

	preempt_disable_notrace();
	src = &__this_cpu_read(xen_vcpu)->time;
	ret = pvclock_clocksource_read(src);
	preempt_enable_notrace();
	return ret;
}

static cycle_t xen_clocksource_get_cycles(struct clocksource *cs)
{
	return xen_clocksource_read();
}

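/*
 * Xen publishes the wall-clock time at system time zero in the shared
 * info page; adding the current system time from the vcpu time info
 * yields the current wall-clock time.
 */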
static void xen_read_wallclock(struct timespec *ts)
{
	struct shared_info *s = HYPERVISOR_shared_info;
	struct pvclock_wall_clock *wall_clock = &(s->wc);
	struct pvclock_vcpu_time_info *vcpu_time;

	vcpu_time = &get_cpu_var(xen_vcpu)->time;
	pvclock_read_wallclock(wall_clock, vcpu_time, ts);
	put_cpu_var(xen_vcpu);
}

static void xen_get_wallclock(struct timespec *now)
{
	xen_read_wallclock(now);
}

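/* An unprivileged domain can't set Xen's wallclock, so always fail. */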
static int xen_set_wallclock(const struct timespec *now)
{
	return -1;
}

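/*
 * Push the kernel's wallclock back to Xen whenever the clock is set,
 * and every 11 minutes thereafter, mirroring the RTC sync done by
 * sync_cmos_clock().  Falls back from XENPF_settime64 to the 32-bit
 * XENPF_settime32 on hypervisors that lack the 64-bit op.
 */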
static int xen_pvclock_gtod_notify(struct notifier_block *nb,
				   unsigned long was_set, void *priv)
{
	/* Protected by the calling core code serialization */
	static struct timespec64 next_sync;

	struct xen_platform_op op;
	struct timespec64 now;
	struct timekeeper *tk = priv;
	static bool settime64_supported = true;
	int ret;

	now.tv_sec = tk->xtime_sec;
	now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);

	/*
	 * Only take the expensive hypercall when the clock was set,
	 * or when 11 minutes have passed since the last RTC sync.
	 */
	if (!was_set && timespec64_compare(&now, &next_sync) < 0)
		return NOTIFY_OK;

again:
	if (settime64_supported) {
		op.cmd = XENPF_settime64;
		op.u.settime64.mbz = 0;
		op.u.settime64.secs = now.tv_sec;
		op.u.settime64.nsecs = now.tv_nsec;
		op.u.settime64.system_time = xen_clocksource_read();
	} else {
		op.cmd = XENPF_settime32;
		op.u.settime32.secs = now.tv_sec;
		op.u.settime32.nsecs = now.tv_nsec;
		op.u.settime32.system_time = xen_clocksource_read();
	}

	ret = HYPERVISOR_platform_op(&op);

	if (ret == -ENOSYS && settime64_supported) {
		settime64_supported = false;
		goto again;
	}
	if (ret < 0)
		return NOTIFY_BAD;

	/*
	 * Move the next drift compensation time 11 minutes ahead,
	 * emulating the sync_cmos_clock() update for the hardware RTC.
	 */
	next_sync = now;
	next_sync.tv_sec += 11 * 60;

	return NOTIFY_OK;
}

static struct notifier_block xen_pvclock_gtod_notifier = {
	.notifier_call = xen_pvclock_gtod_notify,
};

static struct clocksource xen_clocksource __read_mostly = {
	.name = "xen",
	.rating = 400,
	.read = xen_clocksource_get_cycles,
	.mask = ~0,
	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};

/*
 * Xen clockevent implementation
 *
 * Xen has two clockevent implementations:
 *
 * The old timer_op one works with all released versions of Xen prior
 * to version 3.0.4.  This version of the hypervisor provides a
 * single-shot timer with nanosecond resolution.  However, a 100Hz
 * tick, delivered while the vcpu is running, shares the same event
 * channel.  We don't care about or use this tick, but it will cause
 * the core time code to think the timer fired too soon, and will end
 * up resetting it each time.  It could be filtered, but doing so has
 * complications when the ktime clocksource is not yet the xen
 * clocksource (ie, at boot time).
 *
 * The new vcpu_op-based timer interface allows the tick timer period
 * to be changed or turned off.  The tick timer is not useful as a
 * periodic timer because events are only delivered to running vcpus.
 * The one-shot timer can report when a timeout is in the past, so
 * set_next_event is capable of returning -ETIME when appropriate.
 * This interface is used when available.
 */

/*
 * Get a hypervisor absolute time.  In theory we could maintain an
 * offset between the kernel's time and the hypervisor's time, and
 * apply that to the kernel's absolute timeouts.  Unfortunately the
 * hypervisor and kernel times can drift even if the kernel is using
 * the Xen clocksource, because ntp can warp the kernel's clocksource.
 */
static s64 get_abs_timeout(unsigned long delta)
{
	return xen_clocksource_read() + delta;
}

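/* Shut down the timer_op clockevent: a timeout of 0 cancels the pending timer. */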
static int xen_timerop_shutdown(struct clock_event_device *evt)
{
	/* cancel timeout */
	HYPERVISOR_set_timer_op(0);

	return 0;
}

static int xen_timerop_set_next_event(unsigned long delta,
				      struct clock_event_device *evt)
{
	WARN_ON(!clockevent_state_oneshot(evt));

	if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
		BUG();

	/*
	 * We may have missed the deadline, but there's no real way of
	 * knowing for sure.  If the event was in the past, then we'll
	 * get an immediate interrupt.
	 */

	return 0;
}

static const struct clock_event_device xen_timerop_clockevent = {
	.name			= "xen",
	.features		= CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns		= 0xffffffff,
	.min_delta_ns		= TIMER_SLOP,

	.mult			= 1,
	.shift			= 0,
	.rating			= 500,

	.set_state_shutdown	= xen_timerop_shutdown,
	.set_next_event		= xen_timerop_set_next_event,
};

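/* Stop both the single-shot and the legacy periodic timer for this vcpu. */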
static int xen_vcpuop_shutdown(struct clock_event_device *evt)
{
	int cpu = smp_processor_id();

	if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, xen_vcpu_nr(cpu),
			       NULL) ||
	    HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
			       NULL))
		BUG();

	return 0;
}

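/* Entering one-shot mode only requires stopping the periodic tick. */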
static int xen_vcpuop_set_oneshot(struct clock_event_device *evt)
{
	int cpu = smp_processor_id();

	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
			       NULL))
		BUG();

	return 0;
}

static int xen_vcpuop_set_next_event(unsigned long delta,
				     struct clock_event_device *evt)
{
	int cpu = smp_processor_id();
	struct vcpu_set_singleshot_timer single;
	int ret;

	WARN_ON(!clockevent_state_oneshot(evt));

	single.timeout_abs_ns = get_abs_timeout(delta);
	/* Get an event anyway, even if the timeout is already expired */
	single.flags = 0;

	ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, xen_vcpu_nr(cpu),
				 &single);
	BUG_ON(ret != 0);

	return ret;
}

static const struct clock_event_device xen_vcpuop_clockevent = {
	.name = "xen",
	.features = CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns = 0xffffffff,
	.min_delta_ns = TIMER_SLOP,

	.mult = 1,
	.shift = 0,
	.rating = 500,

	.set_state_shutdown = xen_vcpuop_shutdown,
	.set_state_oneshot = xen_vcpuop_set_oneshot,
	.set_next_event = xen_vcpuop_set_next_event,
};

static const struct clock_event_device *xen_clockevent =
	&xen_timerop_clockevent;

struct xen_clock_event_device {
	struct clock_event_device evt;
	char name[16];
};
static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 };

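/* Per-cpu VIRQ_TIMER handler: dispatch to the registered clockevent handler. */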
static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
{
	struct clock_event_device *evt = this_cpu_ptr(&xen_clock_events.evt);
	irqreturn_t ret;

	ret = IRQ_NONE;
	if (evt->event_handler) {
		evt->event_handler(evt);
		ret = IRQ_HANDLED;
	}

	return ret;
}

void xen_teardown_timer(int cpu)
{
	struct clock_event_device *evt;

	BUG_ON(cpu == 0);
	evt = &per_cpu(xen_clock_events, cpu).evt;

	if (evt->irq >= 0) {
		unbind_from_irqhandler(evt->irq, NULL);
		evt->irq = -1;
	}
}

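/* Bind VIRQ_TIMER for @cpu and install the selected Xen clockevent device. */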
void xen_setup_timer(int cpu)
{
	struct xen_clock_event_device *xevt = &per_cpu(xen_clock_events, cpu);
	struct clock_event_device *evt = &xevt->evt;
	int irq;

	WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu);
	if (evt->irq >= 0)
		xen_teardown_timer(cpu);

	printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);

	snprintf(xevt->name, sizeof(xevt->name), "timer%d", cpu);

	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
				      IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER|
				      IRQF_FORCE_RESUME|IRQF_EARLY_RESUME,
				      xevt->name, NULL);
	(void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);

	memcpy(evt, xen_clockevent, sizeof(*evt));

	evt->cpumask = cpumask_of(cpu);
	evt->irq = irq;
}

void xen_setup_cpu_clockevents(void)
{
	clockevents_register_device(this_cpu_ptr(&xen_clock_events.evt));
}

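/*
 * Timer state is not preserved across save/restore: resume the pvclock
 * and make sure the legacy periodic tick stays off on every online vcpu.
 */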
void xen_timer_resume(void)
{
	int cpu;

	pvclock_resume();

	if (xen_clockevent != &xen_vcpuop_clockevent)
		return;

	for_each_online_cpu(cpu) {
		if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer,
				       xen_vcpu_nr(cpu), NULL))
			BUG();
	}
}

static const struct pv_time_ops xen_time_ops __initconst = {
	.sched_clock = xen_clocksource_read,
	.steal_clock = xen_steal_clock,
};

static void __init xen_time_init(void)
{
	int cpu = smp_processor_id();
	struct timespec tp;

	/* Dom0 is never migrated, so there is no penalty in using the TSC there */
	if (xen_initial_domain())
		xen_clocksource.rating = 275;

	clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);

	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
			       NULL) == 0) {
		/*
		 * Successfully turned off the 100Hz tick, so we have
		 * the vcpuop-based timer interface.
		 */
		printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
		xen_clockevent = &xen_vcpuop_clockevent;
	}

	/* Set initial system time with full resolution */
	xen_read_wallclock(&tp);
	do_settimeofday(&tp);

	setup_force_cpu_cap(X86_FEATURE_TSC);

	xen_setup_runstate_info(cpu);
	xen_setup_timer(cpu);
	xen_setup_cpu_clockevents();

	xen_time_setup_guest();

	if (xen_initial_domain())
		pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
}

void __init xen_init_time_ops(void)
{
	pv_time_ops = xen_time_ops;

	x86_init.timers.timer_init = xen_time_init;
	x86_init.timers.setup_percpu_clockev = x86_init_noop;
	x86_cpuinit.setup_percpu_clockev = x86_init_noop;

	x86_platform.calibrate_tsc = xen_tsc_khz;
	x86_platform.get_wallclock = xen_get_wallclock;
	/* Dom0 uses the native method to set the hardware RTC. */
	if (!xen_initial_domain())
		x86_platform.set_wallclock = xen_set_wallclock;
}

#ifdef CONFIG_XEN_PVHVM
static void xen_hvm_setup_cpu_clockevents(void)
{
	int cpu = smp_processor_id();
	xen_setup_runstate_info(cpu);
	/*
	 * xen_setup_timer(cpu) - snprintf is bad in atomic context. Hence
	 * it is done in xen_hvm_cpu_notify (which is called by smp_init
	 * during early bootup and also during CPU hotplug events).
	 */
	xen_setup_cpu_clockevents();
}

void __init xen_hvm_init_time_ops(void)
{
	if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
		printk(KERN_INFO "Xen doesn't support pvclock on HVM, "
				 "disabling pv timer\n");
		return;
	}

	pv_time_ops = xen_time_ops;
	x86_init.timers.setup_percpu_clockev = xen_time_init;
	x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;

	x86_platform.calibrate_tsc = xen_tsc_khz;
	x86_platform.get_wallclock = xen_get_wallclock;
	x86_platform.set_wallclock = xen_set_wallclock;
}
#endif