xref: /openbmc/linux/arch/x86/xen/time.c (revision 1ac731c529cd4d6adbce134754b51ff7d822b145)
1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
29702785aSThomas Gleixner /*
39702785aSThomas Gleixner  * Xen time implementation.
49702785aSThomas Gleixner  *
59702785aSThomas Gleixner  * This is implemented in terms of a clocksource driver which uses
69702785aSThomas Gleixner  * the hypervisor clock as a nanosecond timebase, and a clockevent
79702785aSThomas Gleixner  * driver which uses the hypervisor's timer mechanism.
89702785aSThomas Gleixner  *
99702785aSThomas Gleixner  * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
109702785aSThomas Gleixner  */
119702785aSThomas Gleixner #include <linux/kernel.h>
129702785aSThomas Gleixner #include <linux/interrupt.h>
139702785aSThomas Gleixner #include <linux/clocksource.h>
149702785aSThomas Gleixner #include <linux/clockchips.h>
155a0e3ad6STejun Heo #include <linux/gfp.h>
16c9d76a24SKonrad Rzeszutek Wilk #include <linux/slab.h>
175584880eSDavid Vrabel #include <linux/pvclock_gtod.h>
1876096863SStefano Stabellini #include <linux/timekeeper_internal.h>
199702785aSThomas Gleixner 
201c7b67f7SGerd Hoffmann #include <asm/pvclock.h>
219702785aSThomas Gleixner #include <asm/xen/hypervisor.h>
229702785aSThomas Gleixner #include <asm/xen/hypercall.h>
23*99a7bcafSKrister Johansen #include <asm/xen/cpuid.h>
249702785aSThomas Gleixner 
259702785aSThomas Gleixner #include <xen/events.h>
26409771d2SStefano Stabellini #include <xen/features.h>
279702785aSThomas Gleixner #include <xen/interface/xen.h>
289702785aSThomas Gleixner #include <xen/interface/vcpu.h>
299702785aSThomas Gleixner 
309702785aSThomas Gleixner #include "xen-ops.h"
319702785aSThomas Gleixner 
322ec16bc0SRyan Thibodeaux /* Minimum amount of time until next clock event fires */
339702785aSThomas Gleixner #define TIMER_SLOP	100000
349702785aSThomas Gleixner 
3538669ba2SPavel Tatashin static u64 xen_sched_clock_offset __read_mostly;
3638669ba2SPavel Tatashin 
37e93ef949SAlok Kataria /* Get the TSC speed from Xen */
xen_tsc_khz(void)38409771d2SStefano Stabellini static unsigned long xen_tsc_khz(void)
399702785aSThomas Gleixner {
403807f345SGlauber Costa 	struct pvclock_vcpu_time_info *info =
419702785aSThomas Gleixner 		&HYPERVISOR_shared_info->vcpu_info[0].time;
429702785aSThomas Gleixner 
43898ec52dSHayato Ohhashi 	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
443807f345SGlauber Costa 	return pvclock_tsc_khz(info);
459702785aSThomas Gleixner }
469702785aSThomas Gleixner 
xen_clocksource_read(void)477b25b9cbSPavel Tatashin static u64 xen_clocksource_read(void)
489702785aSThomas Gleixner {
491c7b67f7SGerd Hoffmann         struct pvclock_vcpu_time_info *src;
50a5a1d1c2SThomas Gleixner 	u64 ret;
519702785aSThomas Gleixner 
52f1c39625SJeremy Fitzhardinge 	preempt_disable_notrace();
533251f20bSBoris Ostrovsky 	src = &__this_cpu_read(xen_vcpu)->time;
541c7b67f7SGerd Hoffmann 	ret = pvclock_clocksource_read(src);
55f1c39625SJeremy Fitzhardinge 	preempt_enable_notrace();
569702785aSThomas Gleixner 	return ret;
579702785aSThomas Gleixner }
589702785aSThomas Gleixner 
xen_clocksource_get_cycles(struct clocksource * cs)59a5a1d1c2SThomas Gleixner static u64 xen_clocksource_get_cycles(struct clocksource *cs)
608e19608eSMagnus Damm {
618e19608eSMagnus Damm 	return xen_clocksource_read();
628e19608eSMagnus Damm }
638e19608eSMagnus Damm 
xen_sched_clock(void)648739c681SPeter Zijlstra static noinstr u64 xen_sched_clock(void)
6538669ba2SPavel Tatashin {
668739c681SPeter Zijlstra         struct pvclock_vcpu_time_info *src;
678739c681SPeter Zijlstra 	u64 ret;
688739c681SPeter Zijlstra 
698739c681SPeter Zijlstra 	src = &__this_cpu_read(xen_vcpu)->time;
708739c681SPeter Zijlstra 	ret = pvclock_clocksource_read_nowd(src);
718739c681SPeter Zijlstra 	ret -= xen_sched_clock_offset;
728739c681SPeter Zijlstra 
738739c681SPeter Zijlstra 	return ret;
748739c681SPeter Zijlstra }
7538669ba2SPavel Tatashin 
xen_read_wallclock(struct timespec64 * ts)7638669ba2SPavel Tatashin static void xen_read_wallclock(struct timespec64 *ts)
77e27c4929SArnd Bergmann {
789702785aSThomas Gleixner 	struct shared_info *s = HYPERVISOR_shared_info;
791c7b67f7SGerd Hoffmann 	struct pvclock_wall_clock *wall_clock = &(s->wc);
801c7b67f7SGerd Hoffmann         struct pvclock_vcpu_time_info *vcpu_time;
811c7b67f7SGerd Hoffmann 
829702785aSThomas Gleixner 	vcpu_time = &get_cpu_var(xen_vcpu)->time;
831c7b67f7SGerd Hoffmann 	pvclock_read_wallclock(wall_clock, vcpu_time, ts);
841c7b67f7SGerd Hoffmann 	put_cpu_var(xen_vcpu);
851c7b67f7SGerd Hoffmann }
869702785aSThomas Gleixner 
/* x86_platform.get_wallclock hook: store the Xen wallclock time in @now. */
static void xen_get_wallclock(struct timespec64 *now)
{
	xen_read_wallclock(now);
}
919702785aSThomas Gleixner 
xen_set_wallclock(const struct timespec64 * now)929702785aSThomas Gleixner static int xen_set_wallclock(const struct timespec64 *now)
93e27c4929SArnd Bergmann {
949702785aSThomas Gleixner 	return -ENODEV;
95b5494ad8SBoris Ostrovsky }
969702785aSThomas Gleixner 
xen_pvclock_gtod_notify(struct notifier_block * nb,unsigned long was_set,void * priv)979702785aSThomas Gleixner static int xen_pvclock_gtod_notify(struct notifier_block *nb,
9847433b8cSDavid Vrabel 				   unsigned long was_set, void *priv)
9947433b8cSDavid Vrabel {
1005584880eSDavid Vrabel 	/* Protected by the calling core code serialization */
10147433b8cSDavid Vrabel 	static struct timespec64 next_sync;
102187b26a9SStefano Stabellini 
1035584880eSDavid Vrabel 	struct xen_platform_op op;
10447433b8cSDavid Vrabel 	struct timespec64 now;
10576096863SStefano Stabellini 	struct timekeeper *tk = priv;
10676096863SStefano Stabellini 	static bool settime64_supported = true;
10776096863SStefano Stabellini 	int ret;
10876096863SStefano Stabellini 
1095584880eSDavid Vrabel 	now.tv_sec = tk->xtime_sec;
11076096863SStefano Stabellini 	now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
11176096863SStefano Stabellini 
1125584880eSDavid Vrabel 	/*
11347433b8cSDavid Vrabel 	 * We only take the expensive HV call when the clock was set
11447433b8cSDavid Vrabel 	 * or when the 11 minutes RTC synchronization time elapsed.
11547433b8cSDavid Vrabel 	 */
11647433b8cSDavid Vrabel 	if (!was_set && timespec64_compare(&now, &next_sync) < 0)
117187b26a9SStefano Stabellini 		return NOTIFY_OK;
11847433b8cSDavid Vrabel 
1199702785aSThomas Gleixner again:
12076096863SStefano Stabellini 	if (settime64_supported) {
12176096863SStefano Stabellini 		op.cmd = XENPF_settime64;
12276096863SStefano Stabellini 		op.u.settime64.mbz = 0;
12376096863SStefano Stabellini 		op.u.settime64.secs = now.tv_sec;
12476096863SStefano Stabellini 		op.u.settime64.nsecs = now.tv_nsec;
12576096863SStefano Stabellini 		op.u.settime64.system_time = xen_clocksource_read();
12676096863SStefano Stabellini 	} else {
12776096863SStefano Stabellini 		op.cmd = XENPF_settime32;
128f3d6027eSStefano Stabellini 		op.u.settime32.secs = now.tv_sec;
129f3d6027eSStefano Stabellini 		op.u.settime32.nsecs = now.tv_nsec;
130f3d6027eSStefano Stabellini 		op.u.settime32.system_time = xen_clocksource_read();
131f3d6027eSStefano Stabellini 	}
13276096863SStefano Stabellini 
1339702785aSThomas Gleixner 	ret = HYPERVISOR_platform_op(&op);
13476096863SStefano Stabellini 
13576096863SStefano Stabellini 	if (ret == -ENOSYS && settime64_supported) {
13676096863SStefano Stabellini 		settime64_supported = false;
13776096863SStefano Stabellini 		goto again;
13876096863SStefano Stabellini 	}
13976096863SStefano Stabellini 	if (ret < 0)
14076096863SStefano Stabellini 		return NOTIFY_BAD;
14176096863SStefano Stabellini 
1429702785aSThomas Gleixner 	/*
14347433b8cSDavid Vrabel 	 * Move the next drift compensation time 11 minutes
14447433b8cSDavid Vrabel 	 * ahead. That's emulating the sync_cmos_clock() update for
14547433b8cSDavid Vrabel 	 * the hardware RTC.
14647433b8cSDavid Vrabel 	 */
14747433b8cSDavid Vrabel 	next_sync = now;
14847433b8cSDavid Vrabel 	next_sync.tv_sec += 11 * 60;
14947433b8cSDavid Vrabel 
15047433b8cSDavid Vrabel 	return NOTIFY_OK;
1515584880eSDavid Vrabel }
1529702785aSThomas Gleixner 
1539702785aSThomas Gleixner static struct notifier_block xen_pvclock_gtod_notifier = {
1545584880eSDavid Vrabel 	.notifier_call = xen_pvclock_gtod_notify,
1555584880eSDavid Vrabel };
1565584880eSDavid Vrabel 
xen_cs_enable(struct clocksource * cs)1575584880eSDavid Vrabel static int xen_cs_enable(struct clocksource *cs)
158eec399ddSThomas Gleixner {
159eec399ddSThomas Gleixner 	vclocks_set_used(VDSO_CLOCKMODE_PVCLOCK);
160b95a8a27SThomas Gleixner 	return 0;
161eec399ddSThomas Gleixner }
162eec399ddSThomas Gleixner 
163eec399ddSThomas Gleixner static struct clocksource xen_clocksource __read_mostly = {
1649702785aSThomas Gleixner 	.name	= "xen",
1659702785aSThomas Gleixner 	.rating	= 400,
1669702785aSThomas Gleixner 	.read	= xen_clocksource_get_cycles,
1679702785aSThomas Gleixner 	.mask	= CLOCKSOURCE_MASK(64),
168eec399ddSThomas Gleixner 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
1699702785aSThomas Gleixner 	.enable = xen_cs_enable,
170eec399ddSThomas Gleixner };
1719702785aSThomas Gleixner 
1729702785aSThomas Gleixner /*
1739702785aSThomas Gleixner    Xen clockevent implementation
1749702785aSThomas Gleixner 
1759702785aSThomas Gleixner    Xen has two clockevent implementations:
1769702785aSThomas Gleixner 
1779702785aSThomas Gleixner    The old timer_op one works with all released versions of Xen prior
1789702785aSThomas Gleixner    to version 3.0.4.  This version of the hypervisor provides a
1799702785aSThomas Gleixner    single-shot timer with nanosecond resolution.  However, sharing the
1809702785aSThomas Gleixner    same event channel is a 100Hz tick which is delivered while the
1819702785aSThomas Gleixner    vcpu is running.  We don't care about or use this tick, but it will
1829702785aSThomas Gleixner    cause the core time code to think the timer fired too soon, and
1839702785aSThomas Gleixner    will end up resetting it each time.  It could be filtered, but
1849702785aSThomas Gleixner    doing so has complications when the ktime clocksource is not yet
1859702785aSThomas Gleixner    the xen clocksource (ie, at boot time).
1869702785aSThomas Gleixner 
1879702785aSThomas Gleixner    The new vcpu_op-based timer interface allows the tick timer period
1889702785aSThomas Gleixner    to be changed or turned off.  The tick timer is not useful as a
1899702785aSThomas Gleixner    periodic timer because events are only delivered to running vcpus.
1909702785aSThomas Gleixner    The one-shot timer can report when a timeout is in the past, so
1919702785aSThomas Gleixner    set_next_event is capable of returning -ETIME when appropriate.
1929702785aSThomas Gleixner    This interface is used when available.
1939702785aSThomas Gleixner */
1949702785aSThomas Gleixner 
1959702785aSThomas Gleixner 
1969702785aSThomas Gleixner /*
1979702785aSThomas Gleixner   Get a hypervisor absolute time.  In theory we could maintain an
1989702785aSThomas Gleixner   offset between the kernel's time and the hypervisor's time, and
1999702785aSThomas Gleixner   apply that to a kernel's absolute timeout.  Unfortunately the
2009702785aSThomas Gleixner   hypervisor and kernel times can drift even if the kernel is using
2019702785aSThomas Gleixner   the Xen clocksource, because ntp can warp the kernel's clocksource.
2029702785aSThomas Gleixner */
get_abs_timeout(unsigned long delta)2039702785aSThomas Gleixner static s64 get_abs_timeout(unsigned long delta)
2049702785aSThomas Gleixner {
2059702785aSThomas Gleixner 	return xen_clocksource_read() + delta;
2069702785aSThomas Gleixner }
2079702785aSThomas Gleixner 
/*
 * timer_op clockevent ->set_state_shutdown callback.  @evt is unused and
 * the hypercall result is not checked.  Always returns 0.
 */
static int xen_timerop_shutdown(struct clock_event_device *evt)
{
	/* Setting a timeout of 0 cancels the pending timeout. */
	HYPERVISOR_set_timer_op(0);
	return 0;
}
2159702785aSThomas Gleixner 
xen_timerop_set_next_event(unsigned long delta,struct clock_event_device * evt)2169702785aSThomas Gleixner static int xen_timerop_set_next_event(unsigned long delta,
2179702785aSThomas Gleixner 				      struct clock_event_device *evt)
2189702785aSThomas Gleixner {
2199702785aSThomas Gleixner 	WARN_ON(!clockevent_state_oneshot(evt));
220955381ddSViresh Kumar 
2219702785aSThomas Gleixner 	if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
2229702785aSThomas Gleixner 		BUG();
2239702785aSThomas Gleixner 
2249702785aSThomas Gleixner 	/* We may have missed the deadline, but there's no real way of
2259702785aSThomas Gleixner 	   knowing for sure.  If the event was in the past, then we'll
2269702785aSThomas Gleixner 	   get an immediate interrupt. */
2279702785aSThomas Gleixner 
2289702785aSThomas Gleixner 	return 0;
2299702785aSThomas Gleixner }
2309702785aSThomas Gleixner 
2319702785aSThomas Gleixner static struct clock_event_device xen_timerop_clockevent __ro_after_init = {
2322ec16bc0SRyan Thibodeaux 	.name			= "xen",
2339702785aSThomas Gleixner 	.features		= CLOCK_EVT_FEAT_ONESHOT,
2349702785aSThomas Gleixner 
2359702785aSThomas Gleixner 	.max_delta_ns		= 0xffffffff,
2369702785aSThomas Gleixner 	.max_delta_ticks	= 0xffffffff,
2373d18d661SNicolai Stange 	.min_delta_ns		= TIMER_SLOP,
2389702785aSThomas Gleixner 	.min_delta_ticks	= TIMER_SLOP,
2393d18d661SNicolai Stange 
2409702785aSThomas Gleixner 	.mult			= 1,
2419702785aSThomas Gleixner 	.shift			= 0,
2429702785aSThomas Gleixner 	.rating			= 500,
2439702785aSThomas Gleixner 
2449702785aSThomas Gleixner 	.set_state_shutdown	= xen_timerop_shutdown,
245955381ddSViresh Kumar 	.set_next_event		= xen_timerop_set_next_event,
2469702785aSThomas Gleixner };
2479702785aSThomas Gleixner 
xen_vcpuop_shutdown(struct clock_event_device * evt)2489702785aSThomas Gleixner static int xen_vcpuop_shutdown(struct clock_event_device *evt)
249955381ddSViresh Kumar {
2509702785aSThomas Gleixner 	int cpu = smp_processor_id();
2519702785aSThomas Gleixner 
2529702785aSThomas Gleixner 	if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, xen_vcpu_nr(cpu),
253ad5475f9SVitaly Kuznetsov 			       NULL) ||
254ad5475f9SVitaly Kuznetsov 	    HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
255ad5475f9SVitaly Kuznetsov 			       NULL))
256ad5475f9SVitaly Kuznetsov 		BUG();
2579702785aSThomas Gleixner 
258955381ddSViresh Kumar 	return 0;
259955381ddSViresh Kumar }
2609702785aSThomas Gleixner 
xen_vcpuop_set_oneshot(struct clock_event_device * evt)261955381ddSViresh Kumar static int xen_vcpuop_set_oneshot(struct clock_event_device *evt)
262955381ddSViresh Kumar {
263955381ddSViresh Kumar 	int cpu = smp_processor_id();
264955381ddSViresh Kumar 
265955381ddSViresh Kumar 	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
266ad5475f9SVitaly Kuznetsov 			       NULL))
267ad5475f9SVitaly Kuznetsov 		BUG();
268955381ddSViresh Kumar 
269955381ddSViresh Kumar 	return 0;
270955381ddSViresh Kumar }
2719702785aSThomas Gleixner 
xen_vcpuop_set_next_event(unsigned long delta,struct clock_event_device * evt)2729702785aSThomas Gleixner static int xen_vcpuop_set_next_event(unsigned long delta,
2739702785aSThomas Gleixner 				     struct clock_event_device *evt)
2749702785aSThomas Gleixner {
2759702785aSThomas Gleixner 	int cpu = smp_processor_id();
2769702785aSThomas Gleixner 	struct vcpu_set_singleshot_timer single;
2779702785aSThomas Gleixner 	int ret;
2789702785aSThomas Gleixner 
2799702785aSThomas Gleixner 	WARN_ON(!clockevent_state_oneshot(evt));
280955381ddSViresh Kumar 
2819702785aSThomas Gleixner 	single.timeout_abs_ns = get_abs_timeout(delta);
2829702785aSThomas Gleixner 	/* Get an event anyway, even if the timeout is already expired */
283c06b6d70SStefano Stabellini 	single.flags = 0;
284c06b6d70SStefano Stabellini 
2859702785aSThomas Gleixner 	ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, xen_vcpu_nr(cpu),
286ad5475f9SVitaly Kuznetsov 				 &single);
287ad5475f9SVitaly Kuznetsov 	BUG_ON(ret != 0);
288c06b6d70SStefano Stabellini 
2899702785aSThomas Gleixner 	return ret;
2909702785aSThomas Gleixner }
2919702785aSThomas Gleixner 
2929702785aSThomas Gleixner static struct clock_event_device xen_vcpuop_clockevent __ro_after_init = {
2932ec16bc0SRyan Thibodeaux 	.name = "xen",
2949702785aSThomas Gleixner 	.features = CLOCK_EVT_FEAT_ONESHOT,
2959702785aSThomas Gleixner 
2969702785aSThomas Gleixner 	.max_delta_ns = 0xffffffff,
2979702785aSThomas Gleixner 	.max_delta_ticks = 0xffffffff,
2983d18d661SNicolai Stange 	.min_delta_ns = TIMER_SLOP,
2999702785aSThomas Gleixner 	.min_delta_ticks = TIMER_SLOP,
3003d18d661SNicolai Stange 
3019702785aSThomas Gleixner 	.mult = 1,
3029702785aSThomas Gleixner 	.shift = 0,
3039702785aSThomas Gleixner 	.rating = 500,
3049702785aSThomas Gleixner 
3059702785aSThomas Gleixner 	.set_state_shutdown = xen_vcpuop_shutdown,
306955381ddSViresh Kumar 	.set_state_oneshot = xen_vcpuop_set_oneshot,
307955381ddSViresh Kumar 	.set_next_event = xen_vcpuop_set_next_event,
3089702785aSThomas Gleixner };
3099702785aSThomas Gleixner 
3109702785aSThomas Gleixner static const struct clock_event_device *xen_clockevent =
3119702785aSThomas Gleixner 	&xen_timerop_clockevent;
3129702785aSThomas Gleixner 
31331620a19SKonrad Rzeszutek Wilk struct xen_clock_event_device {
31431620a19SKonrad Rzeszutek Wilk 	struct clock_event_device evt;
31531620a19SKonrad Rzeszutek Wilk 	char name[16];
3167be0772dSVitaly Kuznetsov };
31731620a19SKonrad Rzeszutek Wilk static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 };
31831620a19SKonrad Rzeszutek Wilk 
xen_timer_interrupt(int irq,void * dev_id)3199702785aSThomas Gleixner static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
3209702785aSThomas Gleixner {
3219702785aSThomas Gleixner 	struct clock_event_device *evt = this_cpu_ptr(&xen_clock_events.evt);
32289cbc767SChristoph Lameter 	irqreturn_t ret;
3239702785aSThomas Gleixner 
3249702785aSThomas Gleixner 	ret = IRQ_NONE;
3259702785aSThomas Gleixner 	if (evt->event_handler) {
3269702785aSThomas Gleixner 		evt->event_handler(evt);
3279702785aSThomas Gleixner 		ret = IRQ_HANDLED;
3289702785aSThomas Gleixner 	}
3299702785aSThomas Gleixner 
3309702785aSThomas Gleixner 	return ret;
3319702785aSThomas Gleixner }
3329702785aSThomas Gleixner 
xen_teardown_timer(int cpu)3339702785aSThomas Gleixner void xen_teardown_timer(int cpu)
33409e99da7SKonrad Rzeszutek Wilk {
33509e99da7SKonrad Rzeszutek Wilk 	struct clock_event_device *evt;
33609e99da7SKonrad Rzeszutek Wilk 	evt = &per_cpu(xen_clock_events, cpu).evt;
33709e99da7SKonrad Rzeszutek Wilk 
33809e99da7SKonrad Rzeszutek Wilk 	if (evt->irq >= 0) {
33909e99da7SKonrad Rzeszutek Wilk 		unbind_from_irqhandler(evt->irq, NULL);
34009e99da7SKonrad Rzeszutek Wilk 		evt->irq = -1;
34109e99da7SKonrad Rzeszutek Wilk 	}
34209e99da7SKonrad Rzeszutek Wilk }
34309e99da7SKonrad Rzeszutek Wilk 
xen_setup_timer(int cpu)34409e99da7SKonrad Rzeszutek Wilk void xen_setup_timer(int cpu)
3459702785aSThomas Gleixner {
3469702785aSThomas Gleixner 	struct xen_clock_event_device *xevt = &per_cpu(xen_clock_events, cpu);
3477be0772dSVitaly Kuznetsov 	struct clock_event_device *evt = &xevt->evt;
3487be0772dSVitaly Kuznetsov 	int irq;
3499702785aSThomas Gleixner 
3509702785aSThomas Gleixner 	WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu);
351ef35a4e6SKonrad Rzeszutek Wilk 	if (evt->irq >= 0)
35209e99da7SKonrad Rzeszutek Wilk 		xen_teardown_timer(cpu);
35309e99da7SKonrad Rzeszutek Wilk 
354ef35a4e6SKonrad Rzeszutek Wilk 	printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);
3559702785aSThomas Gleixner 
3569702785aSThomas Gleixner 	snprintf(xevt->name, sizeof(xevt->name), "timer%d", cpu);
3577be0772dSVitaly Kuznetsov 
3589702785aSThomas Gleixner 	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
3599702785aSThomas Gleixner 				      IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER|
3609d71cee6SMichael Opdenacker 				      IRQF_FORCE_RESUME|IRQF_EARLY_RESUME,
3618d5999dfSDavid Vrabel 				      xevt->name, NULL);
3627be0772dSVitaly Kuznetsov 	(void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);
3638785c676SDavid Vrabel 
3649702785aSThomas Gleixner 	memcpy(evt, xen_clockevent, sizeof(*evt));
3659702785aSThomas Gleixner 
3669702785aSThomas Gleixner 	evt->cpumask = cpumask_of(cpu);
367320ab2b0SRusty Russell 	evt->irq = irq;
3689702785aSThomas Gleixner }
3699702785aSThomas Gleixner 
3709702785aSThomas Gleixner 
xen_setup_cpu_clockevents(void)371d68d82afSAlex Nixon void xen_setup_cpu_clockevents(void)
3729702785aSThomas Gleixner {
3739702785aSThomas Gleixner 	clockevents_register_device(this_cpu_ptr(&xen_clock_events.evt));
37489cbc767SChristoph Lameter }
3759702785aSThomas Gleixner 
xen_timer_resume(void)3769702785aSThomas Gleixner void xen_timer_resume(void)
377d07af1f0SJeremy Fitzhardinge {
378d07af1f0SJeremy Fitzhardinge 	int cpu;
379d07af1f0SJeremy Fitzhardinge 
380d07af1f0SJeremy Fitzhardinge 	if (xen_clockevent != &xen_vcpuop_clockevent)
381d07af1f0SJeremy Fitzhardinge 		return;
382d07af1f0SJeremy Fitzhardinge 
383d07af1f0SJeremy Fitzhardinge 	for_each_online_cpu(cpu) {
384d07af1f0SJeremy Fitzhardinge 		if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer,
385ad5475f9SVitaly Kuznetsov 				       xen_vcpu_nr(cpu), NULL))
386ad5475f9SVitaly Kuznetsov 			BUG();
387d07af1f0SJeremy Fitzhardinge 	}
388d07af1f0SJeremy Fitzhardinge }
389d07af1f0SJeremy Fitzhardinge 
390d07af1f0SJeremy Fitzhardinge static struct pvclock_vsyscall_time_info *xen_clock __read_mostly;
3912229f70bSJoao Martins static u64 xen_clock_value_saved;
392867cefb4SJuergen Gross 
xen_save_time_memory_area(void)3932229f70bSJoao Martins void xen_save_time_memory_area(void)
3942229f70bSJoao Martins {
3952229f70bSJoao Martins 	struct vcpu_register_time_memory_area t;
3962229f70bSJoao Martins 	int ret;
3972229f70bSJoao Martins 
3982229f70bSJoao Martins 	xen_clock_value_saved = xen_clocksource_read() - xen_sched_clock_offset;
399867cefb4SJuergen Gross 
400867cefb4SJuergen Gross 	if (!xen_clock)
4012229f70bSJoao Martins 		return;
4022229f70bSJoao Martins 
4032229f70bSJoao Martins 	t.addr.v = NULL;
4042229f70bSJoao Martins 
4052229f70bSJoao Martins 	ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
4062229f70bSJoao Martins 	if (ret != 0)
4072229f70bSJoao Martins 		pr_notice("Cannot save secondary vcpu_time_info (err %d)",
4082229f70bSJoao Martins 			  ret);
4092229f70bSJoao Martins 	else
4102229f70bSJoao Martins 		clear_page(xen_clock);
4112229f70bSJoao Martins }
4122229f70bSJoao Martins 
xen_restore_time_memory_area(void)4132229f70bSJoao Martins void xen_restore_time_memory_area(void)
4142229f70bSJoao Martins {
4152229f70bSJoao Martins 	struct vcpu_register_time_memory_area t;
4162229f70bSJoao Martins 	int ret;
4172229f70bSJoao Martins 
4182229f70bSJoao Martins 	if (!xen_clock)
4192229f70bSJoao Martins 		goto out;
420867cefb4SJuergen Gross 
4212229f70bSJoao Martins 	t.addr.v = &xen_clock->pvti;
4222229f70bSJoao Martins 
4232229f70bSJoao Martins 	ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
4242229f70bSJoao Martins 
4252229f70bSJoao Martins 	/*
4262229f70bSJoao Martins 	 * We don't disable VDSO_CLOCKMODE_PVCLOCK entirely if it fails to
427b95a8a27SThomas Gleixner 	 * register the secondary time info with Xen or if we migrated to a
428b95a8a27SThomas Gleixner 	 * host without the necessary flags. On both of these cases what
429b95a8a27SThomas Gleixner 	 * happens is either process seeing a zeroed out pvti or seeing no
430b95a8a27SThomas Gleixner 	 * PVCLOCK_TSC_STABLE_BIT bit set. Userspace checks the latter and
431b95a8a27SThomas Gleixner 	 * if 0, it discards the data in pvti and fallbacks to a system
432b95a8a27SThomas Gleixner 	 * call for a reliable timestamp.
433b95a8a27SThomas Gleixner 	 */
4342229f70bSJoao Martins 	if (ret != 0)
4352229f70bSJoao Martins 		pr_notice("Cannot restore secondary vcpu_time_info (err %d)",
4362229f70bSJoao Martins 			  ret);
4372229f70bSJoao Martins 
438867cefb4SJuergen Gross out:
439867cefb4SJuergen Gross 	/* Need pvclock_resume() before using xen_clocksource_read(). */
440867cefb4SJuergen Gross 	pvclock_resume();
441867cefb4SJuergen Gross 	xen_sched_clock_offset = xen_clocksource_read() - xen_clock_value_saved;
442867cefb4SJuergen Gross }
4432229f70bSJoao Martins 
xen_setup_vsyscall_time_info(void)4442229f70bSJoao Martins static void xen_setup_vsyscall_time_info(void)
4452229f70bSJoao Martins {
4462229f70bSJoao Martins 	struct vcpu_register_time_memory_area t;
4472229f70bSJoao Martins 	struct pvclock_vsyscall_time_info *ti;
4482229f70bSJoao Martins 	int ret;
4492229f70bSJoao Martins 
4502229f70bSJoao Martins 	ti = (struct pvclock_vsyscall_time_info *)get_zeroed_page(GFP_KERNEL);
4512229f70bSJoao Martins 	if (!ti)
4522229f70bSJoao Martins 		return;
4532229f70bSJoao Martins 
4542229f70bSJoao Martins 	t.addr.v = &ti->pvti;
4552229f70bSJoao Martins 
4562229f70bSJoao Martins 	ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
4572229f70bSJoao Martins 	if (ret) {
4582229f70bSJoao Martins 		pr_notice("xen: VDSO_CLOCKMODE_PVCLOCK not supported (err %d)\n", ret);
459b95a8a27SThomas Gleixner 		free_page((unsigned long)ti);
4602229f70bSJoao Martins 		return;
4612229f70bSJoao Martins 	}
4622229f70bSJoao Martins 
4632229f70bSJoao Martins 	/*
4642229f70bSJoao Martins 	 * If primary time info had this bit set, secondary should too since
4652229f70bSJoao Martins 	 * it's the same data on both just different memory regions. But we
4662229f70bSJoao Martins 	 * still check it in case hypervisor is buggy.
4672229f70bSJoao Martins 	 */
4682229f70bSJoao Martins 	if (!(ti->pvti.flags & PVCLOCK_TSC_STABLE_BIT)) {
4692229f70bSJoao Martins 		t.addr.v = NULL;
4702229f70bSJoao Martins 		ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area,
4712229f70bSJoao Martins 					 0, &t);
4722229f70bSJoao Martins 		if (!ret)
4732229f70bSJoao Martins 			free_page((unsigned long)ti);
4742229f70bSJoao Martins 
4752229f70bSJoao Martins 		pr_notice("xen: VDSO_CLOCKMODE_PVCLOCK not supported (tsc unstable)\n");
476b95a8a27SThomas Gleixner 		return;
4772229f70bSJoao Martins 	}
4782229f70bSJoao Martins 
4792229f70bSJoao Martins 	xen_clock = ti;
4802229f70bSJoao Martins 	pvclock_set_pvti_cpu0_va(xen_clock);
4812229f70bSJoao Martins 
4822229f70bSJoao Martins 	xen_clocksource.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
483b95a8a27SThomas Gleixner }
4842229f70bSJoao Martins 
4852229f70bSJoao Martins /*
486caea091eSKrister Johansen  * Check if it is possible to safely use the tsc as a clocksource.  This is
487caea091eSKrister Johansen  * only true if the hypervisor notifies the guest that its tsc is invariant,
488caea091eSKrister Johansen  * the tsc is stable, and the tsc instruction will never be emulated.
489caea091eSKrister Johansen  */
xen_tsc_safe_clocksource(void)490caea091eSKrister Johansen static int __init xen_tsc_safe_clocksource(void)
491caea091eSKrister Johansen {
492caea091eSKrister Johansen 	u32 eax, ebx, ecx, edx;
493caea091eSKrister Johansen 
494caea091eSKrister Johansen 	if (!(boot_cpu_has(X86_FEATURE_CONSTANT_TSC)))
495caea091eSKrister Johansen 		return 0;
496caea091eSKrister Johansen 
497caea091eSKrister Johansen 	if (!(boot_cpu_has(X86_FEATURE_NONSTOP_TSC)))
498caea091eSKrister Johansen 		return 0;
499caea091eSKrister Johansen 
500caea091eSKrister Johansen 	if (check_tsc_unstable())
501caea091eSKrister Johansen 		return 0;
502caea091eSKrister Johansen 
503caea091eSKrister Johansen 	/* Leaf 4, sub-leaf 0 (0x40000x03) */
504caea091eSKrister Johansen 	cpuid_count(xen_cpuid_base() + 3, 0, &eax, &ebx, &ecx, &edx);
505caea091eSKrister Johansen 
506caea091eSKrister Johansen 	return ebx == XEN_CPUID_TSC_MODE_NEVER_EMULATE;
507*99a7bcafSKrister Johansen }
508caea091eSKrister Johansen 
xen_time_init(void)509caea091eSKrister Johansen static void __init xen_time_init(void)
510fb6ce5deSDaniel Kiper {
5119702785aSThomas Gleixner 	struct pvclock_vcpu_time_info *pvti;
512b8888080SJoao Martins 	int cpu = smp_processor_id();
5139702785aSThomas Gleixner 	struct timespec64 tp;
514e27c4929SArnd Bergmann 
5159702785aSThomas Gleixner 	/*
516caea091eSKrister Johansen 	 * As Dom0 is never moved, no penalty on using TSC there.
517caea091eSKrister Johansen 	 *
518caea091eSKrister Johansen 	 * If it is possible for the guest to determine that the tsc is a safe
519caea091eSKrister Johansen 	 * clocksource, then set xen_clocksource rating below that of the tsc
520caea091eSKrister Johansen 	 * so that the system prefers tsc instead.
521caea091eSKrister Johansen 	 */
522caea091eSKrister Johansen 	if (xen_initial_domain())
52394dd85f6SPalik, Imre 		xen_clocksource.rating = 275;
52494dd85f6SPalik, Imre 	else if (xen_tsc_safe_clocksource())
525caea091eSKrister Johansen 		xen_clocksource.rating = 299;
526caea091eSKrister Johansen 
52794dd85f6SPalik, Imre 	clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);
528b01cc1b0SJohn Stultz 
5299702785aSThomas Gleixner 	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
530ad5475f9SVitaly Kuznetsov 			       NULL) == 0) {
531ad5475f9SVitaly Kuznetsov 		/* Successfully turned off 100Hz tick, so we have the
5329702785aSThomas Gleixner 		   vcpuop-based timer interface */
5339702785aSThomas Gleixner 		printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
5349702785aSThomas Gleixner 		xen_clockevent = &xen_vcpuop_clockevent;
5359702785aSThomas Gleixner 	}
5369702785aSThomas Gleixner 
5379702785aSThomas Gleixner 	/* Set initial system time with full resolution */
5389702785aSThomas Gleixner 	xen_read_wallclock(&tp);
539c4507257SJohn Stultz 	do_settimeofday64(&tp);
540e27c4929SArnd Bergmann 
5419702785aSThomas Gleixner 	setup_force_cpu_cap(X86_FEATURE_TSC);
542404ee5b1SAndi Kleen 
5439702785aSThomas Gleixner 	/*
544b8888080SJoao Martins 	 * We check ahead on the primary time info if this
545b8888080SJoao Martins 	 * bit is supported hence speeding up Xen clocksource.
546b8888080SJoao Martins 	 */
547b8888080SJoao Martins 	pvti = &__this_cpu_read(xen_vcpu)->time;
548b8888080SJoao Martins 	if (pvti->flags & PVCLOCK_TSC_STABLE_BIT) {
5492229f70bSJoao Martins 		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
550b8888080SJoao Martins 		xen_setup_vsyscall_time_info();
5512229f70bSJoao Martins 	}
5522229f70bSJoao Martins 
553b8888080SJoao Martins 	xen_setup_runstate_info(cpu);
554be012920SIan Campbell 	xen_setup_timer(cpu);
5559702785aSThomas Gleixner 	xen_setup_cpu_clockevents();
5569702785aSThomas Gleixner 
5575584880eSDavid Vrabel 	xen_time_setup_guest();
558ecb23dc6SJuergen Gross 
559ecb23dc6SJuergen Gross 	if (xen_initial_domain())
5605584880eSDavid Vrabel 		pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
5615584880eSDavid Vrabel }
5629702785aSThomas Gleixner 
xen_init_time_common(void)563409771d2SStefano Stabellini static void __init xen_init_time_common(void)
564a0e2bf7cSJuergen Gross {
565409771d2SStefano Stabellini 	xen_sched_clock_offset = xen_clocksource_read();
56638669ba2SPavel Tatashin 	static_call_update(pv_steal_clock, xen_steal_clock);
567a0e2bf7cSJuergen Gross 	paravirt_set_sched_clock(xen_sched_clock);
568a0e2bf7cSJuergen Gross 
569a0e2bf7cSJuergen Gross 	x86_platform.calibrate_tsc = xen_tsc_khz;
570a0e2bf7cSJuergen Gross 	x86_platform.get_wallclock = xen_get_wallclock;
571a0e2bf7cSJuergen Gross }
572a0e2bf7cSJuergen Gross 
xen_init_time_ops(void)573a0e2bf7cSJuergen Gross void __init xen_init_time_ops(void)
574a0e2bf7cSJuergen Gross {
575a0e2bf7cSJuergen Gross 	xen_init_time_common();
576a0e2bf7cSJuergen Gross 
577409771d2SStefano Stabellini 	x86_init.timers.timer_init = xen_time_init;
578409771d2SStefano Stabellini 	x86_init.timers.setup_percpu_clockev = x86_init_noop;
579409771d2SStefano Stabellini 	x86_cpuinit.setup_percpu_clockev = x86_init_noop;
580409771d2SStefano Stabellini 
581409771d2SStefano Stabellini 	/* Dom0 uses the native method to set the hardware RTC. */
58247433b8cSDavid Vrabel 	if (!xen_initial_domain())
58347433b8cSDavid Vrabel 		x86_platform.set_wallclock = xen_set_wallclock;
584409771d2SStefano Stabellini }
585409771d2SStefano Stabellini 
586409771d2SStefano Stabellini #ifdef CONFIG_XEN_PVHVM
/*
 * Per-cpu clockevent setup callback used for HVM guests: set up runstate
 * info for the current cpu, then its clockevents.
 */
static void xen_hvm_setup_cpu_clockevents(void)
{
	xen_setup_runstate_info(smp_processor_id());

	/*
	 * xen_setup_timer(cpu) is deliberately not called here: snprintf is
	 * bad in atomic context.  It is done in xen_hvm_cpu_notify instead
	 * (which gets called by smp_init during early bootup and also
	 * during CPU hotplug events).
	 */
	xen_setup_cpu_clockevents();
}
598409771d2SStefano Stabellini 
xen_hvm_init_time_ops(void)599409771d2SStefano Stabellini void __init xen_hvm_init_time_ops(void)
600fb6ce5deSDaniel Kiper {
601409771d2SStefano Stabellini 	static bool hvm_time_initialized;
602eed05744SDongli Zhang 
603eed05744SDongli Zhang 	if (hvm_time_initialized)
604eed05744SDongli Zhang 		return;
605eed05744SDongli Zhang 
606eed05744SDongli Zhang 	/*
60784d582d2SBoris Ostrovsky 	 * vector callback is needed otherwise we cannot receive interrupts
60884d582d2SBoris Ostrovsky 	 * on cpu > 0 and at this point we don't know how many cpus are
60984d582d2SBoris Ostrovsky 	 * available.
61084d582d2SBoris Ostrovsky 	 */
61184d582d2SBoris Ostrovsky 	if (!xen_have_vector_callback)
61284d582d2SBoris Ostrovsky 		return;
61384d582d2SBoris Ostrovsky 
61484d582d2SBoris Ostrovsky 	if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
615409771d2SStefano Stabellini 		pr_info_once("Xen doesn't support pvclock on HVM, disable pv timer");
616eed05744SDongli Zhang 		return;
617eed05744SDongli Zhang 	}
618eed05744SDongli Zhang 
619eed05744SDongli Zhang 	/*
620eed05744SDongli Zhang 	 * Only MAX_VIRT_CPUS 'vcpu_info' are embedded inside 'shared_info'.
621eed05744SDongli Zhang 	 * The __this_cpu_read(xen_vcpu) is still NULL when Xen HVM guest
622eed05744SDongli Zhang 	 * boots on vcpu >= MAX_VIRT_CPUS (e.g., kexec), To access
623eed05744SDongli Zhang 	 * __this_cpu_read(xen_vcpu) via xen_clocksource_read() will panic.
624eed05744SDongli Zhang 	 *
625eed05744SDongli Zhang 	 * The xen_hvm_init_time_ops() should be called again later after
626eed05744SDongli Zhang 	 * __this_cpu_read(xen_vcpu) is available.
627eed05744SDongli Zhang 	 */
628eed05744SDongli Zhang 	if (!__this_cpu_read(xen_vcpu)) {
629eed05744SDongli Zhang 		pr_info("Delay xen_init_time_common() as kernel is running on vcpu=%d\n",
630eed05744SDongli Zhang 			xen_vcpu_nr(0));
631eed05744SDongli Zhang 		return;
632409771d2SStefano Stabellini 	}
633409771d2SStefano Stabellini 
634409771d2SStefano Stabellini 	xen_init_time_common();
635a0e2bf7cSJuergen Gross 
636a0e2bf7cSJuergen Gross 	x86_init.timers.setup_percpu_clockev = xen_time_init;
637409771d2SStefano Stabellini 	x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
638409771d2SStefano Stabellini 
639409771d2SStefano Stabellini 	x86_platform.set_wallclock = xen_set_wallclock;
640409771d2SStefano Stabellini 
641eed05744SDongli Zhang 	hvm_time_initialized = true;
642eed05744SDongli Zhang }
643409771d2SStefano Stabellini #endif
644ca65f9fcSStefano Stabellini 
6452ec16bc0SRyan Thibodeaux /* Kernel parameter to specify Xen timer slop */
parse_xen_timer_slop(char * ptr)6462ec16bc0SRyan Thibodeaux static int __init parse_xen_timer_slop(char *ptr)
6472ec16bc0SRyan Thibodeaux {
6482ec16bc0SRyan Thibodeaux 	unsigned long slop = memparse(ptr, NULL);
6492ec16bc0SRyan Thibodeaux 
6502ec16bc0SRyan Thibodeaux 	xen_timerop_clockevent.min_delta_ns = slop;
6512ec16bc0SRyan Thibodeaux 	xen_timerop_clockevent.min_delta_ticks = slop;
6522ec16bc0SRyan Thibodeaux 	xen_vcpuop_clockevent.min_delta_ns = slop;
6532ec16bc0SRyan Thibodeaux 	xen_vcpuop_clockevent.min_delta_ticks = slop;
6542ec16bc0SRyan Thibodeaux 
6552ec16bc0SRyan Thibodeaux 	return 0;
6562ec16bc0SRyan Thibodeaux }
6572ec16bc0SRyan Thibodeaux early_param("xen_timer_slop", parse_xen_timer_slop);
6582ec16bc0SRyan Thibodeaux