// SPDX-License-Identifier: GPL-2.0
/*
 * Xen time implementation.
 *
 * This is implemented in terms of a clocksource driver which uses
 * the hypervisor clock as a nanosecond timebase, and a clockevent
 * driver which uses the hypervisor's timer mechanism.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
119702785aSThomas Gleixner #include <linux/kernel.h>
129702785aSThomas Gleixner #include <linux/interrupt.h>
139702785aSThomas Gleixner #include <linux/clocksource.h>
149702785aSThomas Gleixner #include <linux/clockchips.h>
155a0e3ad6STejun Heo #include <linux/gfp.h>
16c9d76a24SKonrad Rzeszutek Wilk #include <linux/slab.h>
175584880eSDavid Vrabel #include <linux/pvclock_gtod.h>
1876096863SStefano Stabellini #include <linux/timekeeper_internal.h>
199702785aSThomas Gleixner
201c7b67f7SGerd Hoffmann #include <asm/pvclock.h>
219702785aSThomas Gleixner #include <asm/xen/hypervisor.h>
229702785aSThomas Gleixner #include <asm/xen/hypercall.h>
23*99a7bcafSKrister Johansen #include <asm/xen/cpuid.h>
249702785aSThomas Gleixner
259702785aSThomas Gleixner #include <xen/events.h>
26409771d2SStefano Stabellini #include <xen/features.h>
279702785aSThomas Gleixner #include <xen/interface/xen.h>
289702785aSThomas Gleixner #include <xen/interface/vcpu.h>
299702785aSThomas Gleixner
309702785aSThomas Gleixner #include "xen-ops.h"
319702785aSThomas Gleixner
322ec16bc0SRyan Thibodeaux /* Minimum amount of time until next clock event fires */
339702785aSThomas Gleixner #define TIMER_SLOP 100000
349702785aSThomas Gleixner
3538669ba2SPavel Tatashin static u64 xen_sched_clock_offset __read_mostly;
3638669ba2SPavel Tatashin
37e93ef949SAlok Kataria /* Get the TSC speed from Xen */
xen_tsc_khz(void)38409771d2SStefano Stabellini static unsigned long xen_tsc_khz(void)
399702785aSThomas Gleixner {
403807f345SGlauber Costa struct pvclock_vcpu_time_info *info =
419702785aSThomas Gleixner &HYPERVISOR_shared_info->vcpu_info[0].time;
429702785aSThomas Gleixner
43898ec52dSHayato Ohhashi setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
443807f345SGlauber Costa return pvclock_tsc_khz(info);
459702785aSThomas Gleixner }
469702785aSThomas Gleixner
xen_clocksource_read(void)477b25b9cbSPavel Tatashin static u64 xen_clocksource_read(void)
489702785aSThomas Gleixner {
491c7b67f7SGerd Hoffmann struct pvclock_vcpu_time_info *src;
50a5a1d1c2SThomas Gleixner u64 ret;
519702785aSThomas Gleixner
52f1c39625SJeremy Fitzhardinge preempt_disable_notrace();
533251f20bSBoris Ostrovsky src = &__this_cpu_read(xen_vcpu)->time;
541c7b67f7SGerd Hoffmann ret = pvclock_clocksource_read(src);
55f1c39625SJeremy Fitzhardinge preempt_enable_notrace();
569702785aSThomas Gleixner return ret;
579702785aSThomas Gleixner }
589702785aSThomas Gleixner
xen_clocksource_get_cycles(struct clocksource * cs)59a5a1d1c2SThomas Gleixner static u64 xen_clocksource_get_cycles(struct clocksource *cs)
608e19608eSMagnus Damm {
618e19608eSMagnus Damm return xen_clocksource_read();
628e19608eSMagnus Damm }
638e19608eSMagnus Damm
xen_sched_clock(void)648739c681SPeter Zijlstra static noinstr u64 xen_sched_clock(void)
6538669ba2SPavel Tatashin {
668739c681SPeter Zijlstra struct pvclock_vcpu_time_info *src;
678739c681SPeter Zijlstra u64 ret;
688739c681SPeter Zijlstra
698739c681SPeter Zijlstra src = &__this_cpu_read(xen_vcpu)->time;
708739c681SPeter Zijlstra ret = pvclock_clocksource_read_nowd(src);
718739c681SPeter Zijlstra ret -= xen_sched_clock_offset;
728739c681SPeter Zijlstra
738739c681SPeter Zijlstra return ret;
748739c681SPeter Zijlstra }
7538669ba2SPavel Tatashin
xen_read_wallclock(struct timespec64 * ts)7638669ba2SPavel Tatashin static void xen_read_wallclock(struct timespec64 *ts)
77e27c4929SArnd Bergmann {
789702785aSThomas Gleixner struct shared_info *s = HYPERVISOR_shared_info;
791c7b67f7SGerd Hoffmann struct pvclock_wall_clock *wall_clock = &(s->wc);
801c7b67f7SGerd Hoffmann struct pvclock_vcpu_time_info *vcpu_time;
811c7b67f7SGerd Hoffmann
829702785aSThomas Gleixner vcpu_time = &get_cpu_var(xen_vcpu)->time;
831c7b67f7SGerd Hoffmann pvclock_read_wallclock(wall_clock, vcpu_time, ts);
841c7b67f7SGerd Hoffmann put_cpu_var(xen_vcpu);
851c7b67f7SGerd Hoffmann }
869702785aSThomas Gleixner
/* Installed as x86_platform.get_wallclock: store the Xen wallclock in @now. */
static void xen_get_wallclock(struct timespec64 *now)
{
	xen_read_wallclock(now);
}
919702785aSThomas Gleixner
xen_set_wallclock(const struct timespec64 * now)929702785aSThomas Gleixner static int xen_set_wallclock(const struct timespec64 *now)
93e27c4929SArnd Bergmann {
949702785aSThomas Gleixner return -ENODEV;
95b5494ad8SBoris Ostrovsky }
969702785aSThomas Gleixner
xen_pvclock_gtod_notify(struct notifier_block * nb,unsigned long was_set,void * priv)979702785aSThomas Gleixner static int xen_pvclock_gtod_notify(struct notifier_block *nb,
9847433b8cSDavid Vrabel unsigned long was_set, void *priv)
9947433b8cSDavid Vrabel {
1005584880eSDavid Vrabel /* Protected by the calling core code serialization */
10147433b8cSDavid Vrabel static struct timespec64 next_sync;
102187b26a9SStefano Stabellini
1035584880eSDavid Vrabel struct xen_platform_op op;
10447433b8cSDavid Vrabel struct timespec64 now;
10576096863SStefano Stabellini struct timekeeper *tk = priv;
10676096863SStefano Stabellini static bool settime64_supported = true;
10776096863SStefano Stabellini int ret;
10876096863SStefano Stabellini
1095584880eSDavid Vrabel now.tv_sec = tk->xtime_sec;
11076096863SStefano Stabellini now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
11176096863SStefano Stabellini
1125584880eSDavid Vrabel /*
11347433b8cSDavid Vrabel * We only take the expensive HV call when the clock was set
11447433b8cSDavid Vrabel * or when the 11 minutes RTC synchronization time elapsed.
11547433b8cSDavid Vrabel */
11647433b8cSDavid Vrabel if (!was_set && timespec64_compare(&now, &next_sync) < 0)
117187b26a9SStefano Stabellini return NOTIFY_OK;
11847433b8cSDavid Vrabel
1199702785aSThomas Gleixner again:
12076096863SStefano Stabellini if (settime64_supported) {
12176096863SStefano Stabellini op.cmd = XENPF_settime64;
12276096863SStefano Stabellini op.u.settime64.mbz = 0;
12376096863SStefano Stabellini op.u.settime64.secs = now.tv_sec;
12476096863SStefano Stabellini op.u.settime64.nsecs = now.tv_nsec;
12576096863SStefano Stabellini op.u.settime64.system_time = xen_clocksource_read();
12676096863SStefano Stabellini } else {
12776096863SStefano Stabellini op.cmd = XENPF_settime32;
128f3d6027eSStefano Stabellini op.u.settime32.secs = now.tv_sec;
129f3d6027eSStefano Stabellini op.u.settime32.nsecs = now.tv_nsec;
130f3d6027eSStefano Stabellini op.u.settime32.system_time = xen_clocksource_read();
131f3d6027eSStefano Stabellini }
13276096863SStefano Stabellini
1339702785aSThomas Gleixner ret = HYPERVISOR_platform_op(&op);
13476096863SStefano Stabellini
13576096863SStefano Stabellini if (ret == -ENOSYS && settime64_supported) {
13676096863SStefano Stabellini settime64_supported = false;
13776096863SStefano Stabellini goto again;
13876096863SStefano Stabellini }
13976096863SStefano Stabellini if (ret < 0)
14076096863SStefano Stabellini return NOTIFY_BAD;
14176096863SStefano Stabellini
1429702785aSThomas Gleixner /*
14347433b8cSDavid Vrabel * Move the next drift compensation time 11 minutes
14447433b8cSDavid Vrabel * ahead. That's emulating the sync_cmos_clock() update for
14547433b8cSDavid Vrabel * the hardware RTC.
14647433b8cSDavid Vrabel */
14747433b8cSDavid Vrabel next_sync = now;
14847433b8cSDavid Vrabel next_sync.tv_sec += 11 * 60;
14947433b8cSDavid Vrabel
15047433b8cSDavid Vrabel return NOTIFY_OK;
1515584880eSDavid Vrabel }
1529702785aSThomas Gleixner
1539702785aSThomas Gleixner static struct notifier_block xen_pvclock_gtod_notifier = {
1545584880eSDavid Vrabel .notifier_call = xen_pvclock_gtod_notify,
1555584880eSDavid Vrabel };
1565584880eSDavid Vrabel
xen_cs_enable(struct clocksource * cs)1575584880eSDavid Vrabel static int xen_cs_enable(struct clocksource *cs)
158eec399ddSThomas Gleixner {
159eec399ddSThomas Gleixner vclocks_set_used(VDSO_CLOCKMODE_PVCLOCK);
160b95a8a27SThomas Gleixner return 0;
161eec399ddSThomas Gleixner }
162eec399ddSThomas Gleixner
163eec399ddSThomas Gleixner static struct clocksource xen_clocksource __read_mostly = {
1649702785aSThomas Gleixner .name = "xen",
1659702785aSThomas Gleixner .rating = 400,
1669702785aSThomas Gleixner .read = xen_clocksource_get_cycles,
1679702785aSThomas Gleixner .mask = CLOCKSOURCE_MASK(64),
168eec399ddSThomas Gleixner .flags = CLOCK_SOURCE_IS_CONTINUOUS,
1699702785aSThomas Gleixner .enable = xen_cs_enable,
170eec399ddSThomas Gleixner };
1719702785aSThomas Gleixner
/*
   Xen clockevent implementation

   Xen has two clockevent implementations:

   The old timer_op one works with all released versions of Xen prior
   to version 3.0.4.  This version of the hypervisor provides a
   single-shot timer with nanosecond resolution.  However, sharing the
   same event channel is a 100Hz tick which is delivered while the
   vcpu is running.  We don't care about or use this tick, but it will
   cause the core time code to think the timer fired too soon, and
   will end up resetting it each time.  It could be filtered, but
   doing so has complications when the ktime clocksource is not yet
   the xen clocksource (i.e., at boot time).

   The new vcpu_op-based timer interface allows the tick timer period
   to be changed or turned off.  The tick timer is not useful as a
   periodic timer because events are only delivered to running vcpus.
   The one-shot timer can report when a timeout is in the past, so
   set_next_event is capable of returning -ETIME when appropriate.
   This interface is used when available.
*/
1949702785aSThomas Gleixner
1959702785aSThomas Gleixner
1969702785aSThomas Gleixner /*
1979702785aSThomas Gleixner Get a hypervisor absolute time. In theory we could maintain an
1989702785aSThomas Gleixner offset between the kernel's time and the hypervisor's time, and
1999702785aSThomas Gleixner apply that to a kernel's absolute timeout. Unfortunately the
2009702785aSThomas Gleixner hypervisor and kernel times can drift even if the kernel is using
2019702785aSThomas Gleixner the Xen clocksource, because ntp can warp the kernel's clocksource.
2029702785aSThomas Gleixner */
get_abs_timeout(unsigned long delta)2039702785aSThomas Gleixner static s64 get_abs_timeout(unsigned long delta)
2049702785aSThomas Gleixner {
2059702785aSThomas Gleixner return xen_clocksource_read() + delta;
2069702785aSThomas Gleixner }
2079702785aSThomas Gleixner
/*
 * ->set_state_shutdown() for the timer_op clockevent.  @evt is not used.
 * The hypercall result is ignored and 0 is always returned.
 */
static int xen_timerop_shutdown(struct clock_event_device *evt)
{
	/* cancel timeout */
	HYPERVISOR_set_timer_op(0);
	return 0;
}
2159702785aSThomas Gleixner
xen_timerop_set_next_event(unsigned long delta,struct clock_event_device * evt)2169702785aSThomas Gleixner static int xen_timerop_set_next_event(unsigned long delta,
2179702785aSThomas Gleixner struct clock_event_device *evt)
2189702785aSThomas Gleixner {
2199702785aSThomas Gleixner WARN_ON(!clockevent_state_oneshot(evt));
220955381ddSViresh Kumar
2219702785aSThomas Gleixner if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
2229702785aSThomas Gleixner BUG();
2239702785aSThomas Gleixner
2249702785aSThomas Gleixner /* We may have missed the deadline, but there's no real way of
2259702785aSThomas Gleixner knowing for sure. If the event was in the past, then we'll
2269702785aSThomas Gleixner get an immediate interrupt. */
2279702785aSThomas Gleixner
2289702785aSThomas Gleixner return 0;
2299702785aSThomas Gleixner }
2309702785aSThomas Gleixner
2319702785aSThomas Gleixner static struct clock_event_device xen_timerop_clockevent __ro_after_init = {
2322ec16bc0SRyan Thibodeaux .name = "xen",
2339702785aSThomas Gleixner .features = CLOCK_EVT_FEAT_ONESHOT,
2349702785aSThomas Gleixner
2359702785aSThomas Gleixner .max_delta_ns = 0xffffffff,
2369702785aSThomas Gleixner .max_delta_ticks = 0xffffffff,
2373d18d661SNicolai Stange .min_delta_ns = TIMER_SLOP,
2389702785aSThomas Gleixner .min_delta_ticks = TIMER_SLOP,
2393d18d661SNicolai Stange
2409702785aSThomas Gleixner .mult = 1,
2419702785aSThomas Gleixner .shift = 0,
2429702785aSThomas Gleixner .rating = 500,
2439702785aSThomas Gleixner
2449702785aSThomas Gleixner .set_state_shutdown = xen_timerop_shutdown,
245955381ddSViresh Kumar .set_next_event = xen_timerop_set_next_event,
2469702785aSThomas Gleixner };
2479702785aSThomas Gleixner
xen_vcpuop_shutdown(struct clock_event_device * evt)2489702785aSThomas Gleixner static int xen_vcpuop_shutdown(struct clock_event_device *evt)
249955381ddSViresh Kumar {
2509702785aSThomas Gleixner int cpu = smp_processor_id();
2519702785aSThomas Gleixner
2529702785aSThomas Gleixner if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, xen_vcpu_nr(cpu),
253ad5475f9SVitaly Kuznetsov NULL) ||
254ad5475f9SVitaly Kuznetsov HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
255ad5475f9SVitaly Kuznetsov NULL))
256ad5475f9SVitaly Kuznetsov BUG();
2579702785aSThomas Gleixner
258955381ddSViresh Kumar return 0;
259955381ddSViresh Kumar }
2609702785aSThomas Gleixner
xen_vcpuop_set_oneshot(struct clock_event_device * evt)261955381ddSViresh Kumar static int xen_vcpuop_set_oneshot(struct clock_event_device *evt)
262955381ddSViresh Kumar {
263955381ddSViresh Kumar int cpu = smp_processor_id();
264955381ddSViresh Kumar
265955381ddSViresh Kumar if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
266ad5475f9SVitaly Kuznetsov NULL))
267ad5475f9SVitaly Kuznetsov BUG();
268955381ddSViresh Kumar
269955381ddSViresh Kumar return 0;
270955381ddSViresh Kumar }
2719702785aSThomas Gleixner
xen_vcpuop_set_next_event(unsigned long delta,struct clock_event_device * evt)2729702785aSThomas Gleixner static int xen_vcpuop_set_next_event(unsigned long delta,
2739702785aSThomas Gleixner struct clock_event_device *evt)
2749702785aSThomas Gleixner {
2759702785aSThomas Gleixner int cpu = smp_processor_id();
2769702785aSThomas Gleixner struct vcpu_set_singleshot_timer single;
2779702785aSThomas Gleixner int ret;
2789702785aSThomas Gleixner
2799702785aSThomas Gleixner WARN_ON(!clockevent_state_oneshot(evt));
280955381ddSViresh Kumar
2819702785aSThomas Gleixner single.timeout_abs_ns = get_abs_timeout(delta);
2829702785aSThomas Gleixner /* Get an event anyway, even if the timeout is already expired */
283c06b6d70SStefano Stabellini single.flags = 0;
284c06b6d70SStefano Stabellini
2859702785aSThomas Gleixner ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, xen_vcpu_nr(cpu),
286ad5475f9SVitaly Kuznetsov &single);
287ad5475f9SVitaly Kuznetsov BUG_ON(ret != 0);
288c06b6d70SStefano Stabellini
2899702785aSThomas Gleixner return ret;
2909702785aSThomas Gleixner }
2919702785aSThomas Gleixner
2929702785aSThomas Gleixner static struct clock_event_device xen_vcpuop_clockevent __ro_after_init = {
2932ec16bc0SRyan Thibodeaux .name = "xen",
2949702785aSThomas Gleixner .features = CLOCK_EVT_FEAT_ONESHOT,
2959702785aSThomas Gleixner
2969702785aSThomas Gleixner .max_delta_ns = 0xffffffff,
2979702785aSThomas Gleixner .max_delta_ticks = 0xffffffff,
2983d18d661SNicolai Stange .min_delta_ns = TIMER_SLOP,
2999702785aSThomas Gleixner .min_delta_ticks = TIMER_SLOP,
3003d18d661SNicolai Stange
3019702785aSThomas Gleixner .mult = 1,
3029702785aSThomas Gleixner .shift = 0,
3039702785aSThomas Gleixner .rating = 500,
3049702785aSThomas Gleixner
3059702785aSThomas Gleixner .set_state_shutdown = xen_vcpuop_shutdown,
306955381ddSViresh Kumar .set_state_oneshot = xen_vcpuop_set_oneshot,
307955381ddSViresh Kumar .set_next_event = xen_vcpuop_set_next_event,
3089702785aSThomas Gleixner };
3099702785aSThomas Gleixner
3109702785aSThomas Gleixner static const struct clock_event_device *xen_clockevent =
3119702785aSThomas Gleixner &xen_timerop_clockevent;
3129702785aSThomas Gleixner
31331620a19SKonrad Rzeszutek Wilk struct xen_clock_event_device {
31431620a19SKonrad Rzeszutek Wilk struct clock_event_device evt;
31531620a19SKonrad Rzeszutek Wilk char name[16];
3167be0772dSVitaly Kuznetsov };
31731620a19SKonrad Rzeszutek Wilk static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 };
31831620a19SKonrad Rzeszutek Wilk
xen_timer_interrupt(int irq,void * dev_id)3199702785aSThomas Gleixner static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
3209702785aSThomas Gleixner {
3219702785aSThomas Gleixner struct clock_event_device *evt = this_cpu_ptr(&xen_clock_events.evt);
32289cbc767SChristoph Lameter irqreturn_t ret;
3239702785aSThomas Gleixner
3249702785aSThomas Gleixner ret = IRQ_NONE;
3259702785aSThomas Gleixner if (evt->event_handler) {
3269702785aSThomas Gleixner evt->event_handler(evt);
3279702785aSThomas Gleixner ret = IRQ_HANDLED;
3289702785aSThomas Gleixner }
3299702785aSThomas Gleixner
3309702785aSThomas Gleixner return ret;
3319702785aSThomas Gleixner }
3329702785aSThomas Gleixner
xen_teardown_timer(int cpu)3339702785aSThomas Gleixner void xen_teardown_timer(int cpu)
33409e99da7SKonrad Rzeszutek Wilk {
33509e99da7SKonrad Rzeszutek Wilk struct clock_event_device *evt;
33609e99da7SKonrad Rzeszutek Wilk evt = &per_cpu(xen_clock_events, cpu).evt;
33709e99da7SKonrad Rzeszutek Wilk
33809e99da7SKonrad Rzeszutek Wilk if (evt->irq >= 0) {
33909e99da7SKonrad Rzeszutek Wilk unbind_from_irqhandler(evt->irq, NULL);
34009e99da7SKonrad Rzeszutek Wilk evt->irq = -1;
34109e99da7SKonrad Rzeszutek Wilk }
34209e99da7SKonrad Rzeszutek Wilk }
34309e99da7SKonrad Rzeszutek Wilk
xen_setup_timer(int cpu)34409e99da7SKonrad Rzeszutek Wilk void xen_setup_timer(int cpu)
3459702785aSThomas Gleixner {
3469702785aSThomas Gleixner struct xen_clock_event_device *xevt = &per_cpu(xen_clock_events, cpu);
3477be0772dSVitaly Kuznetsov struct clock_event_device *evt = &xevt->evt;
3487be0772dSVitaly Kuznetsov int irq;
3499702785aSThomas Gleixner
3509702785aSThomas Gleixner WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu);
351ef35a4e6SKonrad Rzeszutek Wilk if (evt->irq >= 0)
35209e99da7SKonrad Rzeszutek Wilk xen_teardown_timer(cpu);
35309e99da7SKonrad Rzeszutek Wilk
354ef35a4e6SKonrad Rzeszutek Wilk printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);
3559702785aSThomas Gleixner
3569702785aSThomas Gleixner snprintf(xevt->name, sizeof(xevt->name), "timer%d", cpu);
3577be0772dSVitaly Kuznetsov
3589702785aSThomas Gleixner irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
3599702785aSThomas Gleixner IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER|
3609d71cee6SMichael Opdenacker IRQF_FORCE_RESUME|IRQF_EARLY_RESUME,
3618d5999dfSDavid Vrabel xevt->name, NULL);
3627be0772dSVitaly Kuznetsov (void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);
3638785c676SDavid Vrabel
3649702785aSThomas Gleixner memcpy(evt, xen_clockevent, sizeof(*evt));
3659702785aSThomas Gleixner
3669702785aSThomas Gleixner evt->cpumask = cpumask_of(cpu);
367320ab2b0SRusty Russell evt->irq = irq;
3689702785aSThomas Gleixner }
3699702785aSThomas Gleixner
3709702785aSThomas Gleixner
xen_setup_cpu_clockevents(void)371d68d82afSAlex Nixon void xen_setup_cpu_clockevents(void)
3729702785aSThomas Gleixner {
3739702785aSThomas Gleixner clockevents_register_device(this_cpu_ptr(&xen_clock_events.evt));
37489cbc767SChristoph Lameter }
3759702785aSThomas Gleixner
xen_timer_resume(void)3769702785aSThomas Gleixner void xen_timer_resume(void)
377d07af1f0SJeremy Fitzhardinge {
378d07af1f0SJeremy Fitzhardinge int cpu;
379d07af1f0SJeremy Fitzhardinge
380d07af1f0SJeremy Fitzhardinge if (xen_clockevent != &xen_vcpuop_clockevent)
381d07af1f0SJeremy Fitzhardinge return;
382d07af1f0SJeremy Fitzhardinge
383d07af1f0SJeremy Fitzhardinge for_each_online_cpu(cpu) {
384d07af1f0SJeremy Fitzhardinge if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer,
385ad5475f9SVitaly Kuznetsov xen_vcpu_nr(cpu), NULL))
386ad5475f9SVitaly Kuznetsov BUG();
387d07af1f0SJeremy Fitzhardinge }
388d07af1f0SJeremy Fitzhardinge }
389d07af1f0SJeremy Fitzhardinge
390d07af1f0SJeremy Fitzhardinge static struct pvclock_vsyscall_time_info *xen_clock __read_mostly;
3912229f70bSJoao Martins static u64 xen_clock_value_saved;
392867cefb4SJuergen Gross
xen_save_time_memory_area(void)3932229f70bSJoao Martins void xen_save_time_memory_area(void)
3942229f70bSJoao Martins {
3952229f70bSJoao Martins struct vcpu_register_time_memory_area t;
3962229f70bSJoao Martins int ret;
3972229f70bSJoao Martins
3982229f70bSJoao Martins xen_clock_value_saved = xen_clocksource_read() - xen_sched_clock_offset;
399867cefb4SJuergen Gross
400867cefb4SJuergen Gross if (!xen_clock)
4012229f70bSJoao Martins return;
4022229f70bSJoao Martins
4032229f70bSJoao Martins t.addr.v = NULL;
4042229f70bSJoao Martins
4052229f70bSJoao Martins ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
4062229f70bSJoao Martins if (ret != 0)
4072229f70bSJoao Martins pr_notice("Cannot save secondary vcpu_time_info (err %d)",
4082229f70bSJoao Martins ret);
4092229f70bSJoao Martins else
4102229f70bSJoao Martins clear_page(xen_clock);
4112229f70bSJoao Martins }
4122229f70bSJoao Martins
xen_restore_time_memory_area(void)4132229f70bSJoao Martins void xen_restore_time_memory_area(void)
4142229f70bSJoao Martins {
4152229f70bSJoao Martins struct vcpu_register_time_memory_area t;
4162229f70bSJoao Martins int ret;
4172229f70bSJoao Martins
4182229f70bSJoao Martins if (!xen_clock)
4192229f70bSJoao Martins goto out;
420867cefb4SJuergen Gross
4212229f70bSJoao Martins t.addr.v = &xen_clock->pvti;
4222229f70bSJoao Martins
4232229f70bSJoao Martins ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
4242229f70bSJoao Martins
4252229f70bSJoao Martins /*
4262229f70bSJoao Martins * We don't disable VDSO_CLOCKMODE_PVCLOCK entirely if it fails to
427b95a8a27SThomas Gleixner * register the secondary time info with Xen or if we migrated to a
428b95a8a27SThomas Gleixner * host without the necessary flags. On both of these cases what
429b95a8a27SThomas Gleixner * happens is either process seeing a zeroed out pvti or seeing no
430b95a8a27SThomas Gleixner * PVCLOCK_TSC_STABLE_BIT bit set. Userspace checks the latter and
431b95a8a27SThomas Gleixner * if 0, it discards the data in pvti and fallbacks to a system
432b95a8a27SThomas Gleixner * call for a reliable timestamp.
433b95a8a27SThomas Gleixner */
4342229f70bSJoao Martins if (ret != 0)
4352229f70bSJoao Martins pr_notice("Cannot restore secondary vcpu_time_info (err %d)",
4362229f70bSJoao Martins ret);
4372229f70bSJoao Martins
438867cefb4SJuergen Gross out:
439867cefb4SJuergen Gross /* Need pvclock_resume() before using xen_clocksource_read(). */
440867cefb4SJuergen Gross pvclock_resume();
441867cefb4SJuergen Gross xen_sched_clock_offset = xen_clocksource_read() - xen_clock_value_saved;
442867cefb4SJuergen Gross }
4432229f70bSJoao Martins
xen_setup_vsyscall_time_info(void)4442229f70bSJoao Martins static void xen_setup_vsyscall_time_info(void)
4452229f70bSJoao Martins {
4462229f70bSJoao Martins struct vcpu_register_time_memory_area t;
4472229f70bSJoao Martins struct pvclock_vsyscall_time_info *ti;
4482229f70bSJoao Martins int ret;
4492229f70bSJoao Martins
4502229f70bSJoao Martins ti = (struct pvclock_vsyscall_time_info *)get_zeroed_page(GFP_KERNEL);
4512229f70bSJoao Martins if (!ti)
4522229f70bSJoao Martins return;
4532229f70bSJoao Martins
4542229f70bSJoao Martins t.addr.v = &ti->pvti;
4552229f70bSJoao Martins
4562229f70bSJoao Martins ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
4572229f70bSJoao Martins if (ret) {
4582229f70bSJoao Martins pr_notice("xen: VDSO_CLOCKMODE_PVCLOCK not supported (err %d)\n", ret);
459b95a8a27SThomas Gleixner free_page((unsigned long)ti);
4602229f70bSJoao Martins return;
4612229f70bSJoao Martins }
4622229f70bSJoao Martins
4632229f70bSJoao Martins /*
4642229f70bSJoao Martins * If primary time info had this bit set, secondary should too since
4652229f70bSJoao Martins * it's the same data on both just different memory regions. But we
4662229f70bSJoao Martins * still check it in case hypervisor is buggy.
4672229f70bSJoao Martins */
4682229f70bSJoao Martins if (!(ti->pvti.flags & PVCLOCK_TSC_STABLE_BIT)) {
4692229f70bSJoao Martins t.addr.v = NULL;
4702229f70bSJoao Martins ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area,
4712229f70bSJoao Martins 0, &t);
4722229f70bSJoao Martins if (!ret)
4732229f70bSJoao Martins free_page((unsigned long)ti);
4742229f70bSJoao Martins
4752229f70bSJoao Martins pr_notice("xen: VDSO_CLOCKMODE_PVCLOCK not supported (tsc unstable)\n");
476b95a8a27SThomas Gleixner return;
4772229f70bSJoao Martins }
4782229f70bSJoao Martins
4792229f70bSJoao Martins xen_clock = ti;
4802229f70bSJoao Martins pvclock_set_pvti_cpu0_va(xen_clock);
4812229f70bSJoao Martins
4822229f70bSJoao Martins xen_clocksource.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
483b95a8a27SThomas Gleixner }
4842229f70bSJoao Martins
4852229f70bSJoao Martins /*
486caea091eSKrister Johansen * Check if it is possible to safely use the tsc as a clocksource. This is
487caea091eSKrister Johansen * only true if the hypervisor notifies the guest that its tsc is invariant,
488caea091eSKrister Johansen * the tsc is stable, and the tsc instruction will never be emulated.
489caea091eSKrister Johansen */
xen_tsc_safe_clocksource(void)490caea091eSKrister Johansen static int __init xen_tsc_safe_clocksource(void)
491caea091eSKrister Johansen {
492caea091eSKrister Johansen u32 eax, ebx, ecx, edx;
493caea091eSKrister Johansen
494caea091eSKrister Johansen if (!(boot_cpu_has(X86_FEATURE_CONSTANT_TSC)))
495caea091eSKrister Johansen return 0;
496caea091eSKrister Johansen
497caea091eSKrister Johansen if (!(boot_cpu_has(X86_FEATURE_NONSTOP_TSC)))
498caea091eSKrister Johansen return 0;
499caea091eSKrister Johansen
500caea091eSKrister Johansen if (check_tsc_unstable())
501caea091eSKrister Johansen return 0;
502caea091eSKrister Johansen
503caea091eSKrister Johansen /* Leaf 4, sub-leaf 0 (0x40000x03) */
504caea091eSKrister Johansen cpuid_count(xen_cpuid_base() + 3, 0, &eax, &ebx, &ecx, &edx);
505caea091eSKrister Johansen
506caea091eSKrister Johansen return ebx == XEN_CPUID_TSC_MODE_NEVER_EMULATE;
507*99a7bcafSKrister Johansen }
508caea091eSKrister Johansen
xen_time_init(void)509caea091eSKrister Johansen static void __init xen_time_init(void)
510fb6ce5deSDaniel Kiper {
5119702785aSThomas Gleixner struct pvclock_vcpu_time_info *pvti;
512b8888080SJoao Martins int cpu = smp_processor_id();
5139702785aSThomas Gleixner struct timespec64 tp;
514e27c4929SArnd Bergmann
5159702785aSThomas Gleixner /*
516caea091eSKrister Johansen * As Dom0 is never moved, no penalty on using TSC there.
517caea091eSKrister Johansen *
518caea091eSKrister Johansen * If it is possible for the guest to determine that the tsc is a safe
519caea091eSKrister Johansen * clocksource, then set xen_clocksource rating below that of the tsc
520caea091eSKrister Johansen * so that the system prefers tsc instead.
521caea091eSKrister Johansen */
522caea091eSKrister Johansen if (xen_initial_domain())
52394dd85f6SPalik, Imre xen_clocksource.rating = 275;
52494dd85f6SPalik, Imre else if (xen_tsc_safe_clocksource())
525caea091eSKrister Johansen xen_clocksource.rating = 299;
526caea091eSKrister Johansen
52794dd85f6SPalik, Imre clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);
528b01cc1b0SJohn Stultz
5299702785aSThomas Gleixner if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
530ad5475f9SVitaly Kuznetsov NULL) == 0) {
531ad5475f9SVitaly Kuznetsov /* Successfully turned off 100Hz tick, so we have the
5329702785aSThomas Gleixner vcpuop-based timer interface */
5339702785aSThomas Gleixner printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
5349702785aSThomas Gleixner xen_clockevent = &xen_vcpuop_clockevent;
5359702785aSThomas Gleixner }
5369702785aSThomas Gleixner
5379702785aSThomas Gleixner /* Set initial system time with full resolution */
5389702785aSThomas Gleixner xen_read_wallclock(&tp);
539c4507257SJohn Stultz do_settimeofday64(&tp);
540e27c4929SArnd Bergmann
5419702785aSThomas Gleixner setup_force_cpu_cap(X86_FEATURE_TSC);
542404ee5b1SAndi Kleen
5439702785aSThomas Gleixner /*
544b8888080SJoao Martins * We check ahead on the primary time info if this
545b8888080SJoao Martins * bit is supported hence speeding up Xen clocksource.
546b8888080SJoao Martins */
547b8888080SJoao Martins pvti = &__this_cpu_read(xen_vcpu)->time;
548b8888080SJoao Martins if (pvti->flags & PVCLOCK_TSC_STABLE_BIT) {
5492229f70bSJoao Martins pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
550b8888080SJoao Martins xen_setup_vsyscall_time_info();
5512229f70bSJoao Martins }
5522229f70bSJoao Martins
553b8888080SJoao Martins xen_setup_runstate_info(cpu);
554be012920SIan Campbell xen_setup_timer(cpu);
5559702785aSThomas Gleixner xen_setup_cpu_clockevents();
5569702785aSThomas Gleixner
5575584880eSDavid Vrabel xen_time_setup_guest();
558ecb23dc6SJuergen Gross
559ecb23dc6SJuergen Gross if (xen_initial_domain())
5605584880eSDavid Vrabel pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
5615584880eSDavid Vrabel }
5629702785aSThomas Gleixner
xen_init_time_common(void)563409771d2SStefano Stabellini static void __init xen_init_time_common(void)
564a0e2bf7cSJuergen Gross {
565409771d2SStefano Stabellini xen_sched_clock_offset = xen_clocksource_read();
56638669ba2SPavel Tatashin static_call_update(pv_steal_clock, xen_steal_clock);
567a0e2bf7cSJuergen Gross paravirt_set_sched_clock(xen_sched_clock);
568a0e2bf7cSJuergen Gross
569a0e2bf7cSJuergen Gross x86_platform.calibrate_tsc = xen_tsc_khz;
570a0e2bf7cSJuergen Gross x86_platform.get_wallclock = xen_get_wallclock;
571a0e2bf7cSJuergen Gross }
572a0e2bf7cSJuergen Gross
xen_init_time_ops(void)573a0e2bf7cSJuergen Gross void __init xen_init_time_ops(void)
574a0e2bf7cSJuergen Gross {
575a0e2bf7cSJuergen Gross xen_init_time_common();
576a0e2bf7cSJuergen Gross
577409771d2SStefano Stabellini x86_init.timers.timer_init = xen_time_init;
578409771d2SStefano Stabellini x86_init.timers.setup_percpu_clockev = x86_init_noop;
579409771d2SStefano Stabellini x86_cpuinit.setup_percpu_clockev = x86_init_noop;
580409771d2SStefano Stabellini
581409771d2SStefano Stabellini /* Dom0 uses the native method to set the hardware RTC. */
58247433b8cSDavid Vrabel if (!xen_initial_domain())
58347433b8cSDavid Vrabel x86_platform.set_wallclock = xen_set_wallclock;
584409771d2SStefano Stabellini }
585409771d2SStefano Stabellini
586409771d2SStefano Stabellini #ifdef CONFIG_XEN_PVHVM
/*
 * Per-cpu clockevent setup hook for HVM guests: set up runstate info for
 * the current CPU and register its clockevent device.
 */
static void xen_hvm_setup_cpu_clockevents(void)
{
	int this_cpu = smp_processor_id();

	xen_setup_runstate_info(this_cpu);

	/*
	 * xen_setup_timer(cpu) - snprintf is bad in atomic context. Hence
	 * doing it xen_hvm_cpu_notify (which gets called by smp_init during
	 * early bootup and also during CPU hotplug events).
	 */
	xen_setup_cpu_clockevents();
}
598409771d2SStefano Stabellini
xen_hvm_init_time_ops(void)599409771d2SStefano Stabellini void __init xen_hvm_init_time_ops(void)
600fb6ce5deSDaniel Kiper {
601409771d2SStefano Stabellini static bool hvm_time_initialized;
602eed05744SDongli Zhang
603eed05744SDongli Zhang if (hvm_time_initialized)
604eed05744SDongli Zhang return;
605eed05744SDongli Zhang
606eed05744SDongli Zhang /*
60784d582d2SBoris Ostrovsky * vector callback is needed otherwise we cannot receive interrupts
60884d582d2SBoris Ostrovsky * on cpu > 0 and at this point we don't know how many cpus are
60984d582d2SBoris Ostrovsky * available.
61084d582d2SBoris Ostrovsky */
61184d582d2SBoris Ostrovsky if (!xen_have_vector_callback)
61284d582d2SBoris Ostrovsky return;
61384d582d2SBoris Ostrovsky
61484d582d2SBoris Ostrovsky if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
615409771d2SStefano Stabellini pr_info_once("Xen doesn't support pvclock on HVM, disable pv timer");
616eed05744SDongli Zhang return;
617eed05744SDongli Zhang }
618eed05744SDongli Zhang
619eed05744SDongli Zhang /*
620eed05744SDongli Zhang * Only MAX_VIRT_CPUS 'vcpu_info' are embedded inside 'shared_info'.
621eed05744SDongli Zhang * The __this_cpu_read(xen_vcpu) is still NULL when Xen HVM guest
622eed05744SDongli Zhang * boots on vcpu >= MAX_VIRT_CPUS (e.g., kexec), To access
623eed05744SDongli Zhang * __this_cpu_read(xen_vcpu) via xen_clocksource_read() will panic.
624eed05744SDongli Zhang *
625eed05744SDongli Zhang * The xen_hvm_init_time_ops() should be called again later after
626eed05744SDongli Zhang * __this_cpu_read(xen_vcpu) is available.
627eed05744SDongli Zhang */
628eed05744SDongli Zhang if (!__this_cpu_read(xen_vcpu)) {
629eed05744SDongli Zhang pr_info("Delay xen_init_time_common() as kernel is running on vcpu=%d\n",
630eed05744SDongli Zhang xen_vcpu_nr(0));
631eed05744SDongli Zhang return;
632409771d2SStefano Stabellini }
633409771d2SStefano Stabellini
634409771d2SStefano Stabellini xen_init_time_common();
635a0e2bf7cSJuergen Gross
636a0e2bf7cSJuergen Gross x86_init.timers.setup_percpu_clockev = xen_time_init;
637409771d2SStefano Stabellini x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
638409771d2SStefano Stabellini
639409771d2SStefano Stabellini x86_platform.set_wallclock = xen_set_wallclock;
640409771d2SStefano Stabellini
641eed05744SDongli Zhang hvm_time_initialized = true;
642eed05744SDongli Zhang }
643409771d2SStefano Stabellini #endif
644ca65f9fcSStefano Stabellini
6452ec16bc0SRyan Thibodeaux /* Kernel parameter to specify Xen timer slop */
parse_xen_timer_slop(char * ptr)6462ec16bc0SRyan Thibodeaux static int __init parse_xen_timer_slop(char *ptr)
6472ec16bc0SRyan Thibodeaux {
6482ec16bc0SRyan Thibodeaux unsigned long slop = memparse(ptr, NULL);
6492ec16bc0SRyan Thibodeaux
6502ec16bc0SRyan Thibodeaux xen_timerop_clockevent.min_delta_ns = slop;
6512ec16bc0SRyan Thibodeaux xen_timerop_clockevent.min_delta_ticks = slop;
6522ec16bc0SRyan Thibodeaux xen_vcpuop_clockevent.min_delta_ns = slop;
6532ec16bc0SRyan Thibodeaux xen_vcpuop_clockevent.min_delta_ticks = slop;
6542ec16bc0SRyan Thibodeaux
6552ec16bc0SRyan Thibodeaux return 0;
6562ec16bc0SRyan Thibodeaux }
6572ec16bc0SRyan Thibodeaux early_param("xen_timer_slop", parse_xen_timer_slop);
6582ec16bc0SRyan Thibodeaux