#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/timer.h>
#include <linux/acpi_pmtmr.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/clocksource.h>
#include <linux/percpu.h>
#include <linux/timex.h>
#include <linux/static_key.h>

#include <asm/hpet.h>
#include <asm/timer.h>
#include <asm/vgtod.h>
#include <asm/time.h>
#include <asm/delay.h>
#include <asm/hypervisor.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>
#include <asm/geode.h>
#include <asm/apic.h>
#include <asm/intel-family.h>

unsigned int __read_mostly cpu_khz;	/* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);

unsigned int __read_mostly tsc_khz;
EXPORT_SYMBOL(tsc_khz);

/*
 * TSC can be unstable due to cpufreq or due to unsynced TSCs
 */
static int __read_mostly tsc_unstable;

/*
 * native_sched_clock() is called before tsc_init(), so we must start with
 * the TSC soft disabled to prevent erroneous rdtsc usage on
 * !boot_cpu_has(X86_FEATURE_TSC) processors.
 */
static int __read_mostly tsc_disabled = -1;

static DEFINE_STATIC_KEY_FALSE(__use_tsc);

int tsc_clocksource_reliable;

static u32 art_to_tsc_numerator;
static u32 art_to_tsc_denominator;
static u64 art_to_tsc_offset;
struct clocksource *art_related_clocksource;
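
/*
 * The conversion parameters below are kept in two per-CPU copies guarded by
 * a seqcount latch: the writer bumps the sequence around each copy update,
 * and readers pick data[seq & 1] and retry if the sequence changed under
 * them. That is why cyc2ns_read_begin() below never blocks and never sees
 * a half-updated mul/shift/offset triplet.
 */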
struct cyc2ns {
	struct cyc2ns_data data[2];	/*  0 + 2*16 = 32 */
	seqcount_t	   seq;		/* 32 + 4    = 36 */

}; /* fits one cacheline */

static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);

void cyc2ns_read_begin(struct cyc2ns_data *data)
{
	int seq, idx;

	preempt_disable_notrace();

	do {
		seq = this_cpu_read(cyc2ns.seq.sequence);
		idx = seq & 1;

		data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset);
		data->cyc2ns_mul    = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul);
		data->cyc2ns_shift  = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift);

	} while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence)));
}

void cyc2ns_read_end(void)
{
	preempt_enable_notrace();
}

/*
 * Accelerators for sched_clock()
 * convert from cycles(64bits) => nanoseconds (64bits)
 * basic equation:
 *              ns = cycles / (freq / ns_per_sec)
 *              ns = cycles * (ns_per_sec / freq)
 *              ns = cycles * (10^9 / (cpu_khz * 10^3))
 *              ns = cycles * (10^6 / cpu_khz)
 *
 *      Then we use scaling math (suggested by george@mvista.com) to get:
 *              ns = cycles * (10^6 * SC / cpu_khz) / SC
 *              ns = cycles * cyc2ns_scale / SC
 *
 *      And since SC is a constant power of two, we can convert the div
 *  into a shift. The larger SC is, the more accurate the conversion, but
 *  cyc2ns_scale needs to be a 32-bit value so that 32-bit multiplication
 *  (64-bit result) can be used.
 *
 *  We can use a khz divisor instead of mhz to keep better precision.
 *  (mathieu.desnoyers@polymtl.ca)
 *
 *                      -johnstul@us.ibm.com "math is hard, lets go shopping!"
 */
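
/*
 * Worked example of the scaling math above, assuming a 2000000 kHz
 * (2 GHz) TSC and SC = 2^10:
 *
 *              cyc2ns_scale = 10^6 * 1024 / 2000000 = 512
 *              ns = (cycles * 512) >> 10 = cycles / 2
 *
 * i.e. 0.5 ns per cycle, as expected for a 2 GHz clock. The actual
 * mult/shift pair is computed by clocks_calc_mult_shift() below.
 */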

static void cyc2ns_data_init(struct cyc2ns_data *data)
{
	data->cyc2ns_mul = 0;
	data->cyc2ns_shift = 0;
	data->cyc2ns_offset = 0;
}

static void cyc2ns_init(int cpu)
{
	struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);

	cyc2ns_data_init(&c2n->data[0]);
	cyc2ns_data_init(&c2n->data[1]);

	seqcount_init(&c2n->seq);
}

static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
	struct cyc2ns_data data;
	unsigned long long ns;

	cyc2ns_read_begin(&data);

	ns = data.cyc2ns_offset;
	ns += mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift);

	cyc2ns_read_end();

	return ns;
}

static void __set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
{
	unsigned long long ns_now;
	struct cyc2ns_data data;
	struct cyc2ns *c2n;
	unsigned long flags;

	local_irq_save(flags);
	sched_clock_idle_sleep_event();

	if (!khz)
		goto done;

	ns_now = cycles_2_ns(tsc_now);

	/*
	 * Compute a new multiplier as per the above comment and ensure our
	 * time function is continuous; see the comment near struct
	 * cyc2ns_data.
	 */
	clocks_calc_mult_shift(&data.cyc2ns_mul, &data.cyc2ns_shift, khz,
			       NSEC_PER_MSEC, 0);

	/*
	 * cyc2ns_shift is exported via arch_perf_update_userpage() where it is
	 * not expected to be greater than 31 due to the original published
	 * conversion algorithm shifting a 32-bit value (now specifies a 64-bit
	 * value) - refer to the perf_event_mmap_page documentation in perf_event.h.
	 */
	if (data.cyc2ns_shift == 32) {
		data.cyc2ns_shift = 31;
		data.cyc2ns_mul >>= 1;
	}

	data.cyc2ns_offset = ns_now -
		mul_u64_u32_shr(tsc_now, data.cyc2ns_mul, data.cyc2ns_shift);

	c2n = per_cpu_ptr(&cyc2ns, cpu);

	raw_write_seqcount_latch(&c2n->seq);
	c2n->data[0] = data;
	raw_write_seqcount_latch(&c2n->seq);
	c2n->data[1] = data;

done:
	sched_clock_idle_wakeup_event(0);
	local_irq_restore(flags);
}

static void set_cyc2ns_scale(unsigned long khz, int cpu)
{
	__set_cyc2ns_scale(khz, cpu, rdtsc());
}

/*
 * Scheduler clock - returns current time in nanosec units.
 */
u64 native_sched_clock(void)
{
	if (static_branch_likely(&__use_tsc)) {
		u64 tsc_now = rdtsc();

		/* return the value in ns */
		return cycles_2_ns(tsc_now);
	}

	/*
	 * Fall back to jiffies if there's no TSC available:
	 * ( But note that we still use it if the TSC is marked
	 *   unstable. We do this because unlike Time Of Day,
	 *   the scheduler clock tolerates small errors and it's
	 *   very important for it to be as fast as the platform
	 *   can achieve it. )
	 */

	/* No locking but a rare wrong value is not a big deal: */
	return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
}
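
/*
 * For illustration: with HZ == 250, the jiffies fallback above advances
 * in 1000000000 / 250 = 4000000 ns (4 ms) steps, i.e. sched_clock()
 * resolution degrades to one jiffy when the TSC cannot be used.
 */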

/*
 * Generate a sched_clock if you already have a TSC value.
 */
u64 native_sched_clock_from_tsc(u64 tsc)
{
	return cycles_2_ns(tsc);
}

/*
 * We need to define a real function for sched_clock, to override the
 * weak default version.
 */
#ifdef CONFIG_PARAVIRT
unsigned long long sched_clock(void)
{
	return paravirt_sched_clock();
}

bool using_native_sched_clock(void)
{
	return pv_time_ops.sched_clock == native_sched_clock;
}
#else
unsigned long long
sched_clock(void) __attribute__((alias("native_sched_clock")));

bool using_native_sched_clock(void) { return true; }
#endif

int check_tsc_unstable(void)
{
	return tsc_unstable;
}
EXPORT_SYMBOL_GPL(check_tsc_unstable);

#ifdef CONFIG_X86_TSC
int __init notsc_setup(char *str)
{
	pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n");
	tsc_disabled = 1;
	return 1;
}
#else
/*
 * disable flag for tsc. Takes effect by clearing the TSC cpu flag
 * in cpu/common.c
 */
int __init notsc_setup(char *str)
{
	setup_clear_cpu_cap(X86_FEATURE_TSC);
	return 1;
}
#endif

__setup("notsc", notsc_setup);

static int no_sched_irq_time;

static int __init tsc_setup(char *str)
{
	if (!strcmp(str, "reliable"))
		tsc_clocksource_reliable = 1;
	if (!strncmp(str, "noirqtime", 9))
		no_sched_irq_time = 1;
	if (!strcmp(str, "unstable"))
		mark_tsc_unstable("boot parameter");
	return 1;
}

__setup("tsc=", tsc_setup);
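
/*
 * To recap the command line switches handled above: "notsc" disables use
 * of the TSC (with CONFIG_X86_TSC it can only be soft-disabled, hence the
 * warning), "tsc=reliable" exempts the TSC from the clocksource watchdog,
 * "tsc=noirqtime" turns off TSC-based IRQ-time accounting, and
 * "tsc=unstable" marks the TSC unstable right at boot.
 */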

#define MAX_RETRIES	5
#define SMI_TRESHOLD	50000

/*
 * Read TSC and the reference counters. Take care of SMI disturbance
 */
static u64 tsc_read_refs(u64 *p, int hpet)
{
	u64 t1, t2;
	int i;

	for (i = 0; i < MAX_RETRIES; i++) {
		t1 = get_cycles();
		if (hpet)
			*p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
		else
			*p = acpi_pm_read_early();
		t2 = get_cycles();
		if ((t2 - t1) < SMI_TRESHOLD)
			return t2;
	}
	return ULLONG_MAX;
}

/*
 * Calculate the TSC frequency from HPET reference
 */
static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
{
	u64 tmp;

	if (hpet2 < hpet1)
		hpet2 += 0x100000000ULL;
	hpet2 -= hpet1;
	tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
	do_div(tmp, 1000000);
	do_div(deltatsc, tmp);

	return (unsigned long) deltatsc;
}

/*
 * Calculate the TSC frequency from PMTimer reference
 */
static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
{
	u64 tmp;

	if (!pm1 && !pm2)
		return ULONG_MAX;

	if (pm2 < pm1)
		pm2 += (u64)ACPI_PM_OVRRUN;
	pm2 -= pm1;
	tmp = pm2 * 1000000000LL;
	do_div(tmp, PMTMR_TICKS_PER_SEC);
	do_div(deltatsc, tmp);

	return (unsigned long) deltatsc;
}

#define CAL_MS		10
#define CAL_LATCH	(PIT_TICK_RATE / (1000 / CAL_MS))
#define CAL_PIT_LOOPS	1000

#define CAL2_MS		50
#define CAL2_LATCH	(PIT_TICK_RATE / (1000 / CAL2_MS))
#define CAL2_PIT_LOOPS	5000
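
/*
 * For example, with PIT_TICK_RATE = 1193182 Hz the normal 10ms run uses
 * a latch of 1193182 / (1000 / 10) = 11931 PIT ticks, and the slower
 * 50ms fallback run uses 1193182 / (1000 / 50) = 59659 ticks.
 */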

/*
 * Try to calibrate the TSC against the Programmable
 * Interrupt Timer and return the frequency of the TSC
 * in kHz.
 *
 * Return ULONG_MAX on failure to calibrate.
 */
static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
{
	u64 tsc, t1, t2, delta;
	unsigned long tscmin, tscmax;
	int pitcnt;

	/* Set the Gate high, disable speaker */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);

	/*
	 * Setup CTC channel 2 for mode 0 (interrupt on terminal
	 * count mode), binary count. Set the latch register
	 * (LSB then MSB) to begin the countdown.
	 */
	outb(0xb0, 0x43);
	outb(latch & 0xff, 0x42);
	outb(latch >> 8, 0x42);

	tsc = t1 = t2 = get_cycles();

	pitcnt = 0;
	tscmax = 0;
	tscmin = ULONG_MAX;
	while ((inb(0x61) & 0x20) == 0) {
		t2 = get_cycles();
		delta = t2 - tsc;
		tsc = t2;
		if ((unsigned long) delta < tscmin)
			tscmin = (unsigned int) delta;
		if ((unsigned long) delta > tscmax)
			tscmax = (unsigned int) delta;
		pitcnt++;
	}

	/*
	 * Sanity checks:
	 *
	 * If we were not able to read the PIT more than loopmin
	 * times, then we have been hit by a massive SMI.
	 *
	 * If the maximum is 10 times larger than the minimum,
	 * then we got hit by an SMI as well.
	 */
	if (pitcnt < loopmin || tscmax > 10 * tscmin)
		return ULONG_MAX;

	/* Calculate the PIT value */
	delta = t2 - t1;
	do_div(delta, ms);
	return delta;
}
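
/*
 * Note on units: 'delta' above is the number of TSC cycles elapsed over
 * 'ms' milliseconds, so delta / ms is cycles per millisecond - which is
 * the TSC frequency expressed in kHz.
 */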

/*
 * This reads the current MSB of the PIT counter, and
 * checks if we are running on sufficiently fast and
 * non-virtualized hardware.
 *
 * Our expectations are:
 *
 *  - the PIT is running at roughly 1.19MHz
 *
 *  - each IO is going to take about 1us on real hardware,
 *    but we allow it to be much faster (by a factor of 10) or
 *    _slightly_ slower (ie we allow up to a 2us read+counter
 *    update - anything else implies an unacceptably slow CPU
 *    or PIT for the fast calibration to work).
 *
 *  - with 256 PIT ticks to read the value, we have 214us to
 *    see the same MSB (and overhead like doing a single TSC
 *    read per MSB value etc).
 *
 *  - We're doing 2 reads per loop (LSB, MSB), and we expect
 *    them each to take about a microsecond on real hardware.
 *    So we expect a count value of around 100. But we'll be
 *    generous, and accept anything over 50.
 *
 *  - if the PIT is stuck, and we see *many* more reads, we
 *    return early (and the next caller of pit_expect_msb()
 *    then considers it a failure when it doesn't see the
 *    next expected value).
 *
 * These expectations mean that we know that we have seen the
 * transition from one expected value to another with a fairly
 * high accuracy, and we didn't miss any events. We can thus
 * use the TSC value at the transitions to calculate a pretty
 * good value for the TSC frequency.
 */
static inline int pit_verify_msb(unsigned char val)
{
	/* Ignore LSB */
	inb(0x42);
	return inb(0x42) == val;
}

static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
{
	int count;
	u64 tsc = 0, prev_tsc = 0;

	for (count = 0; count < 50000; count++) {
		if (!pit_verify_msb(val))
			break;
		prev_tsc = tsc;
		tsc = get_cycles();
	}
	*deltap = get_cycles() - prev_tsc;
	*tscp = tsc;

	/*
	 * We require _some_ success, but the quality control
	 * will be based on the error terms on the TSC values.
	 */
	return count > 5;
}

/*
 * How many MSB values do we want to see? We aim for
 * a maximum error rate of 500ppm (in practice the
 * real error is much smaller), but refuse to spend
 * more than 50ms on it.
 */
#define MAX_QUICK_PIT_MS 50
#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)

static unsigned long quick_pit_calibrate(void)
{
	int i;
	u64 tsc, delta;
	unsigned long d1, d2;

	/* Set the Gate high, disable speaker */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);

	/*
	 * Counter 2, mode 0 (one-shot), binary count
	 *
	 * NOTE! Mode 2 decrements by two (and then the
	 * output is flipped each time, giving the same
	 * final output frequency as a decrement-by-one),
	 * so mode 0 is much better when looking at the
	 * individual counts.
	 */
	outb(0xb0, 0x43);

	/* Start at 0xffff */
	outb(0xff, 0x42);
	outb(0xff, 0x42);

	/*
	 * The PIT starts counting at the next edge, so we
	 * need to delay for a microsecond. The easiest way
	 * to do that is to just read back the 16-bit counter
	 * once from the PIT.
	 */
	pit_verify_msb(0);

	if (pit_expect_msb(0xff, &tsc, &d1)) {
		for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
			if (!pit_expect_msb(0xff-i, &delta, &d2))
				break;

			delta -= tsc;

			/*
			 * Extrapolate the error and fail fast if the error will
			 * never be below 500 ppm.
			 */
			if (i == 1 &&
			    d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11)
				return 0;

			/*
			 * Iterate until the error is less than 500 ppm
			 */
			if (d1+d2 >= delta >> 11)
				continue;
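
			/*
			 * (The >> 11 above accepts a read uncertainty
			 * of at most 1/2048 of the elapsed cycles,
			 * i.e. roughly 488 ppm, comfortably inside
			 * the 500 ppm target.)
			 */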

			/*
			 * Check the PIT one more time to verify that
			 * all TSC reads were stable wrt the PIT.
			 *
			 * This also guarantees serialization of the
			 * last cycle read ('d2') in pit_expect_msb.
			 */
			if (!pit_verify_msb(0xfe - i))
				break;
			goto success;
		}
	}
	pr_info("Fast TSC calibration failed\n");
	return 0;

success:
	/*
	 * Ok, if we get here, then we've seen the
	 * MSB of the PIT decrement 'i' times, and the
	 * error has shrunk to less than 500 ppm.
	 *
	 * As a result, we can depend on there not being
	 * any odd delays anywhere, and the TSC reads are
	 * reliable (within the error).
	 *
	 * kHz = ticks / time-in-seconds / 1000;
	 * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000
	 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000)
	 */
	delta *= PIT_TICK_RATE;
	do_div(delta, i*256*1000);
	pr_info("Fast TSC calibration using PIT\n");
	return delta;
}
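
/*
 * Worked example for the formula above, assuming a 2 GHz TSC: seeing the
 * MSB decrement i = 12 times spans 12 * 256 PIT ticks, i.e.
 * 3072 / 1193182 s ~ 2.57 ms, during which roughly 5150000 TSC cycles
 * elapse; 5150000 * 1193182 / (12 * 256 * 1000) ~ 2000000 kHz.
 */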

/**
 * native_calibrate_tsc
 * Determine TSC frequency via CPUID, else return 0.
 */
unsigned long native_calibrate_tsc(void)
{
	unsigned int eax_denominator, ebx_numerator, ecx_hz, edx;
	unsigned int crystal_khz;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return 0;

	if (boot_cpu_data.cpuid_level < 0x15)
		return 0;

	eax_denominator = ebx_numerator = ecx_hz = edx = 0;

	/* CPUID 15H TSC/Crystal ratio, plus optionally Crystal Hz */
	cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx);

	if (ebx_numerator == 0 || eax_denominator == 0)
		return 0;

	crystal_khz = ecx_hz / 1000;

	if (crystal_khz == 0) {
		switch (boot_cpu_data.x86_model) {
		case INTEL_FAM6_SKYLAKE_MOBILE:
		case INTEL_FAM6_SKYLAKE_DESKTOP:
		case INTEL_FAM6_KABYLAKE_MOBILE:
		case INTEL_FAM6_KABYLAKE_DESKTOP:
			crystal_khz = 24000;	/* 24.0 MHz */
			break;
		case INTEL_FAM6_SKYLAKE_X:
		case INTEL_FAM6_ATOM_DENVERTON:
			crystal_khz = 25000;	/* 25.0 MHz */
			break;
		case INTEL_FAM6_ATOM_GOLDMONT:
			crystal_khz = 19200;	/* 19.2 MHz */
			break;
		}
	}

	/*
	 * The TSC frequency determined by CPUID is a "hardware reported"
	 * frequency and is the most accurate one we have so far. This
	 * is considered a known frequency.
	 */
	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);

	/*
	 * For Atom SoCs TSC is the only reliable clocksource.
	 * Mark TSC reliable so no watchdog on it.
	 */
	if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT)
		setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);

	return crystal_khz * ebx_numerator / eax_denominator;
}
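
/*
 * Illustrative numbers for the return value above (not tied to a specific
 * part): a 24000 kHz crystal with a CPUID.15H ratio of 300:2 yields
 * 24000 * 300 / 2 = 3600000 kHz, i.e. a 3.6 GHz TSC.
 */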

static unsigned long cpu_khz_from_cpuid(void)
{
	unsigned int eax_base_mhz, ebx_max_mhz, ecx_bus_mhz, edx;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return 0;

	if (boot_cpu_data.cpuid_level < 0x16)
		return 0;

	eax_base_mhz = ebx_max_mhz = ecx_bus_mhz = edx = 0;

	cpuid(0x16, &eax_base_mhz, &ebx_max_mhz, &ecx_bus_mhz, &edx);

	return eax_base_mhz * 1000;
}

/**
 * native_calibrate_cpu - calibrate the cpu on boot
 */
unsigned long native_calibrate_cpu(void)
{
	u64 tsc1, tsc2, delta, ref1, ref2;
	unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
	unsigned long flags, latch, ms, fast_calibrate;
	int hpet = is_hpet_enabled(), i, loopmin;

	fast_calibrate = cpu_khz_from_cpuid();
	if (fast_calibrate)
		return fast_calibrate;

	fast_calibrate = cpu_khz_from_msr();
	if (fast_calibrate)
		return fast_calibrate;

	local_irq_save(flags);
	fast_calibrate = quick_pit_calibrate();
	local_irq_restore(flags);
	if (fast_calibrate)
		return fast_calibrate;

	/*
	 * Run 3 calibration loops to get the lowest frequency value
	 * (the best estimate). We use two different calibration modes
	 * here:
	 *
	 * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and
	 * load a timeout of 10ms (50ms for the retries). We read the
	 * time right after we started the timer and wait until the PIT
	 * count down reaches zero. In each wait loop iteration we read
	 * the TSC and check the delta to the previous read. We keep
	 * track of the min and max values of that delta. The delta is
	 * mostly defined by the IO time of the PIT access, so we can
	 * detect when an SMI/SMM disturbance happened between the two
	 * reads. If the maximum time is significantly larger than the
	 * minimum time, then we discard the result and have another try.
	 *
	 * 2) Reference counter. If available we use the HPET or the
	 * PMTIMER as a reference to check the sanity of that value.
	 * We use separate TSC readouts and check inside of the
	 * reference read for an SMI/SMM disturbance. We discard
	 * disturbed values here as well. We do that around the PIT
	 * calibration delay loop as we have to wait for a certain
	 * amount of time anyway.
	 */

	/* Preset PIT loop values */
	latch = CAL_LATCH;
	ms = CAL_MS;
	loopmin = CAL_PIT_LOOPS;

	for (i = 0; i < 3; i++) {
		unsigned long tsc_pit_khz;

		/*
		 * Read the start value and the reference count of
		 * hpet/pmtimer when available. Then do the PIT
		 * calibration, which will take at least 'ms'
		 * milliseconds, and read the end value.
		 */
		local_irq_save(flags);
		tsc1 = tsc_read_refs(&ref1, hpet);
		tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin);
		tsc2 = tsc_read_refs(&ref2, hpet);
		local_irq_restore(flags);

		/* Pick the lowest PIT TSC calibration so far */
		tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);

		/* hpet or pmtimer available ? */
		if (ref1 == ref2)
			continue;

		/* Check, whether the sampling was disturbed by an SMI */
		if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
			continue;

		tsc2 = (tsc2 - tsc1) * 1000000LL;
		if (hpet)
			tsc2 = calc_hpet_ref(tsc2, ref1, ref2);
		else
			tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2);

		tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);

		/* Check the reference deviation */
		delta = ((u64) tsc_pit_min) * 100;
		do_div(delta, tsc_ref_min);
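
		/*
		 * E.g. tsc_pit_min = 2000000 kHz against
		 * tsc_ref_min = 1995000 kHz gives
		 * delta = 2000000 * 100 / 1995000 = 100, well inside
		 * the 90..110 acceptance window checked below.
		 */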
		/*
		 * If both calibration results are inside a 10% window
		 * then we can be sure that the calibration
		 * succeeded. We break out of the loop right away. We
		 * use the reference value, as it is more precise.
		 */
		if (delta >= 90 && delta <= 110) {
			pr_info("PIT calibration matches %s. %d loops\n",
				hpet ? "HPET" : "PMTIMER", i + 1);
			return tsc_ref_min;
		}

		/*
		 * Check whether PIT failed more than once. This
		 * happens in virtualized environments. We need to
		 * give the virtual PC a slightly longer timeframe for
		 * the HPET/PMTIMER to make the result precise.
		 */
		if (i == 1 && tsc_pit_min == ULONG_MAX) {
			latch = CAL2_LATCH;
			ms = CAL2_MS;
			loopmin = CAL2_PIT_LOOPS;
		}
	}

	/*
	 * Now check the results.
	 */
	if (tsc_pit_min == ULONG_MAX) {
		/* PIT gave no useful value */
		pr_warn("Unable to calibrate against PIT\n");

		/* We don't have an alternative source, disable TSC */
		if (!hpet && !ref1 && !ref2) {
			pr_notice("No reference (HPET/PMTIMER) available\n");
			return 0;
		}

		/* The alternative source failed as well, disable TSC */
		if (tsc_ref_min == ULONG_MAX) {
			pr_warn("HPET/PMTIMER calibration failed\n");
			return 0;
		}

		/* Use the alternative source */
		pr_info("using %s reference calibration\n",
			hpet ? "HPET" : "PMTIMER");

		return tsc_ref_min;
	}

	/* We don't have an alternative source, use the PIT calibration value */
	if (!hpet && !ref1 && !ref2) {
		pr_info("Using PIT calibration value\n");
		return tsc_pit_min;
	}

	/* The alternative source failed, use the PIT calibration value */
	if (tsc_ref_min == ULONG_MAX) {
		pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n");
		return tsc_pit_min;
	}

	/*
	 * The calibration values differ too much. In doubt, we use
	 * the PIT value as we know that there are PMTIMERs around
	 * running at double speed. At least we let the user know:
	 */
	pr_warn("PIT calibration deviates from %s: %lu %lu\n",
		hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
"HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); 816c767a54bSJoe Perches pr_info("Using PIT calibration value\n"); 817fbb16e24SThomas Gleixner return tsc_pit_min; 818fbb16e24SThomas Gleixner } 819bfc0f594SAlok Kataria 820bfc0f594SAlok Kataria int recalibrate_cpu_khz(void) 821bfc0f594SAlok Kataria { 822bfc0f594SAlok Kataria #ifndef CONFIG_SMP 823bfc0f594SAlok Kataria unsigned long cpu_khz_old = cpu_khz; 824bfc0f594SAlok Kataria 825eff4677eSBorislav Petkov if (!boot_cpu_has(X86_FEATURE_TSC)) 826eff4677eSBorislav Petkov return -ENODEV; 827eff4677eSBorislav Petkov 828aa297292SLen Brown cpu_khz = x86_platform.calibrate_cpu(); 8292d826404SThomas Gleixner tsc_khz = x86_platform.calibrate_tsc(); 830aa297292SLen Brown if (tsc_khz == 0) 831aa297292SLen Brown tsc_khz = cpu_khz; 832ff4c8663SLen Brown else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz) 833ff4c8663SLen Brown cpu_khz = tsc_khz; 834eff4677eSBorislav Petkov cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy, 835bfc0f594SAlok Kataria cpu_khz_old, cpu_khz); 836eff4677eSBorislav Petkov 837bfc0f594SAlok Kataria return 0; 838bfc0f594SAlok Kataria #else 839bfc0f594SAlok Kataria return -ENODEV; 840bfc0f594SAlok Kataria #endif 841bfc0f594SAlok Kataria } 842bfc0f594SAlok Kataria 843bfc0f594SAlok Kataria EXPORT_SYMBOL(recalibrate_cpu_khz); 844bfc0f594SAlok Kataria 8452dbe06faSAlok Kataria 846cd7240c0SSuresh Siddha static unsigned long long cyc2ns_suspend; 847cd7240c0SSuresh Siddha 848b74f05d6SMarcelo Tosatti void tsc_save_sched_clock_state(void) 849cd7240c0SSuresh Siddha { 85035af99e6SPeter Zijlstra if (!sched_clock_stable()) 851cd7240c0SSuresh Siddha return; 852cd7240c0SSuresh Siddha 853cd7240c0SSuresh Siddha cyc2ns_suspend = sched_clock(); 854cd7240c0SSuresh Siddha } 855cd7240c0SSuresh Siddha 856cd7240c0SSuresh Siddha /* 857cd7240c0SSuresh Siddha * Even on processors with invariant TSC, TSC gets reset in some the 858cd7240c0SSuresh Siddha * ACPI system sleep states. And in some systems BIOS seem to reinit TSC to 859cd7240c0SSuresh Siddha * arbitrary value (still sync'd across cpu's) during resume from such sleep 860cd7240c0SSuresh Siddha * states. To cope up with this, recompute the cyc2ns_offset for each cpu so 861cd7240c0SSuresh Siddha * that sched_clock() continues from the point where it was left off during 862cd7240c0SSuresh Siddha * suspend. 863cd7240c0SSuresh Siddha */ 864b74f05d6SMarcelo Tosatti void tsc_restore_sched_clock_state(void) 865cd7240c0SSuresh Siddha { 866cd7240c0SSuresh Siddha unsigned long long offset; 867cd7240c0SSuresh Siddha unsigned long flags; 868cd7240c0SSuresh Siddha int cpu; 869cd7240c0SSuresh Siddha 87035af99e6SPeter Zijlstra if (!sched_clock_stable()) 871cd7240c0SSuresh Siddha return; 872cd7240c0SSuresh Siddha 873cd7240c0SSuresh Siddha local_irq_save(flags); 874cd7240c0SSuresh Siddha 87520d1c86aSPeter Zijlstra /* 8766a6256f9SAdam Buchbinder * We're coming out of suspend, there's no concurrency yet; don't 87720d1c86aSPeter Zijlstra * bother being nice about the RCU stuff, just write to both 87820d1c86aSPeter Zijlstra * data fields. 

#ifdef CONFIG_CPU_FREQ

/*
 * Frequency scaling support. Adjust the TSC based timer when the cpu
 * frequency changes.
 *
 * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
 * not that important because current Opteron setups do not support
 * scaling on SMP anyway.
 *
 * Should fix up last_tsc too. Currently gettimeofday in the
 * first tick after the change will be slightly wrong.
 */

static unsigned int  ref_freq;
static unsigned long loops_per_jiffy_ref;
static unsigned long tsc_khz_ref;

static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
				void *data)
{
	struct cpufreq_freqs *freq = data;
	unsigned long *lpj;

	lpj = &boot_cpu_data.loops_per_jiffy;
#ifdef CONFIG_SMP
	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
		lpj = &cpu_data(freq->cpu).loops_per_jiffy;
#endif

	if (!ref_freq) {
		ref_freq = freq->old;
		loops_per_jiffy_ref = *lpj;
		tsc_khz_ref = tsc_khz;
	}
	if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
			(val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
		*lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);

		tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
			mark_tsc_unstable("cpufreq changes");

		set_cyc2ns_scale(tsc_khz, freq->cpu);
	}

	return 0;
}

static struct notifier_block time_cpufreq_notifier_block = {
	.notifier_call  = time_cpufreq_notifier
};
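
/*
 * For example: with reference values captured at freq->old = 2400000 kHz
 * and tsc_khz_ref = 2400000, a transition to freq->new = 1200000 rescales
 * tsc_khz to 2400000 * 1200000 / 2400000 = 1200000 kHz, keeping the
 * TSC-based timer in step with the halved core clock.
 */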

static int __init cpufreq_register_tsc_scaling(void)
{
	if (!boot_cpu_has(X86_FEATURE_TSC))
		return 0;
	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
		return 0;
	cpufreq_register_notifier(&time_cpufreq_notifier_block,
				CPUFREQ_TRANSITION_NOTIFIER);
	return 0;
}

core_initcall(cpufreq_register_tsc_scaling);

#endif /* CONFIG_CPU_FREQ */

#define ART_CPUID_LEAF (0x15)
#define ART_MIN_DENOMINATOR (1)


/*
 * If ART is present detect the numerator:denominator to convert to TSC
 */
static void detect_art(void)
{
	unsigned int unused[2];

	if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF)
		return;

	/* Don't enable ART in a VM, non-stop TSC and TSC_ADJUST required */
	if (boot_cpu_has(X86_FEATURE_HYPERVISOR) ||
	    !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
	    !boot_cpu_has(X86_FEATURE_TSC_ADJUST))
		return;

	cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator,
	      &art_to_tsc_numerator, unused, unused+1);

	if (art_to_tsc_denominator < ART_MIN_DENOMINATOR)
		return;

	rdmsrl(MSR_IA32_TSC_ADJUST, art_to_tsc_offset);

	/* Make this sticky over multiple CPU init calls */
	setup_force_cpu_cap(X86_FEATURE_ART);
}


/* clocksource code */

static struct clocksource clocksource_tsc;

static void tsc_resume(struct clocksource *cs)
{
	tsc_verify_tsc_adjust(true);
}

/*
 * We used to compare the TSC to the cycle_last value in the clocksource
 * structure to avoid a nasty time-warp. This can be observed in a
 * very small window right after one CPU updated cycle_last under
 * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
 * is smaller than the cycle_last reference value due to a TSC which
 * is slightly behind. This delta is nowhere else observable, but in
 * that case it results in a forward time jump in the range of hours
 * due to the unsigned delta calculation of the time keeping core
 * code, which is necessary to support wrapping clocksources like pm
 * timer.
 *
 * This sanity check is now done in the core timekeeping code, by
 * checking the result of read_tsc() - cycle_last for being negative.
 * That works because CLOCKSOURCE_MASK(64) does not mask out any bit.
 */
static u64 read_tsc(struct clocksource *cs)
{
	return (u64)rdtsc_ordered();
}

static void tsc_cs_mark_unstable(struct clocksource *cs)
{
	if (tsc_unstable)
		return;

	tsc_unstable = 1;
	if (using_native_sched_clock())
		clear_sched_clock_stable();
	disable_sched_clock_irqtime();
	pr_info("Marking TSC unstable due to clocksource watchdog\n");
}

/*
 * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
 */
static struct clocksource clocksource_tsc = {
	.name			= "tsc",
	.rating			= 300,
	.read			= read_tsc,
	.mask			= CLOCKSOURCE_MASK(64),
	.flags			= CLOCK_SOURCE_IS_CONTINUOUS |
				  CLOCK_SOURCE_MUST_VERIFY,
	.archdata		= { .vclock_mode = VCLOCK_TSC },
	.resume			= tsc_resume,
	.mark_unstable		= tsc_cs_mark_unstable,
};

void mark_tsc_unstable(char *reason)
{
	if (tsc_unstable)
		return;

	tsc_unstable = 1;
	if (using_native_sched_clock())
		clear_sched_clock_stable();
	disable_sched_clock_irqtime();
	pr_info("Marking TSC unstable due to %s\n", reason);
	/* Change only the rating, when not registered */
	if (clocksource_tsc.mult) {
		clocksource_mark_unstable(&clocksource_tsc);
	} else {
		clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
		clocksource_tsc.rating = 0;
	}
}

EXPORT_SYMBOL_GPL(mark_tsc_unstable);

static void __init check_system_tsc_reliable(void)
{
#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
	if (is_geode_lx()) {
		/* RTSC counts during suspend */
#define RTSC_SUSP 0x100
		unsigned long res_low, res_high;

		rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
		/* Geode_LX - the OLPC CPU has a very reliable TSC */
		if (res_low & RTSC_SUSP)
			tsc_clocksource_reliable = 1;
	}
#endif
	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
		tsc_clocksource_reliable = 1;
}

/*
 * Make an educated guess if the TSC is trustworthy and synchronized
 * over all CPUs.
 */
int unsynchronized_tsc(void)
{
	if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_unstable)
		return 1;

#ifdef CONFIG_SMP
	if (apic_is_clustered_box())
		return 1;
#endif

	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
		return 0;

	if (tsc_clocksource_reliable)
		return 0;
	/*
	 * Intel systems are normally all synchronized.
	 * Exceptions must mark TSC as unstable:
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
		/* assume multi socket systems are not synchronized: */
		if (num_possible_cpus() > 1)
			return 1;
	}

	return 0;
}

/*
 * Convert ART to TSC given numerator/denominator found in detect_art()
 */
struct system_counterval_t convert_art_to_tsc(u64 art)
{
	u64 tmp, res, rem;

	rem = do_div(art, art_to_tsc_denominator);

	res = art * art_to_tsc_numerator;
	tmp = rem * art_to_tsc_numerator;

	do_div(tmp, art_to_tsc_denominator);
	res += tmp + art_to_tsc_offset;

	return (struct system_counterval_t) {.cs = art_related_clocksource,
					     .cycles = res};
}
EXPORT_SYMBOL(convert_art_to_tsc);
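
/*
 * The quotient/remainder split above computes
 *
 *      tsc = art * numerator / denominator + offset
 *
 * while keeping the intermediate products small: only the sub-denominator
 * remainder goes through the second multiply and divide. E.g. with
 * numerator 2, denominator 1 and a zero offset, an ART value of 1000000
 * converts to 2000000 TSC cycles.
 */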
114208ec0c58SJohn Stultz static void tsc_refine_calibration_work(struct work_struct *work);
114308ec0c58SJohn Stultz static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
114408ec0c58SJohn Stultz /**
114508ec0c58SJohn Stultz * tsc_refine_calibration_work - Further refine tsc freq calibration
114608ec0c58SJohn Stultz * @work: ignored.
114708ec0c58SJohn Stultz *
114808ec0c58SJohn Stultz * This function uses delayed work over a period of one
114908ec0c58SJohn Stultz * second to further refine the TSC freq value. Since this is
115008ec0c58SJohn Stultz * timer-based, instead of loop-based, we don't block the boot
115108ec0c58SJohn Stultz * process while this longer calibration is done.
115208ec0c58SJohn Stultz *
11530d2eb44fSLucas De Marchi * If there are any calibration anomalies (too many SMIs, etc),
115408ec0c58SJohn Stultz * or the refined calibration is off by more than 1% from the fast
115508ec0c58SJohn Stultz * early calibration, we throw out the new calibration and use the
115608ec0c58SJohn Stultz * early calibration.
115708ec0c58SJohn Stultz */
115808ec0c58SJohn Stultz static void tsc_refine_calibration_work(struct work_struct *work)
115908ec0c58SJohn Stultz {
116008ec0c58SJohn Stultz static u64 tsc_start = -1, ref_start;
116108ec0c58SJohn Stultz static int hpet;
116208ec0c58SJohn Stultz u64 tsc_stop, ref_stop, delta;
116308ec0c58SJohn Stultz unsigned long freq;
1164aa7b630eSPeter Zijlstra int cpu;
116508ec0c58SJohn Stultz
116608ec0c58SJohn Stultz /* Don't bother refining TSC on unstable systems */
116708ec0c58SJohn Stultz if (check_tsc_unstable())
116808ec0c58SJohn Stultz goto out;
116908ec0c58SJohn Stultz
117008ec0c58SJohn Stultz /*
117108ec0c58SJohn Stultz * Since the work is started early in boot, we may be
117208ec0c58SJohn Stultz * delayed the first time we expire. So set the workqueue
117308ec0c58SJohn Stultz * again once we know timers are working.
117408ec0c58SJohn Stultz */
117508ec0c58SJohn Stultz if (tsc_start == -1) {
117608ec0c58SJohn Stultz /*
117708ec0c58SJohn Stultz * Only set hpet once, to avoid mixing hardware
117808ec0c58SJohn Stultz * if the hpet becomes enabled later.
117908ec0c58SJohn Stultz */
118008ec0c58SJohn Stultz hpet = is_hpet_enabled();
118108ec0c58SJohn Stultz schedule_delayed_work(&tsc_irqwork, HZ);
118208ec0c58SJohn Stultz tsc_start = tsc_read_refs(&ref_start, hpet);
118308ec0c58SJohn Stultz return;
118408ec0c58SJohn Stultz }
118508ec0c58SJohn Stultz
118608ec0c58SJohn Stultz tsc_stop = tsc_read_refs(&ref_stop, hpet);
118708ec0c58SJohn Stultz
118808ec0c58SJohn Stultz /* HPET or PM timer available? */
118962627becSJohn Stultz if (ref_start == ref_stop)
119008ec0c58SJohn Stultz goto out;
119108ec0c58SJohn Stultz
119208ec0c58SJohn Stultz /* Check whether the sampling was disturbed by an SMI */
119308ec0c58SJohn Stultz if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
119408ec0c58SJohn Stultz goto out;
119508ec0c58SJohn Stultz
119608ec0c58SJohn Stultz delta = tsc_stop - tsc_start;
119708ec0c58SJohn Stultz delta *= 1000000LL;
119808ec0c58SJohn Stultz if (hpet)
119908ec0c58SJohn Stultz freq = calc_hpet_ref(delta, ref_start, ref_stop);
120008ec0c58SJohn Stultz else
120108ec0c58SJohn Stultz freq = calc_pmtimer_ref(delta, ref_start, ref_stop);
120208ec0c58SJohn Stultz
120308ec0c58SJohn Stultz /* Make sure we're within 1% */
120408ec0c58SJohn Stultz if (abs(tsc_khz - freq) > tsc_khz/100)
120508ec0c58SJohn Stultz goto out;
120608ec0c58SJohn Stultz
120708ec0c58SJohn Stultz tsc_khz = freq;
1208c767a54bSJoe Perches pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n",
1209c767a54bSJoe Perches (unsigned long)tsc_khz / 1000,
121008ec0c58SJohn Stultz (unsigned long)tsc_khz % 1000);
121108ec0c58SJohn Stultz
12126731b0d6SNicolai Stange /* Inform the TSC deadline clockevent devices about the recalibration */
12136731b0d6SNicolai Stange lapic_update_tsc_freq();
12146731b0d6SNicolai Stange
1215aa7b630eSPeter Zijlstra /* Update the sched_clock() rate to match the clocksource one */
1216aa7b630eSPeter Zijlstra for_each_possible_cpu(cpu)
1217aa7b630eSPeter Zijlstra __set_cyc2ns_scale(tsc_khz, cpu, tsc_stop);
1218aa7b630eSPeter Zijlstra
121908ec0c58SJohn Stultz out:
1220f9677e0fSChristopher S. Hall if (boot_cpu_has(X86_FEATURE_ART))
1221f9677e0fSChristopher S. Hall art_related_clocksource = &clocksource_tsc;
122208ec0c58SJohn Stultz clocksource_register_khz(&clocksource_tsc, tsc_khz);
122308ec0c58SJohn Stultz }
122408ec0c58SJohn Stultz
122508ec0c58SJohn Stultz
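/*
 * A numeric sketch of the 1% sanity check above (illustrative values):
 * with an early calibration of tsc_khz = 2,000,000 (a 2 GHz TSC) the
 * tolerated deviation is tsc_khz/100 = 20,000 kHz, so a refined result
 * of 2,015,000 kHz is accepted while 2,025,000 kHz is discarded and the
 * early value kept. freq is in kHz, matching tsc_khz (note the
 * 1000000LL pre-scaling of delta before the calc_*_ref() helpers).
 */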
122608ec0c58SJohn Stultz static int __init init_tsc_clocksource(void)
12278fbbc4b4SAlok Kataria {
122859e21e3dSBorislav Petkov if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz)
1229a8760ecaSThomas Gleixner return 0;
1230a8760ecaSThomas Gleixner
1231395628efSAlok Kataria if (tsc_clocksource_reliable)
1232395628efSAlok Kataria clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
12338fbbc4b4SAlok Kataria /* lower the rating if we already know it's unstable: */
12348fbbc4b4SAlok Kataria if (check_tsc_unstable()) {
12358fbbc4b4SAlok Kataria clocksource_tsc.rating = 0;
12368fbbc4b4SAlok Kataria clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
12378fbbc4b4SAlok Kataria }
123857779dc2SAlok Kataria
123982f9c080SFeng Tang if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
124082f9c080SFeng Tang clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
124182f9c080SFeng Tang
124257779dc2SAlok Kataria /*
124347c95a46SBin Gao * When TSC frequency is known (retrieved via MSR or CPUID), we skip
124447c95a46SBin Gao * the refined calibration and directly register it as a clocksource.
124557779dc2SAlok Kataria */
1246984fecebSThomas Gleixner if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) {
124744fee88cSPeter Zijlstra if (boot_cpu_has(X86_FEATURE_ART))
124844fee88cSPeter Zijlstra art_related_clocksource = &clocksource_tsc;
124957779dc2SAlok Kataria clocksource_register_khz(&clocksource_tsc, tsc_khz);
125057779dc2SAlok Kataria return 0;
125157779dc2SAlok Kataria }
125257779dc2SAlok Kataria
125308ec0c58SJohn Stultz schedule_delayed_work(&tsc_irqwork, 0);
125408ec0c58SJohn Stultz return 0;
12558fbbc4b4SAlok Kataria }
125608ec0c58SJohn Stultz /*
125708ec0c58SJohn Stultz * We use device_initcall here to ensure we run after the hpet
125808ec0c58SJohn Stultz * is fully initialized, which may occur at fs_initcall time.
125908ec0c58SJohn Stultz */
126008ec0c58SJohn Stultz device_initcall(init_tsc_clocksource);
12618fbbc4b4SAlok Kataria
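/*
 * For reference, a sketch of the initcall ordering relied upon here
 * (levels as defined in include/linux/init.h): initcalls run in the
 * order
 *
 *	early -> core -> postcore -> arch -> subsys -> fs -> device -> late
 *
 * so a device_initcall is guaranteed to execute after every
 * fs_initcall, which is where the HPET may finish its setup.
 */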
12628fbbc4b4SAlok Kataria void __init tsc_init(void)
12638fbbc4b4SAlok Kataria {
1264615cd033SPeter Zijlstra u64 lpj, cyc;
12658fbbc4b4SAlok Kataria int cpu;
12668fbbc4b4SAlok Kataria
126759e21e3dSBorislav Petkov if (!boot_cpu_has(X86_FEATURE_TSC)) {
1268b47dcbdcSAndy Lutomirski setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
12698fbbc4b4SAlok Kataria return;
1270b47dcbdcSAndy Lutomirski }
12718fbbc4b4SAlok Kataria
1272aa297292SLen Brown cpu_khz = x86_platform.calibrate_cpu();
12732d826404SThomas Gleixner tsc_khz = x86_platform.calibrate_tsc();
1274ff4c8663SLen Brown
1275ff4c8663SLen Brown /*
1276ff4c8663SLen Brown * Trust non-zero tsc_khz as authoritative,
1277ff4c8663SLen Brown * and use it to sanity-check cpu_khz,
1278ff4c8663SLen Brown * which will be off if the system timer is off.
1279ff4c8663SLen Brown */
1280aa297292SLen Brown if (tsc_khz == 0)
1281aa297292SLen Brown tsc_khz = cpu_khz;
1282ff4c8663SLen Brown else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz)
1283ff4c8663SLen Brown cpu_khz = tsc_khz;
12848fbbc4b4SAlok Kataria
1285e93ef949SAlok Kataria if (!tsc_khz) {
12868fbbc4b4SAlok Kataria mark_tsc_unstable("could not calculate TSC khz");
1287b47dcbdcSAndy Lutomirski setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
12888fbbc4b4SAlok Kataria return;
12898fbbc4b4SAlok Kataria }
12908fbbc4b4SAlok Kataria
1291c767a54bSJoe Perches pr_info("Detected %lu.%03lu MHz processor\n",
12928fbbc4b4SAlok Kataria (unsigned long)cpu_khz / 1000,
12938fbbc4b4SAlok Kataria (unsigned long)cpu_khz % 1000);
12948fbbc4b4SAlok Kataria
1295f2e04214SThomas Gleixner /* Sanitize TSC ADJUST before cyc2ns gets initialized */
1296f2e04214SThomas Gleixner tsc_store_and_check_tsc_adjust(true);
1297f2e04214SThomas Gleixner
12988fbbc4b4SAlok Kataria /*
12998fbbc4b4SAlok Kataria * Secondary CPUs do not run through tsc_init(), so set up
13008fbbc4b4SAlok Kataria * all the scale factors for all CPUs, assuming the same
13018fbbc4b4SAlok Kataria * speed as the bootup CPU. (cpufreq notifiers will fix this
13028fbbc4b4SAlok Kataria * up if their speed diverges)
13038fbbc4b4SAlok Kataria */
1304615cd033SPeter Zijlstra cyc = rdtsc();
130520d1c86aSPeter Zijlstra for_each_possible_cpu(cpu) {
130620d1c86aSPeter Zijlstra cyc2ns_init(cpu);
1307615cd033SPeter Zijlstra __set_cyc2ns_scale(tsc_khz, cpu, cyc);
130820d1c86aSPeter Zijlstra }
13098fbbc4b4SAlok Kataria
13108fbbc4b4SAlok Kataria if (tsc_disabled > 0)
13118fbbc4b4SAlok Kataria return;
13128fbbc4b4SAlok Kataria
13138fbbc4b4SAlok Kataria /* now allow native_sched_clock() to use rdtsc */
131410b033d4SPeter Zijlstra
13158fbbc4b4SAlok Kataria tsc_disabled = 0;
13163bbfafb7SPeter Zijlstra static_branch_enable(&__use_tsc);
13178fbbc4b4SAlok Kataria
1318e82b8e4eSVenkatesh Pallipadi if (!no_sched_irq_time)
1319e82b8e4eSVenkatesh Pallipadi enable_sched_clock_irqtime();
1320e82b8e4eSVenkatesh Pallipadi
132170de9a97SAlok Kataria lpj = ((u64)tsc_khz * 1000);
132270de9a97SAlok Kataria do_div(lpj, HZ);
132370de9a97SAlok Kataria lpj_fine = lpj;
132470de9a97SAlok Kataria
13258fbbc4b4SAlok Kataria use_tsc_delay();
13268fbbc4b4SAlok Kataria
13278fbbc4b4SAlok Kataria if (unsynchronized_tsc())
13288fbbc4b4SAlok Kataria mark_tsc_unstable("TSCs unsynchronized");
13298fbbc4b4SAlok Kataria
1330395628efSAlok Kataria check_system_tsc_reliable();
1331f9677e0fSChristopher S. Hall
1332f9677e0fSChristopher S. Hall detect_art();
13338fbbc4b4SAlok Kataria }
13348fbbc4b4SAlok Kataria
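/*
 * A worked example of the lpj_fine computation above (illustrative
 * numbers): with tsc_khz = 2,000,000 (a 2 GHz TSC) and HZ = 250,
 *
 *	lpj = 2,000,000 * 1000 / 250 = 8,000,000
 *
 * i.e. eight million TSC cycles per jiffy, which gives the generic
 * delay-loop calibration a precise starting value.
 */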
1335b565201cSJack Steiner #ifdef CONFIG_SMP
1336b565201cSJack Steiner /*
1337b565201cSJack Steiner * If we have a constant TSC and are using the TSC for the delay loop,
1338b565201cSJack Steiner * we can skip clock calibration if another cpu in the same socket has already
1339b565201cSJack Steiner * been calibrated. This assumes that CONSTANT_TSC applies to all
1340b565201cSJack Steiner * cpus in the socket - this should be a safe assumption.
1341b565201cSJack Steiner */
1342148f9bb8SPaul Gortmaker unsigned long calibrate_delay_is_known(void)
1343b565201cSJack Steiner {
1344c25323c0SThomas Gleixner int sibling, cpu = smp_processor_id();
1345f508a5baSThomas Gleixner struct cpumask *mask = topology_core_cpumask(cpu);
1346b565201cSJack Steiner
1347b565201cSJack Steiner if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
1348b565201cSJack Steiner return 0;
1349b565201cSJack Steiner
1350f508a5baSThomas Gleixner if (!mask)
1351f508a5baSThomas Gleixner return 0;
1352f508a5baSThomas Gleixner
1353f508a5baSThomas Gleixner sibling = cpumask_any_but(mask, cpu);
1354c25323c0SThomas Gleixner if (sibling < nr_cpu_ids)
1355c25323c0SThomas Gleixner return cpu_data(sibling).loops_per_jiffy;
1356b565201cSJack Steiner return 0;
1357b565201cSJack Steiner }
1358b565201cSJack Steiner #endif
1359
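/*
 * A minimal sketch of how this hook is consumed (the generic caller
 * lives in init/calibrate.c; shown only for illustration):
 *
 *	lpj = calibrate_delay_is_known();
 *	if (lpj)
 *		reuse lpj and skip the delay-loop measurement;
 *
 * so on constant-TSC systems only the first CPU of each socket pays
 * the full calibration cost.
 */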