1c767a54bSJoe Perches #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 2c767a54bSJoe Perches 3bfc0f594SAlok Kataria #include <linux/kernel.h> 40ef95533SAlok Kataria #include <linux/sched.h> 50ef95533SAlok Kataria #include <linux/init.h> 6186f4360SPaul Gortmaker #include <linux/export.h> 70ef95533SAlok Kataria #include <linux/timer.h> 8bfc0f594SAlok Kataria #include <linux/acpi_pmtmr.h> 92dbe06faSAlok Kataria #include <linux/cpufreq.h> 108fbbc4b4SAlok Kataria #include <linux/delay.h> 118fbbc4b4SAlok Kataria #include <linux/clocksource.h> 128fbbc4b4SAlok Kataria #include <linux/percpu.h> 1308604bd9SArnd Bergmann #include <linux/timex.h> 1410b033d4SPeter Zijlstra #include <linux/static_key.h> 15bfc0f594SAlok Kataria 16bfc0f594SAlok Kataria #include <asm/hpet.h> 178fbbc4b4SAlok Kataria #include <asm/timer.h> 188fbbc4b4SAlok Kataria #include <asm/vgtod.h> 198fbbc4b4SAlok Kataria #include <asm/time.h> 208fbbc4b4SAlok Kataria #include <asm/delay.h> 2188b094fbSAlok Kataria #include <asm/hypervisor.h> 2208047c4fSThomas Gleixner #include <asm/nmi.h> 232d826404SThomas Gleixner #include <asm/x86_init.h> 2403da3ff1SDavid Woodhouse #include <asm/geode.h> 256731b0d6SNicolai Stange #include <asm/apic.h> 26655e52d2SPrarit Bhargava #include <asm/intel-family.h> 270ef95533SAlok Kataria 28f24ade3aSIngo Molnar unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ 290ef95533SAlok Kataria EXPORT_SYMBOL(cpu_khz); 30f24ade3aSIngo Molnar 31f24ade3aSIngo Molnar unsigned int __read_mostly tsc_khz; 320ef95533SAlok Kataria EXPORT_SYMBOL(tsc_khz); 330ef95533SAlok Kataria 340ef95533SAlok Kataria /* 350ef95533SAlok Kataria * TSC can be unstable due to cpufreq or due to unsynced TSCs 360ef95533SAlok Kataria */ 37f24ade3aSIngo Molnar static int __read_mostly tsc_unstable; 380ef95533SAlok Kataria 390ef95533SAlok Kataria /* native_sched_clock() is called before tsc_init(), so 400ef95533SAlok Kataria we must start with the TSC soft disabled to prevent 4159e21e3dSBorislav Petkov erroneous rdtsc usage on !boot_cpu_has(X86_FEATURE_TSC) processors */ 42f24ade3aSIngo Molnar static int __read_mostly tsc_disabled = -1; 430ef95533SAlok Kataria 443bbfafb7SPeter Zijlstra static DEFINE_STATIC_KEY_FALSE(__use_tsc); 4510b033d4SPeter Zijlstra 4628a00184SSuresh Siddha int tsc_clocksource_reliable; 4757c67da2SPeter Zijlstra 48f9677e0fSChristopher S. Hall static u32 art_to_tsc_numerator; 49f9677e0fSChristopher S. Hall static u32 art_to_tsc_denominator; 50f9677e0fSChristopher S. Hall static u64 art_to_tsc_offset; 51f9677e0fSChristopher S. Hall struct clocksource *art_related_clocksource; 52f9677e0fSChristopher S. Hall 5320d1c86aSPeter Zijlstra /* 5420d1c86aSPeter Zijlstra * Use a ring-buffer like data structure, where a writer advances the head by 5520d1c86aSPeter Zijlstra * writing a new data entry and a reader advances the tail when it observes a 5620d1c86aSPeter Zijlstra * new entry. 5720d1c86aSPeter Zijlstra * 5820d1c86aSPeter Zijlstra * Writers are made to wait on readers until there's space to write a new 5920d1c86aSPeter Zijlstra * entry. 6020d1c86aSPeter Zijlstra * 6120d1c86aSPeter Zijlstra * This means that we can always use an {offset, mul} pair to compute a ns 6220d1c86aSPeter Zijlstra * value that is 'roughly' in the right direction, even if we're writing a new 6320d1c86aSPeter Zijlstra * {offset, mul} pair during the clock read. 6420d1c86aSPeter Zijlstra * 6520d1c86aSPeter Zijlstra * The down-side is that we can no longer guarantee strict monotonicity anymore 6620d1c86aSPeter Zijlstra * (assuming the TSC was that to begin with), because while we compute the 6720d1c86aSPeter Zijlstra * intersection point of the two clock slopes and make sure the time is 6820d1c86aSPeter Zijlstra * continuous at the point of switching; we can no longer guarantee a reader is 6920d1c86aSPeter Zijlstra * strictly before or after the switch point. 7020d1c86aSPeter Zijlstra * 7120d1c86aSPeter Zijlstra * It does mean a reader no longer needs to disable IRQs in order to avoid 7220d1c86aSPeter Zijlstra * CPU-Freq updates messing with his times, and similarly an NMI reader will 7320d1c86aSPeter Zijlstra * no longer run the risk of hitting half-written state. 7420d1c86aSPeter Zijlstra */ 7520d1c86aSPeter Zijlstra 7620d1c86aSPeter Zijlstra struct cyc2ns { 7720d1c86aSPeter Zijlstra struct cyc2ns_data data[2]; /* 0 + 2*24 = 48 */ 7820d1c86aSPeter Zijlstra struct cyc2ns_data *head; /* 48 + 8 = 56 */ 7920d1c86aSPeter Zijlstra struct cyc2ns_data *tail; /* 56 + 8 = 64 */ 8020d1c86aSPeter Zijlstra }; /* exactly fits one cacheline */ 8120d1c86aSPeter Zijlstra 8220d1c86aSPeter Zijlstra static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); 8320d1c86aSPeter Zijlstra 8420d1c86aSPeter Zijlstra struct cyc2ns_data *cyc2ns_read_begin(void) 8520d1c86aSPeter Zijlstra { 8620d1c86aSPeter Zijlstra struct cyc2ns_data *head; 8720d1c86aSPeter Zijlstra 8820d1c86aSPeter Zijlstra preempt_disable(); 8920d1c86aSPeter Zijlstra 9020d1c86aSPeter Zijlstra head = this_cpu_read(cyc2ns.head); 9120d1c86aSPeter Zijlstra /* 9220d1c86aSPeter Zijlstra * Ensure we observe the entry when we observe the pointer to it. 9320d1c86aSPeter Zijlstra * matches the wmb from cyc2ns_write_end(). 9420d1c86aSPeter Zijlstra */ 9520d1c86aSPeter Zijlstra smp_read_barrier_depends(); 9620d1c86aSPeter Zijlstra head->__count++; 9720d1c86aSPeter Zijlstra barrier(); 9820d1c86aSPeter Zijlstra 9920d1c86aSPeter Zijlstra return head; 10020d1c86aSPeter Zijlstra } 10120d1c86aSPeter Zijlstra 10220d1c86aSPeter Zijlstra void cyc2ns_read_end(struct cyc2ns_data *head) 10320d1c86aSPeter Zijlstra { 10420d1c86aSPeter Zijlstra barrier(); 10520d1c86aSPeter Zijlstra /* 10620d1c86aSPeter Zijlstra * If we're the outer most nested read; update the tail pointer 10720d1c86aSPeter Zijlstra * when we're done. This notifies possible pending writers 10820d1c86aSPeter Zijlstra * that we've observed the head pointer and that the other 10920d1c86aSPeter Zijlstra * entry is now free. 11020d1c86aSPeter Zijlstra */ 11120d1c86aSPeter Zijlstra if (!--head->__count) { 11220d1c86aSPeter Zijlstra /* 11320d1c86aSPeter Zijlstra * x86-TSO does not reorder writes with older reads; 11420d1c86aSPeter Zijlstra * therefore once this write becomes visible to another 11520d1c86aSPeter Zijlstra * cpu, we must be finished reading the cyc2ns_data. 11620d1c86aSPeter Zijlstra * 11720d1c86aSPeter Zijlstra * matches with cyc2ns_write_begin(). 11820d1c86aSPeter Zijlstra */ 11920d1c86aSPeter Zijlstra this_cpu_write(cyc2ns.tail, head); 12020d1c86aSPeter Zijlstra } 12120d1c86aSPeter Zijlstra preempt_enable(); 12220d1c86aSPeter Zijlstra } 12320d1c86aSPeter Zijlstra 12420d1c86aSPeter Zijlstra /* 12520d1c86aSPeter Zijlstra * Begin writing a new @data entry for @cpu. 12620d1c86aSPeter Zijlstra * 12720d1c86aSPeter Zijlstra * Assumes some sort of write side lock; currently 'provided' by the assumption 12820d1c86aSPeter Zijlstra * that cpufreq will call its notifiers sequentially. 12920d1c86aSPeter Zijlstra */ 13020d1c86aSPeter Zijlstra static struct cyc2ns_data *cyc2ns_write_begin(int cpu) 13120d1c86aSPeter Zijlstra { 13220d1c86aSPeter Zijlstra struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); 13320d1c86aSPeter Zijlstra struct cyc2ns_data *data = c2n->data; 13420d1c86aSPeter Zijlstra 13520d1c86aSPeter Zijlstra if (data == c2n->head) 13620d1c86aSPeter Zijlstra data++; 13720d1c86aSPeter Zijlstra 13820d1c86aSPeter Zijlstra /* XXX send an IPI to @cpu in order to guarantee a read? */ 13920d1c86aSPeter Zijlstra 14020d1c86aSPeter Zijlstra /* 14120d1c86aSPeter Zijlstra * When we observe the tail write from cyc2ns_read_end(), 14220d1c86aSPeter Zijlstra * the cpu must be done with that entry and its safe 14320d1c86aSPeter Zijlstra * to start writing to it. 14420d1c86aSPeter Zijlstra */ 14520d1c86aSPeter Zijlstra while (c2n->tail == data) 14620d1c86aSPeter Zijlstra cpu_relax(); 14720d1c86aSPeter Zijlstra 14820d1c86aSPeter Zijlstra return data; 14920d1c86aSPeter Zijlstra } 15020d1c86aSPeter Zijlstra 15120d1c86aSPeter Zijlstra static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data) 15220d1c86aSPeter Zijlstra { 15320d1c86aSPeter Zijlstra struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); 15420d1c86aSPeter Zijlstra 15520d1c86aSPeter Zijlstra /* 15620d1c86aSPeter Zijlstra * Ensure the @data writes are visible before we publish the 15720d1c86aSPeter Zijlstra * entry. Matches the data-depencency in cyc2ns_read_begin(). 15820d1c86aSPeter Zijlstra */ 15920d1c86aSPeter Zijlstra smp_wmb(); 16020d1c86aSPeter Zijlstra 16120d1c86aSPeter Zijlstra ACCESS_ONCE(c2n->head) = data; 16220d1c86aSPeter Zijlstra } 16320d1c86aSPeter Zijlstra 16420d1c86aSPeter Zijlstra /* 16520d1c86aSPeter Zijlstra * Accelerators for sched_clock() 16657c67da2SPeter Zijlstra * convert from cycles(64bits) => nanoseconds (64bits) 16757c67da2SPeter Zijlstra * basic equation: 16857c67da2SPeter Zijlstra * ns = cycles / (freq / ns_per_sec) 16957c67da2SPeter Zijlstra * ns = cycles * (ns_per_sec / freq) 17057c67da2SPeter Zijlstra * ns = cycles * (10^9 / (cpu_khz * 10^3)) 17157c67da2SPeter Zijlstra * ns = cycles * (10^6 / cpu_khz) 17257c67da2SPeter Zijlstra * 17357c67da2SPeter Zijlstra * Then we use scaling math (suggested by george@mvista.com) to get: 17457c67da2SPeter Zijlstra * ns = cycles * (10^6 * SC / cpu_khz) / SC 17557c67da2SPeter Zijlstra * ns = cycles * cyc2ns_scale / SC 17657c67da2SPeter Zijlstra * 17757c67da2SPeter Zijlstra * And since SC is a constant power of two, we can convert the div 178b20112edSAdrian Hunter * into a shift. The larger SC is, the more accurate the conversion, but 179b20112edSAdrian Hunter * cyc2ns_scale needs to be a 32-bit value so that 32-bit multiplication 180b20112edSAdrian Hunter * (64-bit result) can be used. 18157c67da2SPeter Zijlstra * 182b20112edSAdrian Hunter * We can use khz divisor instead of mhz to keep a better precision. 18357c67da2SPeter Zijlstra * (mathieu.desnoyers@polymtl.ca) 18457c67da2SPeter Zijlstra * 18557c67da2SPeter Zijlstra * -johnstul@us.ibm.com "math is hard, lets go shopping!" 18657c67da2SPeter Zijlstra */ 18757c67da2SPeter Zijlstra 18820d1c86aSPeter Zijlstra static void cyc2ns_data_init(struct cyc2ns_data *data) 18920d1c86aSPeter Zijlstra { 1905e3c1afdSPeter Zijlstra data->cyc2ns_mul = 0; 191b20112edSAdrian Hunter data->cyc2ns_shift = 0; 19220d1c86aSPeter Zijlstra data->cyc2ns_offset = 0; 19320d1c86aSPeter Zijlstra data->__count = 0; 19420d1c86aSPeter Zijlstra } 19520d1c86aSPeter Zijlstra 19620d1c86aSPeter Zijlstra static void cyc2ns_init(int cpu) 19720d1c86aSPeter Zijlstra { 19820d1c86aSPeter Zijlstra struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); 19920d1c86aSPeter Zijlstra 20020d1c86aSPeter Zijlstra cyc2ns_data_init(&c2n->data[0]); 20120d1c86aSPeter Zijlstra cyc2ns_data_init(&c2n->data[1]); 20220d1c86aSPeter Zijlstra 20320d1c86aSPeter Zijlstra c2n->head = c2n->data; 20420d1c86aSPeter Zijlstra c2n->tail = c2n->data; 20520d1c86aSPeter Zijlstra } 20620d1c86aSPeter Zijlstra 20757c67da2SPeter Zijlstra static inline unsigned long long cycles_2_ns(unsigned long long cyc) 20857c67da2SPeter Zijlstra { 20920d1c86aSPeter Zijlstra struct cyc2ns_data *data, *tail; 21020d1c86aSPeter Zijlstra unsigned long long ns; 21120d1c86aSPeter Zijlstra 21220d1c86aSPeter Zijlstra /* 21320d1c86aSPeter Zijlstra * See cyc2ns_read_*() for details; replicated in order to avoid 21420d1c86aSPeter Zijlstra * an extra few instructions that came with the abstraction. 21520d1c86aSPeter Zijlstra * Notable, it allows us to only do the __count and tail update 21620d1c86aSPeter Zijlstra * dance when its actually needed. 21720d1c86aSPeter Zijlstra */ 21820d1c86aSPeter Zijlstra 219569d6557SSteven Rostedt preempt_disable_notrace(); 22020d1c86aSPeter Zijlstra data = this_cpu_read(cyc2ns.head); 22120d1c86aSPeter Zijlstra tail = this_cpu_read(cyc2ns.tail); 22220d1c86aSPeter Zijlstra 22320d1c86aSPeter Zijlstra if (likely(data == tail)) { 22420d1c86aSPeter Zijlstra ns = data->cyc2ns_offset; 225b20112edSAdrian Hunter ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift); 22620d1c86aSPeter Zijlstra } else { 22720d1c86aSPeter Zijlstra data->__count++; 22820d1c86aSPeter Zijlstra 22920d1c86aSPeter Zijlstra barrier(); 23020d1c86aSPeter Zijlstra 23120d1c86aSPeter Zijlstra ns = data->cyc2ns_offset; 232b20112edSAdrian Hunter ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift); 23320d1c86aSPeter Zijlstra 23420d1c86aSPeter Zijlstra barrier(); 23520d1c86aSPeter Zijlstra 23620d1c86aSPeter Zijlstra if (!--data->__count) 23720d1c86aSPeter Zijlstra this_cpu_write(cyc2ns.tail, data); 23820d1c86aSPeter Zijlstra } 239569d6557SSteven Rostedt preempt_enable_notrace(); 24020d1c86aSPeter Zijlstra 24157c67da2SPeter Zijlstra return ns; 24257c67da2SPeter Zijlstra } 24357c67da2SPeter Zijlstra 244aa297292SLen Brown static void set_cyc2ns_scale(unsigned long khz, int cpu) 24557c67da2SPeter Zijlstra { 24620d1c86aSPeter Zijlstra unsigned long long tsc_now, ns_now; 24720d1c86aSPeter Zijlstra struct cyc2ns_data *data; 24820d1c86aSPeter Zijlstra unsigned long flags; 24957c67da2SPeter Zijlstra 25057c67da2SPeter Zijlstra local_irq_save(flags); 25157c67da2SPeter Zijlstra sched_clock_idle_sleep_event(); 25257c67da2SPeter Zijlstra 253aa297292SLen Brown if (!khz) 25420d1c86aSPeter Zijlstra goto done; 25520d1c86aSPeter Zijlstra 25620d1c86aSPeter Zijlstra data = cyc2ns_write_begin(cpu); 25757c67da2SPeter Zijlstra 2584ea1636bSAndy Lutomirski tsc_now = rdtsc(); 25957c67da2SPeter Zijlstra ns_now = cycles_2_ns(tsc_now); 26057c67da2SPeter Zijlstra 26120d1c86aSPeter Zijlstra /* 26220d1c86aSPeter Zijlstra * Compute a new multiplier as per the above comment and ensure our 26320d1c86aSPeter Zijlstra * time function is continuous; see the comment near struct 26420d1c86aSPeter Zijlstra * cyc2ns_data. 26520d1c86aSPeter Zijlstra */ 266aa297292SLen Brown clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, khz, 267b20112edSAdrian Hunter NSEC_PER_MSEC, 0); 268b20112edSAdrian Hunter 269b9511cd7SAdrian Hunter /* 270b9511cd7SAdrian Hunter * cyc2ns_shift is exported via arch_perf_update_userpage() where it is 271b9511cd7SAdrian Hunter * not expected to be greater than 31 due to the original published 272b9511cd7SAdrian Hunter * conversion algorithm shifting a 32-bit value (now specifies a 64-bit 273b9511cd7SAdrian Hunter * value) - refer perf_event_mmap_page documentation in perf_event.h. 274b9511cd7SAdrian Hunter */ 275b9511cd7SAdrian Hunter if (data->cyc2ns_shift == 32) { 276b9511cd7SAdrian Hunter data->cyc2ns_shift = 31; 277b9511cd7SAdrian Hunter data->cyc2ns_mul >>= 1; 278b9511cd7SAdrian Hunter } 279b9511cd7SAdrian Hunter 28020d1c86aSPeter Zijlstra data->cyc2ns_offset = ns_now - 281b20112edSAdrian Hunter mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, data->cyc2ns_shift); 28257c67da2SPeter Zijlstra 28320d1c86aSPeter Zijlstra cyc2ns_write_end(cpu, data); 28420d1c86aSPeter Zijlstra 28520d1c86aSPeter Zijlstra done: 28657c67da2SPeter Zijlstra sched_clock_idle_wakeup_event(0); 28757c67da2SPeter Zijlstra local_irq_restore(flags); 28857c67da2SPeter Zijlstra } 2890ef95533SAlok Kataria /* 2900ef95533SAlok Kataria * Scheduler clock - returns current time in nanosec units. 2910ef95533SAlok Kataria */ 2920ef95533SAlok Kataria u64 native_sched_clock(void) 2930ef95533SAlok Kataria { 2943bbfafb7SPeter Zijlstra if (static_branch_likely(&__use_tsc)) { 2953bbfafb7SPeter Zijlstra u64 tsc_now = rdtsc(); 2963bbfafb7SPeter Zijlstra 2973bbfafb7SPeter Zijlstra /* return the value in ns */ 2983bbfafb7SPeter Zijlstra return cycles_2_ns(tsc_now); 2993bbfafb7SPeter Zijlstra } 3000ef95533SAlok Kataria 3010ef95533SAlok Kataria /* 3020ef95533SAlok Kataria * Fall back to jiffies if there's no TSC available: 3030ef95533SAlok Kataria * ( But note that we still use it if the TSC is marked 3040ef95533SAlok Kataria * unstable. We do this because unlike Time Of Day, 3050ef95533SAlok Kataria * the scheduler clock tolerates small errors and it's 3060ef95533SAlok Kataria * very important for it to be as fast as the platform 3073ad2f3fbSDaniel Mack * can achieve it. ) 3080ef95533SAlok Kataria */ 3093bbfafb7SPeter Zijlstra 3100ef95533SAlok Kataria /* No locking but a rare wrong value is not a big deal: */ 3110ef95533SAlok Kataria return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); 3120ef95533SAlok Kataria } 3130ef95533SAlok Kataria 314a94cab23SAndi Kleen /* 315a94cab23SAndi Kleen * Generate a sched_clock if you already have a TSC value. 316a94cab23SAndi Kleen */ 317a94cab23SAndi Kleen u64 native_sched_clock_from_tsc(u64 tsc) 318a94cab23SAndi Kleen { 319a94cab23SAndi Kleen return cycles_2_ns(tsc); 320a94cab23SAndi Kleen } 321a94cab23SAndi Kleen 3220ef95533SAlok Kataria /* We need to define a real function for sched_clock, to override the 3230ef95533SAlok Kataria weak default version */ 3240ef95533SAlok Kataria #ifdef CONFIG_PARAVIRT 3250ef95533SAlok Kataria unsigned long long sched_clock(void) 3260ef95533SAlok Kataria { 3270ef95533SAlok Kataria return paravirt_sched_clock(); 3280ef95533SAlok Kataria } 3290ef95533SAlok Kataria #else 3300ef95533SAlok Kataria unsigned long long 3310ef95533SAlok Kataria sched_clock(void) __attribute__((alias("native_sched_clock"))); 3320ef95533SAlok Kataria #endif 3330ef95533SAlok Kataria 3340ef95533SAlok Kataria int check_tsc_unstable(void) 3350ef95533SAlok Kataria { 3360ef95533SAlok Kataria return tsc_unstable; 3370ef95533SAlok Kataria } 3380ef95533SAlok Kataria EXPORT_SYMBOL_GPL(check_tsc_unstable); 3390ef95533SAlok Kataria 3400ef95533SAlok Kataria #ifdef CONFIG_X86_TSC 3410ef95533SAlok Kataria int __init notsc_setup(char *str) 3420ef95533SAlok Kataria { 343c767a54bSJoe Perches pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n"); 3440ef95533SAlok Kataria tsc_disabled = 1; 3450ef95533SAlok Kataria return 1; 3460ef95533SAlok Kataria } 3470ef95533SAlok Kataria #else 3480ef95533SAlok Kataria /* 3490ef95533SAlok Kataria * disable flag for tsc. Takes effect by clearing the TSC cpu flag 3500ef95533SAlok Kataria * in cpu/common.c 3510ef95533SAlok Kataria */ 3520ef95533SAlok Kataria int __init notsc_setup(char *str) 3530ef95533SAlok Kataria { 3540ef95533SAlok Kataria setup_clear_cpu_cap(X86_FEATURE_TSC); 3550ef95533SAlok Kataria return 1; 3560ef95533SAlok Kataria } 3570ef95533SAlok Kataria #endif 3580ef95533SAlok Kataria 3590ef95533SAlok Kataria __setup("notsc", notsc_setup); 360bfc0f594SAlok Kataria 361e82b8e4eSVenkatesh Pallipadi static int no_sched_irq_time; 362e82b8e4eSVenkatesh Pallipadi 363395628efSAlok Kataria static int __init tsc_setup(char *str) 364395628efSAlok Kataria { 365395628efSAlok Kataria if (!strcmp(str, "reliable")) 366395628efSAlok Kataria tsc_clocksource_reliable = 1; 367e82b8e4eSVenkatesh Pallipadi if (!strncmp(str, "noirqtime", 9)) 368e82b8e4eSVenkatesh Pallipadi no_sched_irq_time = 1; 369395628efSAlok Kataria return 1; 370395628efSAlok Kataria } 371395628efSAlok Kataria 372395628efSAlok Kataria __setup("tsc=", tsc_setup); 373395628efSAlok Kataria 374bfc0f594SAlok Kataria #define MAX_RETRIES 5 375bfc0f594SAlok Kataria #define SMI_TRESHOLD 50000 376bfc0f594SAlok Kataria 377bfc0f594SAlok Kataria /* 378bfc0f594SAlok Kataria * Read TSC and the reference counters. Take care of SMI disturbance 379bfc0f594SAlok Kataria */ 380827014beSThomas Gleixner static u64 tsc_read_refs(u64 *p, int hpet) 381bfc0f594SAlok Kataria { 382bfc0f594SAlok Kataria u64 t1, t2; 383bfc0f594SAlok Kataria int i; 384bfc0f594SAlok Kataria 385bfc0f594SAlok Kataria for (i = 0; i < MAX_RETRIES; i++) { 386bfc0f594SAlok Kataria t1 = get_cycles(); 387bfc0f594SAlok Kataria if (hpet) 388827014beSThomas Gleixner *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; 389bfc0f594SAlok Kataria else 390827014beSThomas Gleixner *p = acpi_pm_read_early(); 391bfc0f594SAlok Kataria t2 = get_cycles(); 392bfc0f594SAlok Kataria if ((t2 - t1) < SMI_TRESHOLD) 393bfc0f594SAlok Kataria return t2; 394bfc0f594SAlok Kataria } 395bfc0f594SAlok Kataria return ULLONG_MAX; 396bfc0f594SAlok Kataria } 397bfc0f594SAlok Kataria 398ec0c15afSLinus Torvalds /* 399d683ef7aSThomas Gleixner * Calculate the TSC frequency from HPET reference 400d683ef7aSThomas Gleixner */ 401d683ef7aSThomas Gleixner static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2) 402d683ef7aSThomas Gleixner { 403d683ef7aSThomas Gleixner u64 tmp; 404d683ef7aSThomas Gleixner 405d683ef7aSThomas Gleixner if (hpet2 < hpet1) 406d683ef7aSThomas Gleixner hpet2 += 0x100000000ULL; 407d683ef7aSThomas Gleixner hpet2 -= hpet1; 408d683ef7aSThomas Gleixner tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); 409d683ef7aSThomas Gleixner do_div(tmp, 1000000); 410d683ef7aSThomas Gleixner do_div(deltatsc, tmp); 411d683ef7aSThomas Gleixner 412d683ef7aSThomas Gleixner return (unsigned long) deltatsc; 413d683ef7aSThomas Gleixner } 414d683ef7aSThomas Gleixner 415d683ef7aSThomas Gleixner /* 416d683ef7aSThomas Gleixner * Calculate the TSC frequency from PMTimer reference 417d683ef7aSThomas Gleixner */ 418d683ef7aSThomas Gleixner static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) 419d683ef7aSThomas Gleixner { 420d683ef7aSThomas Gleixner u64 tmp; 421d683ef7aSThomas Gleixner 422d683ef7aSThomas Gleixner if (!pm1 && !pm2) 423d683ef7aSThomas Gleixner return ULONG_MAX; 424d683ef7aSThomas Gleixner 425d683ef7aSThomas Gleixner if (pm2 < pm1) 426d683ef7aSThomas Gleixner pm2 += (u64)ACPI_PM_OVRRUN; 427d683ef7aSThomas Gleixner pm2 -= pm1; 428d683ef7aSThomas Gleixner tmp = pm2 * 1000000000LL; 429d683ef7aSThomas Gleixner do_div(tmp, PMTMR_TICKS_PER_SEC); 430d683ef7aSThomas Gleixner do_div(deltatsc, tmp); 431d683ef7aSThomas Gleixner 432d683ef7aSThomas Gleixner return (unsigned long) deltatsc; 433d683ef7aSThomas Gleixner } 434d683ef7aSThomas Gleixner 435a977c400SThomas Gleixner #define CAL_MS 10 436b7743970SDeepak Saxena #define CAL_LATCH (PIT_TICK_RATE / (1000 / CAL_MS)) 437a977c400SThomas Gleixner #define CAL_PIT_LOOPS 1000 438a977c400SThomas Gleixner 439a977c400SThomas Gleixner #define CAL2_MS 50 440b7743970SDeepak Saxena #define CAL2_LATCH (PIT_TICK_RATE / (1000 / CAL2_MS)) 441a977c400SThomas Gleixner #define CAL2_PIT_LOOPS 5000 442a977c400SThomas Gleixner 443cce3e057SThomas Gleixner 444ec0c15afSLinus Torvalds /* 445ec0c15afSLinus Torvalds * Try to calibrate the TSC against the Programmable 446ec0c15afSLinus Torvalds * Interrupt Timer and return the frequency of the TSC 447ec0c15afSLinus Torvalds * in kHz. 448ec0c15afSLinus Torvalds * 449ec0c15afSLinus Torvalds * Return ULONG_MAX on failure to calibrate. 450ec0c15afSLinus Torvalds */ 451a977c400SThomas Gleixner static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) 452ec0c15afSLinus Torvalds { 453ec0c15afSLinus Torvalds u64 tsc, t1, t2, delta; 454ec0c15afSLinus Torvalds unsigned long tscmin, tscmax; 455ec0c15afSLinus Torvalds int pitcnt; 456ec0c15afSLinus Torvalds 457ec0c15afSLinus Torvalds /* Set the Gate high, disable speaker */ 458ec0c15afSLinus Torvalds outb((inb(0x61) & ~0x02) | 0x01, 0x61); 459ec0c15afSLinus Torvalds 460ec0c15afSLinus Torvalds /* 461ec0c15afSLinus Torvalds * Setup CTC channel 2* for mode 0, (interrupt on terminal 462ec0c15afSLinus Torvalds * count mode), binary count. Set the latch register to 50ms 463ec0c15afSLinus Torvalds * (LSB then MSB) to begin countdown. 464ec0c15afSLinus Torvalds */ 465ec0c15afSLinus Torvalds outb(0xb0, 0x43); 466a977c400SThomas Gleixner outb(latch & 0xff, 0x42); 467a977c400SThomas Gleixner outb(latch >> 8, 0x42); 468ec0c15afSLinus Torvalds 469ec0c15afSLinus Torvalds tsc = t1 = t2 = get_cycles(); 470ec0c15afSLinus Torvalds 471ec0c15afSLinus Torvalds pitcnt = 0; 472ec0c15afSLinus Torvalds tscmax = 0; 473ec0c15afSLinus Torvalds tscmin = ULONG_MAX; 474ec0c15afSLinus Torvalds while ((inb(0x61) & 0x20) == 0) { 475ec0c15afSLinus Torvalds t2 = get_cycles(); 476ec0c15afSLinus Torvalds delta = t2 - tsc; 477ec0c15afSLinus Torvalds tsc = t2; 478ec0c15afSLinus Torvalds if ((unsigned long) delta < tscmin) 479ec0c15afSLinus Torvalds tscmin = (unsigned int) delta; 480ec0c15afSLinus Torvalds if ((unsigned long) delta > tscmax) 481ec0c15afSLinus Torvalds tscmax = (unsigned int) delta; 482ec0c15afSLinus Torvalds pitcnt++; 483ec0c15afSLinus Torvalds } 484ec0c15afSLinus Torvalds 485ec0c15afSLinus Torvalds /* 486ec0c15afSLinus Torvalds * Sanity checks: 487ec0c15afSLinus Torvalds * 488a977c400SThomas Gleixner * If we were not able to read the PIT more than loopmin 489ec0c15afSLinus Torvalds * times, then we have been hit by a massive SMI 490ec0c15afSLinus Torvalds * 491ec0c15afSLinus Torvalds * If the maximum is 10 times larger than the minimum, 492ec0c15afSLinus Torvalds * then we got hit by an SMI as well. 493ec0c15afSLinus Torvalds */ 494a977c400SThomas Gleixner if (pitcnt < loopmin || tscmax > 10 * tscmin) 495ec0c15afSLinus Torvalds return ULONG_MAX; 496ec0c15afSLinus Torvalds 497ec0c15afSLinus Torvalds /* Calculate the PIT value */ 498ec0c15afSLinus Torvalds delta = t2 - t1; 499a977c400SThomas Gleixner do_div(delta, ms); 500ec0c15afSLinus Torvalds return delta; 501ec0c15afSLinus Torvalds } 502ec0c15afSLinus Torvalds 5036ac40ed0SLinus Torvalds /* 5046ac40ed0SLinus Torvalds * This reads the current MSB of the PIT counter, and 5056ac40ed0SLinus Torvalds * checks if we are running on sufficiently fast and 5066ac40ed0SLinus Torvalds * non-virtualized hardware. 5076ac40ed0SLinus Torvalds * 5086ac40ed0SLinus Torvalds * Our expectations are: 5096ac40ed0SLinus Torvalds * 5106ac40ed0SLinus Torvalds * - the PIT is running at roughly 1.19MHz 5116ac40ed0SLinus Torvalds * 5126ac40ed0SLinus Torvalds * - each IO is going to take about 1us on real hardware, 5136ac40ed0SLinus Torvalds * but we allow it to be much faster (by a factor of 10) or 5146ac40ed0SLinus Torvalds * _slightly_ slower (ie we allow up to a 2us read+counter 5156ac40ed0SLinus Torvalds * update - anything else implies a unacceptably slow CPU 5166ac40ed0SLinus Torvalds * or PIT for the fast calibration to work. 5176ac40ed0SLinus Torvalds * 5186ac40ed0SLinus Torvalds * - with 256 PIT ticks to read the value, we have 214us to 5196ac40ed0SLinus Torvalds * see the same MSB (and overhead like doing a single TSC 5206ac40ed0SLinus Torvalds * read per MSB value etc). 5216ac40ed0SLinus Torvalds * 5226ac40ed0SLinus Torvalds * - We're doing 2 reads per loop (LSB, MSB), and we expect 5236ac40ed0SLinus Torvalds * them each to take about a microsecond on real hardware. 5246ac40ed0SLinus Torvalds * So we expect a count value of around 100. But we'll be 5256ac40ed0SLinus Torvalds * generous, and accept anything over 50. 5266ac40ed0SLinus Torvalds * 5276ac40ed0SLinus Torvalds * - if the PIT is stuck, and we see *many* more reads, we 5286ac40ed0SLinus Torvalds * return early (and the next caller of pit_expect_msb() 5296ac40ed0SLinus Torvalds * then consider it a failure when they don't see the 5306ac40ed0SLinus Torvalds * next expected value). 5316ac40ed0SLinus Torvalds * 5326ac40ed0SLinus Torvalds * These expectations mean that we know that we have seen the 5336ac40ed0SLinus Torvalds * transition from one expected value to another with a fairly 5346ac40ed0SLinus Torvalds * high accuracy, and we didn't miss any events. We can thus 5356ac40ed0SLinus Torvalds * use the TSC value at the transitions to calculate a pretty 5366ac40ed0SLinus Torvalds * good value for the TSC frequencty. 5376ac40ed0SLinus Torvalds */ 538b6e61eefSLinus Torvalds static inline int pit_verify_msb(unsigned char val) 539b6e61eefSLinus Torvalds { 540b6e61eefSLinus Torvalds /* Ignore LSB */ 541b6e61eefSLinus Torvalds inb(0x42); 542b6e61eefSLinus Torvalds return inb(0x42) == val; 543b6e61eefSLinus Torvalds } 544b6e61eefSLinus Torvalds 5459e8912e0SLinus Torvalds static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) 5466ac40ed0SLinus Torvalds { 5479e8912e0SLinus Torvalds int count; 54868f30fbeSLinus Torvalds u64 tsc = 0, prev_tsc = 0; 5496ac40ed0SLinus Torvalds 5506ac40ed0SLinus Torvalds for (count = 0; count < 50000; count++) { 551b6e61eefSLinus Torvalds if (!pit_verify_msb(val)) 5526ac40ed0SLinus Torvalds break; 55368f30fbeSLinus Torvalds prev_tsc = tsc; 5549e8912e0SLinus Torvalds tsc = get_cycles(); 5556ac40ed0SLinus Torvalds } 55668f30fbeSLinus Torvalds *deltap = get_cycles() - prev_tsc; 5579e8912e0SLinus Torvalds *tscp = tsc; 5589e8912e0SLinus Torvalds 5599e8912e0SLinus Torvalds /* 5609e8912e0SLinus Torvalds * We require _some_ success, but the quality control 5619e8912e0SLinus Torvalds * will be based on the error terms on the TSC values. 5629e8912e0SLinus Torvalds */ 5639e8912e0SLinus Torvalds return count > 5; 5646ac40ed0SLinus Torvalds } 5656ac40ed0SLinus Torvalds 5666ac40ed0SLinus Torvalds /* 5679e8912e0SLinus Torvalds * How many MSB values do we want to see? We aim for 5689e8912e0SLinus Torvalds * a maximum error rate of 500ppm (in practice the 5699e8912e0SLinus Torvalds * real error is much smaller), but refuse to spend 57068f30fbeSLinus Torvalds * more than 50ms on it. 5716ac40ed0SLinus Torvalds */ 57268f30fbeSLinus Torvalds #define MAX_QUICK_PIT_MS 50 5739e8912e0SLinus Torvalds #define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) 5746ac40ed0SLinus Torvalds 5756ac40ed0SLinus Torvalds static unsigned long quick_pit_calibrate(void) 5766ac40ed0SLinus Torvalds { 5779e8912e0SLinus Torvalds int i; 5789e8912e0SLinus Torvalds u64 tsc, delta; 5799e8912e0SLinus Torvalds unsigned long d1, d2; 5809e8912e0SLinus Torvalds 5816ac40ed0SLinus Torvalds /* Set the Gate high, disable speaker */ 5826ac40ed0SLinus Torvalds outb((inb(0x61) & ~0x02) | 0x01, 0x61); 5836ac40ed0SLinus Torvalds 5846ac40ed0SLinus Torvalds /* 5856ac40ed0SLinus Torvalds * Counter 2, mode 0 (one-shot), binary count 5866ac40ed0SLinus Torvalds * 5876ac40ed0SLinus Torvalds * NOTE! Mode 2 decrements by two (and then the 5886ac40ed0SLinus Torvalds * output is flipped each time, giving the same 5896ac40ed0SLinus Torvalds * final output frequency as a decrement-by-one), 5906ac40ed0SLinus Torvalds * so mode 0 is much better when looking at the 5916ac40ed0SLinus Torvalds * individual counts. 5926ac40ed0SLinus Torvalds */ 5936ac40ed0SLinus Torvalds outb(0xb0, 0x43); 5946ac40ed0SLinus Torvalds 5956ac40ed0SLinus Torvalds /* Start at 0xffff */ 5966ac40ed0SLinus Torvalds outb(0xff, 0x42); 5976ac40ed0SLinus Torvalds outb(0xff, 0x42); 5986ac40ed0SLinus Torvalds 599a6a80e1dSLinus Torvalds /* 600a6a80e1dSLinus Torvalds * The PIT starts counting at the next edge, so we 601a6a80e1dSLinus Torvalds * need to delay for a microsecond. The easiest way 602a6a80e1dSLinus Torvalds * to do that is to just read back the 16-bit counter 603a6a80e1dSLinus Torvalds * once from the PIT. 604a6a80e1dSLinus Torvalds */ 605b6e61eefSLinus Torvalds pit_verify_msb(0); 606a6a80e1dSLinus Torvalds 6079e8912e0SLinus Torvalds if (pit_expect_msb(0xff, &tsc, &d1)) { 6089e8912e0SLinus Torvalds for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { 6099e8912e0SLinus Torvalds if (!pit_expect_msb(0xff-i, &delta, &d2)) 6109e8912e0SLinus Torvalds break; 6116ac40ed0SLinus Torvalds 6125aac644aSAdrian Hunter delta -= tsc; 6135aac644aSAdrian Hunter 6145aac644aSAdrian Hunter /* 6155aac644aSAdrian Hunter * Extrapolate the error and fail fast if the error will 6165aac644aSAdrian Hunter * never be below 500 ppm. 6175aac644aSAdrian Hunter */ 6185aac644aSAdrian Hunter if (i == 1 && 6195aac644aSAdrian Hunter d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11) 6205aac644aSAdrian Hunter return 0; 6215aac644aSAdrian Hunter 6226ac40ed0SLinus Torvalds /* 6239e8912e0SLinus Torvalds * Iterate until the error is less than 500 ppm 6244156e9a8SIngo Molnar */ 625b6e61eefSLinus Torvalds if (d1+d2 >= delta >> 11) 626b6e61eefSLinus Torvalds continue; 627b6e61eefSLinus Torvalds 628b6e61eefSLinus Torvalds /* 629b6e61eefSLinus Torvalds * Check the PIT one more time to verify that 630b6e61eefSLinus Torvalds * all TSC reads were stable wrt the PIT. 631b6e61eefSLinus Torvalds * 632b6e61eefSLinus Torvalds * This also guarantees serialization of the 633b6e61eefSLinus Torvalds * last cycle read ('d2') in pit_expect_msb. 634b6e61eefSLinus Torvalds */ 635b6e61eefSLinus Torvalds if (!pit_verify_msb(0xfe - i)) 636b6e61eefSLinus Torvalds break; 6379e8912e0SLinus Torvalds goto success; 6389e8912e0SLinus Torvalds } 6399e8912e0SLinus Torvalds } 64052045217SAlexandre Demers pr_info("Fast TSC calibration failed\n"); 6419e8912e0SLinus Torvalds return 0; 6424156e9a8SIngo Molnar 6439e8912e0SLinus Torvalds success: 6444156e9a8SIngo Molnar /* 6456ac40ed0SLinus Torvalds * Ok, if we get here, then we've seen the 6469e8912e0SLinus Torvalds * MSB of the PIT decrement 'i' times, and the 6479e8912e0SLinus Torvalds * error has shrunk to less than 500 ppm. 6486ac40ed0SLinus Torvalds * 6496ac40ed0SLinus Torvalds * As a result, we can depend on there not being 6506ac40ed0SLinus Torvalds * any odd delays anywhere, and the TSC reads are 65168f30fbeSLinus Torvalds * reliable (within the error). 6526ac40ed0SLinus Torvalds * 6536ac40ed0SLinus Torvalds * kHz = ticks / time-in-seconds / 1000; 6549e8912e0SLinus Torvalds * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000 6559e8912e0SLinus Torvalds * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000) 6566ac40ed0SLinus Torvalds */ 6579e8912e0SLinus Torvalds delta *= PIT_TICK_RATE; 6589e8912e0SLinus Torvalds do_div(delta, i*256*1000); 659c767a54bSJoe Perches pr_info("Fast TSC calibration using PIT\n"); 6606ac40ed0SLinus Torvalds return delta; 6616ac40ed0SLinus Torvalds } 662ec0c15afSLinus Torvalds 663bfc0f594SAlok Kataria /** 664aa297292SLen Brown * native_calibrate_tsc 665aa297292SLen Brown * Determine TSC frequency via CPUID, else return 0. 666bfc0f594SAlok Kataria */ 667e93ef949SAlok Kataria unsigned long native_calibrate_tsc(void) 668bfc0f594SAlok Kataria { 669aa297292SLen Brown unsigned int eax_denominator, ebx_numerator, ecx_hz, edx; 670aa297292SLen Brown unsigned int crystal_khz; 671aa297292SLen Brown 672aa297292SLen Brown if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) 673aa297292SLen Brown return 0; 674aa297292SLen Brown 675aa297292SLen Brown if (boot_cpu_data.cpuid_level < 0x15) 676aa297292SLen Brown return 0; 677aa297292SLen Brown 678aa297292SLen Brown eax_denominator = ebx_numerator = ecx_hz = edx = 0; 679aa297292SLen Brown 680aa297292SLen Brown /* CPUID 15H TSC/Crystal ratio, plus optionally Crystal Hz */ 681aa297292SLen Brown cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx); 682aa297292SLen Brown 683aa297292SLen Brown if (ebx_numerator == 0 || eax_denominator == 0) 684aa297292SLen Brown return 0; 685aa297292SLen Brown 686aa297292SLen Brown crystal_khz = ecx_hz / 1000; 687aa297292SLen Brown 688aa297292SLen Brown if (crystal_khz == 0) { 689aa297292SLen Brown switch (boot_cpu_data.x86_model) { 690655e52d2SPrarit Bhargava case INTEL_FAM6_SKYLAKE_MOBILE: 691655e52d2SPrarit Bhargava case INTEL_FAM6_SKYLAKE_DESKTOP: 6926baf3d61SPrarit Bhargava case INTEL_FAM6_KABYLAKE_MOBILE: 6936baf3d61SPrarit Bhargava case INTEL_FAM6_KABYLAKE_DESKTOP: 694ff4c8663SLen Brown crystal_khz = 24000; /* 24.0 MHz */ 695ff4c8663SLen Brown break; 6966baf3d61SPrarit Bhargava case INTEL_FAM6_SKYLAKE_X: 6976baf3d61SPrarit Bhargava crystal_khz = 25000; /* 25.0 MHz */ 6986baf3d61SPrarit Bhargava break; 699655e52d2SPrarit Bhargava case INTEL_FAM6_ATOM_GOLDMONT: 700ff4c8663SLen Brown crystal_khz = 19200; /* 19.2 MHz */ 701ff4c8663SLen Brown break; 702aa297292SLen Brown } 703aa297292SLen Brown } 704aa297292SLen Brown 7054ca4df0bSBin Gao /* 7064ca4df0bSBin Gao * TSC frequency determined by CPUID is a "hardware reported" 7074ca4df0bSBin Gao * frequency and is the most accurate one so far we have. This 7084ca4df0bSBin Gao * is considered a known frequency. 7094ca4df0bSBin Gao */ 7104ca4df0bSBin Gao setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); 7114ca4df0bSBin Gao 7124635fdc6SBin Gao /* 7134635fdc6SBin Gao * For Atom SoCs TSC is the only reliable clocksource. 7144635fdc6SBin Gao * Mark TSC reliable so no watchdog on it. 7154635fdc6SBin Gao */ 7164635fdc6SBin Gao if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT) 7174635fdc6SBin Gao setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); 7184635fdc6SBin Gao 719aa297292SLen Brown return crystal_khz * ebx_numerator / eax_denominator; 720aa297292SLen Brown } 721aa297292SLen Brown 722aa297292SLen Brown static unsigned long cpu_khz_from_cpuid(void) 723aa297292SLen Brown { 724aa297292SLen Brown unsigned int eax_base_mhz, ebx_max_mhz, ecx_bus_mhz, edx; 725aa297292SLen Brown 726aa297292SLen Brown if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) 727aa297292SLen Brown return 0; 728aa297292SLen Brown 729aa297292SLen Brown if (boot_cpu_data.cpuid_level < 0x16) 730aa297292SLen Brown return 0; 731aa297292SLen Brown 732aa297292SLen Brown eax_base_mhz = ebx_max_mhz = ecx_bus_mhz = edx = 0; 733aa297292SLen Brown 734aa297292SLen Brown cpuid(0x16, &eax_base_mhz, &ebx_max_mhz, &ecx_bus_mhz, &edx); 735aa297292SLen Brown 736aa297292SLen Brown return eax_base_mhz * 1000; 737aa297292SLen Brown } 738aa297292SLen Brown 739aa297292SLen Brown /** 740aa297292SLen Brown * native_calibrate_cpu - calibrate the cpu on boot 741aa297292SLen Brown */ 742aa297292SLen Brown unsigned long native_calibrate_cpu(void) 743aa297292SLen Brown { 744827014beSThomas Gleixner u64 tsc1, tsc2, delta, ref1, ref2; 745fbb16e24SThomas Gleixner unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; 7462d826404SThomas Gleixner unsigned long flags, latch, ms, fast_calibrate; 747a977c400SThomas Gleixner int hpet = is_hpet_enabled(), i, loopmin; 748bfc0f594SAlok Kataria 749aa297292SLen Brown fast_calibrate = cpu_khz_from_cpuid(); 750aa297292SLen Brown if (fast_calibrate) 751aa297292SLen Brown return fast_calibrate; 752aa297292SLen Brown 75302c0cd2dSLen Brown fast_calibrate = cpu_khz_from_msr(); 7545f0e0309SThomas Gleixner if (fast_calibrate) 7557da7c156SBin Gao return fast_calibrate; 7567da7c156SBin Gao 757bfc0f594SAlok Kataria local_irq_save(flags); 7586ac40ed0SLinus Torvalds fast_calibrate = quick_pit_calibrate(); 759bfc0f594SAlok Kataria local_irq_restore(flags); 7606ac40ed0SLinus Torvalds if (fast_calibrate) 7616ac40ed0SLinus Torvalds return fast_calibrate; 762fbb16e24SThomas Gleixner 763fbb16e24SThomas Gleixner /* 764fbb16e24SThomas Gleixner * Run 5 calibration loops to get the lowest frequency value 765fbb16e24SThomas Gleixner * (the best estimate). We use two different calibration modes 766fbb16e24SThomas Gleixner * here: 767fbb16e24SThomas Gleixner * 768fbb16e24SThomas Gleixner * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and 769fbb16e24SThomas Gleixner * load a timeout of 50ms. We read the time right after we 770fbb16e24SThomas Gleixner * started the timer and wait until the PIT count down reaches 771fbb16e24SThomas Gleixner * zero. In each wait loop iteration we read the TSC and check 772fbb16e24SThomas Gleixner * the delta to the previous read. We keep track of the min 773fbb16e24SThomas Gleixner * and max values of that delta. The delta is mostly defined 774fbb16e24SThomas Gleixner * by the IO time of the PIT access, so we can detect when a 7750d2eb44fSLucas De Marchi * SMI/SMM disturbance happened between the two reads. If the 776fbb16e24SThomas Gleixner * maximum time is significantly larger than the minimum time, 777fbb16e24SThomas Gleixner * then we discard the result and have another try. 778fbb16e24SThomas Gleixner * 779fbb16e24SThomas Gleixner * 2) Reference counter. If available we use the HPET or the 780fbb16e24SThomas Gleixner * PMTIMER as a reference to check the sanity of that value. 781fbb16e24SThomas Gleixner * We use separate TSC readouts and check inside of the 782fbb16e24SThomas Gleixner * reference read for a SMI/SMM disturbance. We dicard 783fbb16e24SThomas Gleixner * disturbed values here as well. We do that around the PIT 784fbb16e24SThomas Gleixner * calibration delay loop as we have to wait for a certain 785fbb16e24SThomas Gleixner * amount of time anyway. 786fbb16e24SThomas Gleixner */ 787a977c400SThomas Gleixner 788a977c400SThomas Gleixner /* Preset PIT loop values */ 789a977c400SThomas Gleixner latch = CAL_LATCH; 790a977c400SThomas Gleixner ms = CAL_MS; 791a977c400SThomas Gleixner loopmin = CAL_PIT_LOOPS; 792a977c400SThomas Gleixner 793a977c400SThomas Gleixner for (i = 0; i < 3; i++) { 794ec0c15afSLinus Torvalds unsigned long tsc_pit_khz; 795bfc0f594SAlok Kataria 796fbb16e24SThomas Gleixner /* 797fbb16e24SThomas Gleixner * Read the start value and the reference count of 798ec0c15afSLinus Torvalds * hpet/pmtimer when available. Then do the PIT 799ec0c15afSLinus Torvalds * calibration, which will take at least 50ms, and 800ec0c15afSLinus Torvalds * read the end value. 801fbb16e24SThomas Gleixner */ 802ec0c15afSLinus Torvalds local_irq_save(flags); 803827014beSThomas Gleixner tsc1 = tsc_read_refs(&ref1, hpet); 804a977c400SThomas Gleixner tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin); 805827014beSThomas Gleixner tsc2 = tsc_read_refs(&ref2, hpet); 806bfc0f594SAlok Kataria local_irq_restore(flags); 807bfc0f594SAlok Kataria 808ec0c15afSLinus Torvalds /* Pick the lowest PIT TSC calibration so far */ 809ec0c15afSLinus Torvalds tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); 810bfc0f594SAlok Kataria 811bfc0f594SAlok Kataria /* hpet or pmtimer available ? */ 81262627becSJohn Stultz if (ref1 == ref2) 813fbb16e24SThomas Gleixner continue; 814bfc0f594SAlok Kataria 815bfc0f594SAlok Kataria /* Check, whether the sampling was disturbed by an SMI */ 816fbb16e24SThomas Gleixner if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) 817fbb16e24SThomas Gleixner continue; 818bfc0f594SAlok Kataria 819bfc0f594SAlok Kataria tsc2 = (tsc2 - tsc1) * 1000000LL; 820d683ef7aSThomas Gleixner if (hpet) 821827014beSThomas Gleixner tsc2 = calc_hpet_ref(tsc2, ref1, ref2); 822d683ef7aSThomas Gleixner else 823827014beSThomas Gleixner tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2); 824bfc0f594SAlok Kataria 825fbb16e24SThomas Gleixner tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); 826a977c400SThomas Gleixner 827a977c400SThomas Gleixner /* Check the reference deviation */ 828a977c400SThomas Gleixner delta = ((u64) tsc_pit_min) * 100; 829a977c400SThomas Gleixner do_div(delta, tsc_ref_min); 830a977c400SThomas Gleixner 831a977c400SThomas Gleixner /* 832a977c400SThomas Gleixner * If both calibration results are inside a 10% window 833a977c400SThomas Gleixner * then we can be sure, that the calibration 834a977c400SThomas Gleixner * succeeded. We break out of the loop right away. We 835a977c400SThomas Gleixner * use the reference value, as it is more precise. 836a977c400SThomas Gleixner */ 837a977c400SThomas Gleixner if (delta >= 90 && delta <= 110) { 838c767a54bSJoe Perches pr_info("PIT calibration matches %s. %d loops\n", 839a977c400SThomas Gleixner hpet ? "HPET" : "PMTIMER", i + 1); 840a977c400SThomas Gleixner return tsc_ref_min; 841bfc0f594SAlok Kataria } 842bfc0f594SAlok Kataria 843a977c400SThomas Gleixner /* 844a977c400SThomas Gleixner * Check whether PIT failed more than once. This 845a977c400SThomas Gleixner * happens in virtualized environments. We need to 846a977c400SThomas Gleixner * give the virtual PC a slightly longer timeframe for 847a977c400SThomas Gleixner * the HPET/PMTIMER to make the result precise. 848a977c400SThomas Gleixner */ 849a977c400SThomas Gleixner if (i == 1 && tsc_pit_min == ULONG_MAX) { 850a977c400SThomas Gleixner latch = CAL2_LATCH; 851a977c400SThomas Gleixner ms = CAL2_MS; 852a977c400SThomas Gleixner loopmin = CAL2_PIT_LOOPS; 853a977c400SThomas Gleixner } 854bfc0f594SAlok Kataria } 855bfc0f594SAlok Kataria 856fbb16e24SThomas Gleixner /* 857fbb16e24SThomas Gleixner * Now check the results. 858fbb16e24SThomas Gleixner */ 859fbb16e24SThomas Gleixner if (tsc_pit_min == ULONG_MAX) { 860fbb16e24SThomas Gleixner /* PIT gave no useful value */ 861c767a54bSJoe Perches pr_warn("Unable to calibrate against PIT\n"); 862fbb16e24SThomas Gleixner 863fbb16e24SThomas Gleixner /* We don't have an alternative source, disable TSC */ 864827014beSThomas Gleixner if (!hpet && !ref1 && !ref2) { 865c767a54bSJoe Perches pr_notice("No reference (HPET/PMTIMER) available\n"); 866fbb16e24SThomas Gleixner return 0; 867fbb16e24SThomas Gleixner } 868fbb16e24SThomas Gleixner 869fbb16e24SThomas Gleixner /* The alternative source failed as well, disable TSC */ 870fbb16e24SThomas Gleixner if (tsc_ref_min == ULONG_MAX) { 871c767a54bSJoe Perches pr_warn("HPET/PMTIMER calibration failed\n"); 872fbb16e24SThomas Gleixner return 0; 873fbb16e24SThomas Gleixner } 874fbb16e24SThomas Gleixner 875fbb16e24SThomas Gleixner /* Use the alternative source */ 876c767a54bSJoe Perches pr_info("using %s reference calibration\n", 877fbb16e24SThomas Gleixner hpet ? "HPET" : "PMTIMER"); 878fbb16e24SThomas Gleixner 879fbb16e24SThomas Gleixner return tsc_ref_min; 880fbb16e24SThomas Gleixner } 881fbb16e24SThomas Gleixner 882fbb16e24SThomas Gleixner /* We don't have an alternative source, use the PIT calibration value */ 883827014beSThomas Gleixner if (!hpet && !ref1 && !ref2) { 884c767a54bSJoe Perches pr_info("Using PIT calibration value\n"); 885fbb16e24SThomas Gleixner return tsc_pit_min; 886fbb16e24SThomas Gleixner } 887fbb16e24SThomas Gleixner 888fbb16e24SThomas Gleixner /* The alternative source failed, use the PIT calibration value */ 889fbb16e24SThomas Gleixner if (tsc_ref_min == ULONG_MAX) { 890c767a54bSJoe Perches pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n"); 891fbb16e24SThomas Gleixner return tsc_pit_min; 892fbb16e24SThomas Gleixner } 893fbb16e24SThomas Gleixner 894fbb16e24SThomas Gleixner /* 895fbb16e24SThomas Gleixner * The calibration values differ too much. In doubt, we use 896fbb16e24SThomas Gleixner * the PIT value as we know that there are PMTIMERs around 897a977c400SThomas Gleixner * running at double speed. At least we let the user know: 898fbb16e24SThomas Gleixner */ 899c767a54bSJoe Perches pr_warn("PIT calibration deviates from %s: %lu %lu\n", 900a977c400SThomas Gleixner hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); 901c767a54bSJoe Perches pr_info("Using PIT calibration value\n"); 902fbb16e24SThomas Gleixner return tsc_pit_min; 903fbb16e24SThomas Gleixner } 904bfc0f594SAlok Kataria 905bfc0f594SAlok Kataria int recalibrate_cpu_khz(void) 906bfc0f594SAlok Kataria { 907bfc0f594SAlok Kataria #ifndef CONFIG_SMP 908bfc0f594SAlok Kataria unsigned long cpu_khz_old = cpu_khz; 909bfc0f594SAlok Kataria 910eff4677eSBorislav Petkov if (!boot_cpu_has(X86_FEATURE_TSC)) 911eff4677eSBorislav Petkov return -ENODEV; 912eff4677eSBorislav Petkov 913aa297292SLen Brown cpu_khz = x86_platform.calibrate_cpu(); 9142d826404SThomas Gleixner tsc_khz = x86_platform.calibrate_tsc(); 915aa297292SLen Brown if (tsc_khz == 0) 916aa297292SLen Brown tsc_khz = cpu_khz; 917ff4c8663SLen Brown else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz) 918ff4c8663SLen Brown cpu_khz = tsc_khz; 919eff4677eSBorislav Petkov cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy, 920bfc0f594SAlok Kataria cpu_khz_old, cpu_khz); 921eff4677eSBorislav Petkov 922bfc0f594SAlok Kataria return 0; 923bfc0f594SAlok Kataria #else 924bfc0f594SAlok Kataria return -ENODEV; 925bfc0f594SAlok Kataria #endif 926bfc0f594SAlok Kataria } 927bfc0f594SAlok Kataria 928bfc0f594SAlok Kataria EXPORT_SYMBOL(recalibrate_cpu_khz); 929bfc0f594SAlok Kataria 9302dbe06faSAlok Kataria 931cd7240c0SSuresh Siddha static unsigned long long cyc2ns_suspend; 932cd7240c0SSuresh Siddha 933b74f05d6SMarcelo Tosatti void tsc_save_sched_clock_state(void) 934cd7240c0SSuresh Siddha { 93535af99e6SPeter Zijlstra if (!sched_clock_stable()) 936cd7240c0SSuresh Siddha return; 937cd7240c0SSuresh Siddha 938cd7240c0SSuresh Siddha cyc2ns_suspend = sched_clock(); 939cd7240c0SSuresh Siddha } 940cd7240c0SSuresh Siddha 941cd7240c0SSuresh Siddha /* 942cd7240c0SSuresh Siddha * Even on processors with invariant TSC, TSC gets reset in some the 943cd7240c0SSuresh Siddha * ACPI system sleep states. And in some systems BIOS seem to reinit TSC to 944cd7240c0SSuresh Siddha * arbitrary value (still sync'd across cpu's) during resume from such sleep 945cd7240c0SSuresh Siddha * states. To cope up with this, recompute the cyc2ns_offset for each cpu so 946cd7240c0SSuresh Siddha * that sched_clock() continues from the point where it was left off during 947cd7240c0SSuresh Siddha * suspend. 948cd7240c0SSuresh Siddha */ 949b74f05d6SMarcelo Tosatti void tsc_restore_sched_clock_state(void) 950cd7240c0SSuresh Siddha { 951cd7240c0SSuresh Siddha unsigned long long offset; 952cd7240c0SSuresh Siddha unsigned long flags; 953cd7240c0SSuresh Siddha int cpu; 954cd7240c0SSuresh Siddha 95535af99e6SPeter Zijlstra if (!sched_clock_stable()) 956cd7240c0SSuresh Siddha return; 957cd7240c0SSuresh Siddha 958cd7240c0SSuresh Siddha local_irq_save(flags); 959cd7240c0SSuresh Siddha 96020d1c86aSPeter Zijlstra /* 9616a6256f9SAdam Buchbinder * We're coming out of suspend, there's no concurrency yet; don't 96220d1c86aSPeter Zijlstra * bother being nice about the RCU stuff, just write to both 96320d1c86aSPeter Zijlstra * data fields. 96420d1c86aSPeter Zijlstra */ 96520d1c86aSPeter Zijlstra 96620d1c86aSPeter Zijlstra this_cpu_write(cyc2ns.data[0].cyc2ns_offset, 0); 96720d1c86aSPeter Zijlstra this_cpu_write(cyc2ns.data[1].cyc2ns_offset, 0); 96820d1c86aSPeter Zijlstra 969cd7240c0SSuresh Siddha offset = cyc2ns_suspend - sched_clock(); 970cd7240c0SSuresh Siddha 97120d1c86aSPeter Zijlstra for_each_possible_cpu(cpu) { 97220d1c86aSPeter Zijlstra per_cpu(cyc2ns.data[0].cyc2ns_offset, cpu) = offset; 97320d1c86aSPeter Zijlstra per_cpu(cyc2ns.data[1].cyc2ns_offset, cpu) = offset; 97420d1c86aSPeter Zijlstra } 975cd7240c0SSuresh Siddha 976cd7240c0SSuresh Siddha local_irq_restore(flags); 977cd7240c0SSuresh Siddha } 978cd7240c0SSuresh Siddha 9792dbe06faSAlok Kataria #ifdef CONFIG_CPU_FREQ 9802dbe06faSAlok Kataria 9812dbe06faSAlok Kataria /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency 9822dbe06faSAlok Kataria * changes. 9832dbe06faSAlok Kataria * 9842dbe06faSAlok Kataria * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's 9852dbe06faSAlok Kataria * not that important because current Opteron setups do not support 9862dbe06faSAlok Kataria * scaling on SMP anyroads. 9872dbe06faSAlok Kataria * 9882dbe06faSAlok Kataria * Should fix up last_tsc too. Currently gettimeofday in the 9892dbe06faSAlok Kataria * first tick after the change will be slightly wrong. 9902dbe06faSAlok Kataria */ 9912dbe06faSAlok Kataria 9922dbe06faSAlok Kataria static unsigned int ref_freq; 9932dbe06faSAlok Kataria static unsigned long loops_per_jiffy_ref; 9942dbe06faSAlok Kataria static unsigned long tsc_khz_ref; 9952dbe06faSAlok Kataria 9962dbe06faSAlok Kataria static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, 9972dbe06faSAlok Kataria void *data) 9982dbe06faSAlok Kataria { 9992dbe06faSAlok Kataria struct cpufreq_freqs *freq = data; 1000931db6a3SDave Jones unsigned long *lpj; 10012dbe06faSAlok Kataria 10022dbe06faSAlok Kataria lpj = &boot_cpu_data.loops_per_jiffy; 1003931db6a3SDave Jones #ifdef CONFIG_SMP 1004931db6a3SDave Jones if (!(freq->flags & CPUFREQ_CONST_LOOPS)) 1005931db6a3SDave Jones lpj = &cpu_data(freq->cpu).loops_per_jiffy; 10062dbe06faSAlok Kataria #endif 10072dbe06faSAlok Kataria 10082dbe06faSAlok Kataria if (!ref_freq) { 10092dbe06faSAlok Kataria ref_freq = freq->old; 10102dbe06faSAlok Kataria loops_per_jiffy_ref = *lpj; 10112dbe06faSAlok Kataria tsc_khz_ref = tsc_khz; 10122dbe06faSAlok Kataria } 10132dbe06faSAlok Kataria if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || 10140b443eadSViresh Kumar (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { 10152dbe06faSAlok Kataria *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); 10162dbe06faSAlok Kataria 10172dbe06faSAlok Kataria tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); 10182dbe06faSAlok Kataria if (!(freq->flags & CPUFREQ_CONST_LOOPS)) 10192dbe06faSAlok Kataria mark_tsc_unstable("cpufreq changes"); 10202dbe06faSAlok Kataria 102152a8968cSPeter Zijlstra set_cyc2ns_scale(tsc_khz, freq->cpu); 10223896c329SPeter Zijlstra } 10232dbe06faSAlok Kataria 10242dbe06faSAlok Kataria return 0; 10252dbe06faSAlok Kataria } 10262dbe06faSAlok Kataria 10272dbe06faSAlok Kataria static struct notifier_block time_cpufreq_notifier_block = { 10282dbe06faSAlok Kataria .notifier_call = time_cpufreq_notifier 10292dbe06faSAlok Kataria }; 10302dbe06faSAlok Kataria 1031a841cca7SBorislav Petkov static int __init cpufreq_register_tsc_scaling(void) 10322dbe06faSAlok Kataria { 103359e21e3dSBorislav Petkov if (!boot_cpu_has(X86_FEATURE_TSC)) 1034060700b5SLinus Torvalds return 0; 1035060700b5SLinus Torvalds if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) 1036060700b5SLinus Torvalds return 0; 10372dbe06faSAlok Kataria cpufreq_register_notifier(&time_cpufreq_notifier_block, 10382dbe06faSAlok Kataria CPUFREQ_TRANSITION_NOTIFIER); 10392dbe06faSAlok Kataria return 0; 10402dbe06faSAlok Kataria } 10412dbe06faSAlok Kataria 1042a841cca7SBorislav Petkov core_initcall(cpufreq_register_tsc_scaling); 10432dbe06faSAlok Kataria 10442dbe06faSAlok Kataria #endif /* CONFIG_CPU_FREQ */ 10458fbbc4b4SAlok Kataria 1046f9677e0fSChristopher S. Hall #define ART_CPUID_LEAF (0x15) 1047f9677e0fSChristopher S. Hall #define ART_MIN_DENOMINATOR (1) 1048f9677e0fSChristopher S. Hall 1049f9677e0fSChristopher S. Hall 1050f9677e0fSChristopher S. Hall /* 1051f9677e0fSChristopher S. Hall * If ART is present detect the numerator:denominator to convert to TSC 1052f9677e0fSChristopher S. Hall */ 1053f9677e0fSChristopher S. Hall static void detect_art(void) 1054f9677e0fSChristopher S. Hall { 1055f9677e0fSChristopher S. Hall unsigned int unused[2]; 1056f9677e0fSChristopher S. Hall 1057f9677e0fSChristopher S. Hall if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF) 1058f9677e0fSChristopher S. Hall return; 1059f9677e0fSChristopher S. Hall 10607b3d2f6eSThomas Gleixner /* Don't enable ART in a VM, non-stop TSC and TSC_ADJUST required */ 10617b3d2f6eSThomas Gleixner if (boot_cpu_has(X86_FEATURE_HYPERVISOR) || 10627b3d2f6eSThomas Gleixner !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) || 10637b3d2f6eSThomas Gleixner !boot_cpu_has(X86_FEATURE_TSC_ADJUST)) 10647b3d2f6eSThomas Gleixner return; 10657b3d2f6eSThomas Gleixner 1066f9677e0fSChristopher S. Hall cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator, 1067f9677e0fSChristopher S. Hall &art_to_tsc_numerator, unused, unused+1); 1068f9677e0fSChristopher S. Hall 10697b3d2f6eSThomas Gleixner if (art_to_tsc_denominator < ART_MIN_DENOMINATOR) 1070f9677e0fSChristopher S. Hall return; 1071f9677e0fSChristopher S. Hall 10727b3d2f6eSThomas Gleixner rdmsrl(MSR_IA32_TSC_ADJUST, art_to_tsc_offset); 1073f9677e0fSChristopher S. Hall 1074f9677e0fSChristopher S. Hall /* Make this sticky over multiple CPU init calls */ 1075f9677e0fSChristopher S. Hall setup_force_cpu_cap(X86_FEATURE_ART); 1076f9677e0fSChristopher S. Hall } 1077f9677e0fSChristopher S. Hall 1078f9677e0fSChristopher S. Hall 10798fbbc4b4SAlok Kataria /* clocksource code */ 10808fbbc4b4SAlok Kataria 10818fbbc4b4SAlok Kataria static struct clocksource clocksource_tsc; 10828fbbc4b4SAlok Kataria 10836a369583SThomas Gleixner static void tsc_resume(struct clocksource *cs) 10846a369583SThomas Gleixner { 10856a369583SThomas Gleixner tsc_verify_tsc_adjust(true); 10866a369583SThomas Gleixner } 10876a369583SThomas Gleixner 10888fbbc4b4SAlok Kataria /* 108909ec5442SThomas Gleixner * We used to compare the TSC to the cycle_last value in the clocksource 10908fbbc4b4SAlok Kataria * structure to avoid a nasty time-warp. This can be observed in a 10918fbbc4b4SAlok Kataria * very small window right after one CPU updated cycle_last under 10928fbbc4b4SAlok Kataria * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which 10938fbbc4b4SAlok Kataria * is smaller than the cycle_last reference value due to a TSC which 10948fbbc4b4SAlok Kataria * is slighty behind. This delta is nowhere else observable, but in 10958fbbc4b4SAlok Kataria * that case it results in a forward time jump in the range of hours 10968fbbc4b4SAlok Kataria * due to the unsigned delta calculation of the time keeping core 10978fbbc4b4SAlok Kataria * code, which is necessary to support wrapping clocksources like pm 10988fbbc4b4SAlok Kataria * timer. 109909ec5442SThomas Gleixner * 110009ec5442SThomas Gleixner * This sanity check is now done in the core timekeeping code. 110109ec5442SThomas Gleixner * checking the result of read_tsc() - cycle_last for being negative. 110209ec5442SThomas Gleixner * That works because CLOCKSOURCE_MASK(64) does not mask out any bit. 11038fbbc4b4SAlok Kataria */ 11048e19608eSMagnus Damm static cycle_t read_tsc(struct clocksource *cs) 11058fbbc4b4SAlok Kataria { 110627c63405SAndy Lutomirski return (cycle_t)rdtsc_ordered(); 11078fbbc4b4SAlok Kataria } 11088fbbc4b4SAlok Kataria 110909ec5442SThomas Gleixner /* 111009ec5442SThomas Gleixner * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc() 111109ec5442SThomas Gleixner */ 11128fbbc4b4SAlok Kataria static struct clocksource clocksource_tsc = { 11138fbbc4b4SAlok Kataria .name = "tsc", 11148fbbc4b4SAlok Kataria .rating = 300, 11158fbbc4b4SAlok Kataria .read = read_tsc, 11168fbbc4b4SAlok Kataria .mask = CLOCKSOURCE_MASK(64), 11178fbbc4b4SAlok Kataria .flags = CLOCK_SOURCE_IS_CONTINUOUS | 11188fbbc4b4SAlok Kataria CLOCK_SOURCE_MUST_VERIFY, 111998d0ac38SAndy Lutomirski .archdata = { .vclock_mode = VCLOCK_TSC }, 11206a369583SThomas Gleixner .resume = tsc_resume, 11218fbbc4b4SAlok Kataria }; 11228fbbc4b4SAlok Kataria 11238fbbc4b4SAlok Kataria void mark_tsc_unstable(char *reason) 11248fbbc4b4SAlok Kataria { 11258fbbc4b4SAlok Kataria if (!tsc_unstable) { 11268fbbc4b4SAlok Kataria tsc_unstable = 1; 112735af99e6SPeter Zijlstra clear_sched_clock_stable(); 1128e82b8e4eSVenkatesh Pallipadi disable_sched_clock_irqtime(); 1129c767a54bSJoe Perches pr_info("Marking TSC unstable due to %s\n", reason); 11308fbbc4b4SAlok Kataria /* Change only the rating, when not registered */ 11318fbbc4b4SAlok Kataria if (clocksource_tsc.mult) 11327285dd7fSThomas Gleixner clocksource_mark_unstable(&clocksource_tsc); 11337285dd7fSThomas Gleixner else { 11347285dd7fSThomas Gleixner clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; 11358fbbc4b4SAlok Kataria clocksource_tsc.rating = 0; 11368fbbc4b4SAlok Kataria } 11378fbbc4b4SAlok Kataria } 11387285dd7fSThomas Gleixner } 11398fbbc4b4SAlok Kataria 11408fbbc4b4SAlok Kataria EXPORT_SYMBOL_GPL(mark_tsc_unstable); 11418fbbc4b4SAlok Kataria 1142395628efSAlok Kataria static void __init check_system_tsc_reliable(void) 1143395628efSAlok Kataria { 114403da3ff1SDavid Woodhouse #if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC) 114503da3ff1SDavid Woodhouse if (is_geode_lx()) { 11468fbbc4b4SAlok Kataria /* RTSC counts during suspend */ 11478fbbc4b4SAlok Kataria #define RTSC_SUSP 0x100 11488fbbc4b4SAlok Kataria unsigned long res_low, res_high; 11498fbbc4b4SAlok Kataria 11508fbbc4b4SAlok Kataria rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); 115100097c4fSThadeu Lima de Souza Cascardo /* Geode_LX - the OLPC CPU has a very reliable TSC */ 11528fbbc4b4SAlok Kataria if (res_low & RTSC_SUSP) 1153395628efSAlok Kataria tsc_clocksource_reliable = 1; 115403da3ff1SDavid Woodhouse } 11558fbbc4b4SAlok Kataria #endif 1156395628efSAlok Kataria if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) 1157395628efSAlok Kataria tsc_clocksource_reliable = 1; 1158395628efSAlok Kataria } 11598fbbc4b4SAlok Kataria 11608fbbc4b4SAlok Kataria /* 11618fbbc4b4SAlok Kataria * Make an educated guess if the TSC is trustworthy and synchronized 11628fbbc4b4SAlok Kataria * over all CPUs. 11638fbbc4b4SAlok Kataria */ 1164148f9bb8SPaul Gortmaker int unsynchronized_tsc(void) 11658fbbc4b4SAlok Kataria { 116659e21e3dSBorislav Petkov if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_unstable) 11678fbbc4b4SAlok Kataria return 1; 11688fbbc4b4SAlok Kataria 11693e5095d1SIngo Molnar #ifdef CONFIG_SMP 11708fbbc4b4SAlok Kataria if (apic_is_clustered_box()) 11718fbbc4b4SAlok Kataria return 1; 11728fbbc4b4SAlok Kataria #endif 11738fbbc4b4SAlok Kataria 11748fbbc4b4SAlok Kataria if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) 11758fbbc4b4SAlok Kataria return 0; 1176d3b8f889Sjohn stultz 1177d3b8f889Sjohn stultz if (tsc_clocksource_reliable) 1178d3b8f889Sjohn stultz return 0; 11798fbbc4b4SAlok Kataria /* 11808fbbc4b4SAlok Kataria * Intel systems are normally all synchronized. 11818fbbc4b4SAlok Kataria * Exceptions must mark TSC as unstable: 11828fbbc4b4SAlok Kataria */ 11838fbbc4b4SAlok Kataria if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { 11848fbbc4b4SAlok Kataria /* assume multi socket systems are not synchronized: */ 11858fbbc4b4SAlok Kataria if (num_possible_cpus() > 1) 1186d3b8f889Sjohn stultz return 1; 11878fbbc4b4SAlok Kataria } 11888fbbc4b4SAlok Kataria 1189d3b8f889Sjohn stultz return 0; 11908fbbc4b4SAlok Kataria } 11918fbbc4b4SAlok Kataria 1192f9677e0fSChristopher S. Hall /* 1193f9677e0fSChristopher S. Hall * Convert ART to TSC given numerator/denominator found in detect_art() 1194f9677e0fSChristopher S. Hall */ 1195f9677e0fSChristopher S. Hall struct system_counterval_t convert_art_to_tsc(cycle_t art) 1196f9677e0fSChristopher S. Hall { 1197f9677e0fSChristopher S. Hall u64 tmp, res, rem; 1198f9677e0fSChristopher S. Hall 1199f9677e0fSChristopher S. Hall rem = do_div(art, art_to_tsc_denominator); 1200f9677e0fSChristopher S. Hall 1201f9677e0fSChristopher S. Hall res = art * art_to_tsc_numerator; 1202f9677e0fSChristopher S. Hall tmp = rem * art_to_tsc_numerator; 1203f9677e0fSChristopher S. Hall 1204f9677e0fSChristopher S. Hall do_div(tmp, art_to_tsc_denominator); 1205f9677e0fSChristopher S. Hall res += tmp + art_to_tsc_offset; 1206f9677e0fSChristopher S. Hall 1207f9677e0fSChristopher S. Hall return (struct system_counterval_t) {.cs = art_related_clocksource, 1208f9677e0fSChristopher S. Hall .cycles = res}; 1209f9677e0fSChristopher S. Hall } 1210f9677e0fSChristopher S. Hall EXPORT_SYMBOL(convert_art_to_tsc); 121108ec0c58SJohn Stultz 121208ec0c58SJohn Stultz static void tsc_refine_calibration_work(struct work_struct *work); 121308ec0c58SJohn Stultz static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work); 121408ec0c58SJohn Stultz /** 121508ec0c58SJohn Stultz * tsc_refine_calibration_work - Further refine tsc freq calibration 121608ec0c58SJohn Stultz * @work - ignored. 121708ec0c58SJohn Stultz * 121808ec0c58SJohn Stultz * This functions uses delayed work over a period of a 121908ec0c58SJohn Stultz * second to further refine the TSC freq value. Since this is 122008ec0c58SJohn Stultz * timer based, instead of loop based, we don't block the boot 122108ec0c58SJohn Stultz * process while this longer calibration is done. 122208ec0c58SJohn Stultz * 12230d2eb44fSLucas De Marchi * If there are any calibration anomalies (too many SMIs, etc), 122408ec0c58SJohn Stultz * or the refined calibration is off by 1% of the fast early 122508ec0c58SJohn Stultz * calibration, we throw out the new calibration and use the 122608ec0c58SJohn Stultz * early calibration. 122708ec0c58SJohn Stultz */ 122808ec0c58SJohn Stultz static void tsc_refine_calibration_work(struct work_struct *work) 122908ec0c58SJohn Stultz { 123008ec0c58SJohn Stultz static u64 tsc_start = -1, ref_start; 123108ec0c58SJohn Stultz static int hpet; 123208ec0c58SJohn Stultz u64 tsc_stop, ref_stop, delta; 123308ec0c58SJohn Stultz unsigned long freq; 123408ec0c58SJohn Stultz 123508ec0c58SJohn Stultz /* Don't bother refining TSC on unstable systems */ 123608ec0c58SJohn Stultz if (check_tsc_unstable()) 123708ec0c58SJohn Stultz goto out; 123808ec0c58SJohn Stultz 123908ec0c58SJohn Stultz /* 124008ec0c58SJohn Stultz * Since the work is started early in boot, we may be 124108ec0c58SJohn Stultz * delayed the first time we expire. So set the workqueue 124208ec0c58SJohn Stultz * again once we know timers are working. 124308ec0c58SJohn Stultz */ 124408ec0c58SJohn Stultz if (tsc_start == -1) { 124508ec0c58SJohn Stultz /* 124608ec0c58SJohn Stultz * Only set hpet once, to avoid mixing hardware 124708ec0c58SJohn Stultz * if the hpet becomes enabled later. 124808ec0c58SJohn Stultz */ 124908ec0c58SJohn Stultz hpet = is_hpet_enabled(); 125008ec0c58SJohn Stultz schedule_delayed_work(&tsc_irqwork, HZ); 125108ec0c58SJohn Stultz tsc_start = tsc_read_refs(&ref_start, hpet); 125208ec0c58SJohn Stultz return; 125308ec0c58SJohn Stultz } 125408ec0c58SJohn Stultz 125508ec0c58SJohn Stultz tsc_stop = tsc_read_refs(&ref_stop, hpet); 125608ec0c58SJohn Stultz 125708ec0c58SJohn Stultz /* hpet or pmtimer available ? */ 125862627becSJohn Stultz if (ref_start == ref_stop) 125908ec0c58SJohn Stultz goto out; 126008ec0c58SJohn Stultz 126108ec0c58SJohn Stultz /* Check, whether the sampling was disturbed by an SMI */ 126208ec0c58SJohn Stultz if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX) 126308ec0c58SJohn Stultz goto out; 126408ec0c58SJohn Stultz 126508ec0c58SJohn Stultz delta = tsc_stop - tsc_start; 126608ec0c58SJohn Stultz delta *= 1000000LL; 126708ec0c58SJohn Stultz if (hpet) 126808ec0c58SJohn Stultz freq = calc_hpet_ref(delta, ref_start, ref_stop); 126908ec0c58SJohn Stultz else 127008ec0c58SJohn Stultz freq = calc_pmtimer_ref(delta, ref_start, ref_stop); 127108ec0c58SJohn Stultz 127208ec0c58SJohn Stultz /* Make sure we're within 1% */ 127308ec0c58SJohn Stultz if (abs(tsc_khz - freq) > tsc_khz/100) 127408ec0c58SJohn Stultz goto out; 127508ec0c58SJohn Stultz 127608ec0c58SJohn Stultz tsc_khz = freq; 1277c767a54bSJoe Perches pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n", 1278c767a54bSJoe Perches (unsigned long)tsc_khz / 1000, 127908ec0c58SJohn Stultz (unsigned long)tsc_khz % 1000); 128008ec0c58SJohn Stultz 12816731b0d6SNicolai Stange /* Inform the TSC deadline clockevent devices about the recalibration */ 12826731b0d6SNicolai Stange lapic_update_tsc_freq(); 12836731b0d6SNicolai Stange 128408ec0c58SJohn Stultz out: 1285f9677e0fSChristopher S. Hall if (boot_cpu_has(X86_FEATURE_ART)) 1286f9677e0fSChristopher S. Hall art_related_clocksource = &clocksource_tsc; 128708ec0c58SJohn Stultz clocksource_register_khz(&clocksource_tsc, tsc_khz); 128808ec0c58SJohn Stultz } 128908ec0c58SJohn Stultz 129008ec0c58SJohn Stultz 129108ec0c58SJohn Stultz static int __init init_tsc_clocksource(void) 12928fbbc4b4SAlok Kataria { 129359e21e3dSBorislav Petkov if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz) 1294a8760ecaSThomas Gleixner return 0; 1295a8760ecaSThomas Gleixner 1296395628efSAlok Kataria if (tsc_clocksource_reliable) 1297395628efSAlok Kataria clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; 12988fbbc4b4SAlok Kataria /* lower the rating if we already know its unstable: */ 12998fbbc4b4SAlok Kataria if (check_tsc_unstable()) { 13008fbbc4b4SAlok Kataria clocksource_tsc.rating = 0; 13018fbbc4b4SAlok Kataria clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; 13028fbbc4b4SAlok Kataria } 130357779dc2SAlok Kataria 130482f9c080SFeng Tang if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3)) 130582f9c080SFeng Tang clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; 130682f9c080SFeng Tang 130757779dc2SAlok Kataria /* 130847c95a46SBin Gao * When TSC frequency is known (retrieved via MSR or CPUID), we skip 130947c95a46SBin Gao * the refined calibration and directly register it as a clocksource. 131057779dc2SAlok Kataria */ 1311984fecebSThomas Gleixner if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) { 131257779dc2SAlok Kataria clocksource_register_khz(&clocksource_tsc, tsc_khz); 131357779dc2SAlok Kataria return 0; 131457779dc2SAlok Kataria } 131557779dc2SAlok Kataria 131608ec0c58SJohn Stultz schedule_delayed_work(&tsc_irqwork, 0); 131708ec0c58SJohn Stultz return 0; 13188fbbc4b4SAlok Kataria } 131908ec0c58SJohn Stultz /* 132008ec0c58SJohn Stultz * We use device_initcall here, to ensure we run after the hpet 132108ec0c58SJohn Stultz * is fully initialized, which may occur at fs_initcall time. 132208ec0c58SJohn Stultz */ 132308ec0c58SJohn Stultz device_initcall(init_tsc_clocksource); 13248fbbc4b4SAlok Kataria 13258fbbc4b4SAlok Kataria void __init tsc_init(void) 13268fbbc4b4SAlok Kataria { 13278fbbc4b4SAlok Kataria u64 lpj; 13288fbbc4b4SAlok Kataria int cpu; 13298fbbc4b4SAlok Kataria 133059e21e3dSBorislav Petkov if (!boot_cpu_has(X86_FEATURE_TSC)) { 1331b47dcbdcSAndy Lutomirski setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); 13328fbbc4b4SAlok Kataria return; 1333b47dcbdcSAndy Lutomirski } 13348fbbc4b4SAlok Kataria 1335aa297292SLen Brown cpu_khz = x86_platform.calibrate_cpu(); 13362d826404SThomas Gleixner tsc_khz = x86_platform.calibrate_tsc(); 1337ff4c8663SLen Brown 1338ff4c8663SLen Brown /* 1339ff4c8663SLen Brown * Trust non-zero tsc_khz as authorative, 1340ff4c8663SLen Brown * and use it to sanity check cpu_khz, 1341ff4c8663SLen Brown * which will be off if system timer is off. 1342ff4c8663SLen Brown */ 1343aa297292SLen Brown if (tsc_khz == 0) 1344aa297292SLen Brown tsc_khz = cpu_khz; 1345ff4c8663SLen Brown else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz) 1346ff4c8663SLen Brown cpu_khz = tsc_khz; 13478fbbc4b4SAlok Kataria 1348e93ef949SAlok Kataria if (!tsc_khz) { 13498fbbc4b4SAlok Kataria mark_tsc_unstable("could not calculate TSC khz"); 1350b47dcbdcSAndy Lutomirski setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); 13518fbbc4b4SAlok Kataria return; 13528fbbc4b4SAlok Kataria } 13538fbbc4b4SAlok Kataria 1354c767a54bSJoe Perches pr_info("Detected %lu.%03lu MHz processor\n", 13558fbbc4b4SAlok Kataria (unsigned long)cpu_khz / 1000, 13568fbbc4b4SAlok Kataria (unsigned long)cpu_khz % 1000); 13578fbbc4b4SAlok Kataria 13588fbbc4b4SAlok Kataria /* 13598fbbc4b4SAlok Kataria * Secondary CPUs do not run through tsc_init(), so set up 13608fbbc4b4SAlok Kataria * all the scale factors for all CPUs, assuming the same 13618fbbc4b4SAlok Kataria * speed as the bootup CPU. (cpufreq notifiers will fix this 13628fbbc4b4SAlok Kataria * up if their speed diverges) 13638fbbc4b4SAlok Kataria */ 136420d1c86aSPeter Zijlstra for_each_possible_cpu(cpu) { 136520d1c86aSPeter Zijlstra cyc2ns_init(cpu); 1366aa297292SLen Brown set_cyc2ns_scale(tsc_khz, cpu); 136720d1c86aSPeter Zijlstra } 13688fbbc4b4SAlok Kataria 13698fbbc4b4SAlok Kataria if (tsc_disabled > 0) 13708fbbc4b4SAlok Kataria return; 13718fbbc4b4SAlok Kataria 13728fbbc4b4SAlok Kataria /* now allow native_sched_clock() to use rdtsc */ 137310b033d4SPeter Zijlstra 13748fbbc4b4SAlok Kataria tsc_disabled = 0; 13753bbfafb7SPeter Zijlstra static_branch_enable(&__use_tsc); 13768fbbc4b4SAlok Kataria 1377e82b8e4eSVenkatesh Pallipadi if (!no_sched_irq_time) 1378e82b8e4eSVenkatesh Pallipadi enable_sched_clock_irqtime(); 1379e82b8e4eSVenkatesh Pallipadi 138070de9a97SAlok Kataria lpj = ((u64)tsc_khz * 1000); 138170de9a97SAlok Kataria do_div(lpj, HZ); 138270de9a97SAlok Kataria lpj_fine = lpj; 138370de9a97SAlok Kataria 13848fbbc4b4SAlok Kataria use_tsc_delay(); 13858fbbc4b4SAlok Kataria 13868fbbc4b4SAlok Kataria if (unsynchronized_tsc()) 13878fbbc4b4SAlok Kataria mark_tsc_unstable("TSCs unsynchronized"); 13888b223bc7SThomas Gleixner else 13895bae1562SThomas Gleixner tsc_store_and_check_tsc_adjust(true); 13908fbbc4b4SAlok Kataria 1391395628efSAlok Kataria check_system_tsc_reliable(); 1392f9677e0fSChristopher S. Hall 1393f9677e0fSChristopher S. Hall detect_art(); 13948fbbc4b4SAlok Kataria } 13958fbbc4b4SAlok Kataria 1396b565201cSJack Steiner #ifdef CONFIG_SMP 1397b565201cSJack Steiner /* 1398b565201cSJack Steiner * If we have a constant TSC and are using the TSC for the delay loop, 1399b565201cSJack Steiner * we can skip clock calibration if another cpu in the same socket has already 1400b565201cSJack Steiner * been calibrated. This assumes that CONSTANT_TSC applies to all 1401b565201cSJack Steiner * cpus in the socket - this should be a safe assumption. 1402b565201cSJack Steiner */ 1403148f9bb8SPaul Gortmaker unsigned long calibrate_delay_is_known(void) 1404b565201cSJack Steiner { 1405c25323c0SThomas Gleixner int sibling, cpu = smp_processor_id(); 1406f508a5baSThomas Gleixner struct cpumask *mask = topology_core_cpumask(cpu); 1407b565201cSJack Steiner 1408b565201cSJack Steiner if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC)) 1409b565201cSJack Steiner return 0; 1410b565201cSJack Steiner 1411f508a5baSThomas Gleixner if (!mask) 1412f508a5baSThomas Gleixner return 0; 1413f508a5baSThomas Gleixner 1414f508a5baSThomas Gleixner sibling = cpumask_any_but(mask, cpu); 1415c25323c0SThomas Gleixner if (sibling < nr_cpu_ids) 1416c25323c0SThomas Gleixner return cpu_data(sibling).loops_per_jiffy; 1417b565201cSJack Steiner return 0; 1418b565201cSJack Steiner } 1419b565201cSJack Steiner #endif 1420