#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/acpi_pmtmr.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/clocksource.h>
#include <linux/percpu.h>
#include <linux/timex.h>
#include <linux/static_key.h>

#include <asm/hpet.h>
#include <asm/timer.h>
#include <asm/vgtod.h>
#include <asm/time.h>
#include <asm/delay.h>
#include <asm/hypervisor.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>

unsigned int __read_mostly cpu_khz;	/* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);

unsigned int __read_mostly tsc_khz;
EXPORT_SYMBOL(tsc_khz);

/*
 * TSC can be unstable due to cpufreq or due to unsynced TSCs
 */
static int __read_mostly tsc_unstable;

/* native_sched_clock() is called before tsc_init(), so
   we must start with the TSC soft disabled to prevent
   erroneous rdtsc usage on !cpu_has_tsc processors */
static int __read_mostly tsc_disabled = -1;

static struct static_key __use_tsc = STATIC_KEY_INIT;

int tsc_clocksource_reliable;

/*
 * Use a ring-buffer like data structure, where a writer advances the head by
 * writing a new data entry and a reader advances the tail when it observes a
 * new entry.
 *
 * Writers are made to wait on readers until there's space to write a new
 * entry.
 *
 * This means that we can always use an {offset, mul} pair to compute a ns
 * value that is 'roughly' in the right direction, even if we're writing a new
 * {offset, mul} pair during the clock read.
 *
 * The down-side is that we can no longer guarantee strict monotonicity
 * (assuming the TSC was monotonic to begin with), because while we compute the
 * intersection point of the two clock slopes and make sure the time is
 * continuous at the point of switching, we can no longer guarantee a reader is
 * strictly before or after the switch point.
 *
 * It does mean a reader no longer needs to disable IRQs in order to avoid
 * CPU-Freq updates messing with its times, and similarly an NMI reader will
 * no longer run the risk of hitting half-written state.
 */

struct cyc2ns {
	struct cyc2ns_data data[2];	/*  0 + 2*24 = 48 */
	struct cyc2ns_data *head;	/* 48 + 8    = 56 */
	struct cyc2ns_data *tail;	/* 56 + 8    = 64 */
}; /* exactly fits one cacheline */

static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);

struct cyc2ns_data *cyc2ns_read_begin(void)
{
	struct cyc2ns_data *head;

	preempt_disable();

	head = this_cpu_read(cyc2ns.head);
	/*
	 * Ensure we observe the entry when we observe the pointer to it.
	 * matches the wmb from cyc2ns_write_end().
	 */
	smp_read_barrier_depends();
	head->__count++;
	barrier();

	return head;
}

void cyc2ns_read_end(struct cyc2ns_data *head)
{
	barrier();
	/*
	 * If we're the outermost nested read, update the tail pointer
	 * when we're done. This notifies possible pending writers
	 * that we've observed the head pointer and that the other
	 * entry is now free.
	 */
	if (!--head->__count) {
		/*
		 * x86-TSO does not reorder writes with older reads;
		 * therefore once this write becomes visible to another
		 * cpu, we must be finished reading the cyc2ns_data.
		 *
		 * matches with cyc2ns_write_begin().
		 */
		this_cpu_write(cyc2ns.tail, head);
	}
	preempt_enable();
}
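/*
 * Illustrative usage sketch (an editorial addition for clarity, not part of
 * the upstream file): a reader converts a raw TSC value to nanoseconds by
 * bracketing the conversion with the read-side helpers above, e.g.:
 *
 *	struct cyc2ns_data *data = cyc2ns_read_begin();
 *	u64 ns = data->cyc2ns_offset +
 *		 mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
 *	cyc2ns_read_end(data);
 *
 * cycles_2_ns() below open-codes this same pattern to save a few
 * instructions on the sched_clock() fast path.
 */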

/*
 * Begin writing a new @data entry for @cpu.
 *
 * Assumes some sort of write side lock; currently 'provided' by the assumption
 * that cpufreq will call its notifiers sequentially.
 */
static struct cyc2ns_data *cyc2ns_write_begin(int cpu)
{
	struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
	struct cyc2ns_data *data = c2n->data;

	if (data == c2n->head)
		data++;

	/* XXX send an IPI to @cpu in order to guarantee a read? */

	/*
	 * When we observe the tail write from cyc2ns_read_end(),
	 * the cpu must be done with that entry and it's safe
	 * to start writing to it.
	 */
	while (c2n->tail == data)
		cpu_relax();

	return data;
}

static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data)
{
	struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);

	/*
	 * Ensure the @data writes are visible before we publish the
	 * entry. Matches the data-dependency in cyc2ns_read_begin().
	 */
	smp_wmb();

	ACCESS_ONCE(c2n->head) = data;
}

/*
 * Accelerators for sched_clock()
 * convert from cycles(64bits) => nanoseconds (64bits)
 * basic equation:
 *	ns = cycles / (freq / ns_per_sec)
 *	ns = cycles * (ns_per_sec / freq)
 *	ns = cycles * (10^9 / (cpu_khz * 10^3))
 *	ns = cycles * (10^6 / cpu_khz)
 *
 * Then we use scaling math (suggested by george@mvista.com) to get:
 *	ns = cycles * (10^6 * SC / cpu_khz) / SC
 *	ns = cycles * cyc2ns_scale / SC
 *
 * And since SC is a constant power of two, we can convert the div
 * into a shift.
 *
 * We can use a khz divisor instead of mhz to keep better precision, since
 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
 * (mathieu.desnoyers@polymtl.ca)
 *
 *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
 */

#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */

static void cyc2ns_data_init(struct cyc2ns_data *data)
{
	data->cyc2ns_mul = 0;
	data->cyc2ns_shift = CYC2NS_SCALE_FACTOR;
	data->cyc2ns_offset = 0;
	data->__count = 0;
}

static void cyc2ns_init(int cpu)
{
	struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);

	cyc2ns_data_init(&c2n->data[0]);
	cyc2ns_data_init(&c2n->data[1]);

	c2n->head = c2n->data;
	c2n->tail = c2n->data;
}

static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
	struct cyc2ns_data *data, *tail;
	unsigned long long ns;

	/*
	 * See cyc2ns_read_*() for details; replicated in order to avoid
	 * an extra few instructions that came with the abstraction.
	 * Notably, it allows us to only do the __count and tail update
	 * dance when it's actually needed.
	 */

	preempt_disable();
	data = this_cpu_read(cyc2ns.head);
	tail = this_cpu_read(cyc2ns.tail);

	if (likely(data == tail)) {
		ns = data->cyc2ns_offset;
		ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR);
	} else {
		data->__count++;

		barrier();

		ns = data->cyc2ns_offset;
		ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR);

		barrier();

		if (!--data->__count)
			this_cpu_write(cyc2ns.tail, data);
	}
	preempt_enable();

	return ns;
}

/* XXX surely we already have this someplace in the kernel?! */
#define DIV_ROUND(n, d) (((n) + ((d) / 2)) / (d))

static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
{
	unsigned long long tsc_now, ns_now;
	struct cyc2ns_data *data;
	unsigned long flags;

	local_irq_save(flags);
	sched_clock_idle_sleep_event();

	if (!cpu_khz)
		goto done;

	data = cyc2ns_write_begin(cpu);

	rdtscll(tsc_now);
	ns_now = cycles_2_ns(tsc_now);

	/*
	 * Compute a new multiplier as per the above comment and ensure our
	 * time function is continuous; see the comment near struct
	 * cyc2ns_data.
	 */
	data->cyc2ns_mul = DIV_ROUND(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR, cpu_khz);
	data->cyc2ns_shift = CYC2NS_SCALE_FACTOR;
	data->cyc2ns_offset = ns_now -
		mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR);

	cyc2ns_write_end(cpu, data);

done:
	sched_clock_idle_wakeup_event(0);
	local_irq_restore(flags);
}
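/*
 * Worked example (illustrative numbers added for clarity, not computed by
 * the code above): for a hypothetical 2 GHz CPU, cpu_khz = 2000000 and
 * set_cyc2ns_scale() yields
 *
 *	cyc2ns_mul   = DIV_ROUND(10^6 << 10, 2000000) = 512
 *	cyc2ns_shift = 10
 *
 * so cycles_2_ns(2 * 10^9) = 2 * 10^9 * 512 >> 10 = 10^9 ns, i.e. one
 * second's worth of cycles at 2 GHz converts to one second, as expected.
 * (The offset term is ignored here for simplicity.)
 */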

/*
 * Scheduler clock - returns current time in nanosec units.
 */
u64 native_sched_clock(void)
{
	u64 tsc_now;

	/*
	 * Fall back to jiffies if there's no TSC available:
	 * ( But note that we still use it if the TSC is marked
	 *   unstable. We do this because unlike Time Of Day,
	 *   the scheduler clock tolerates small errors and it's
	 *   very important for it to be as fast as the platform
	 *   can achieve it. )
	 */
	if (!static_key_false(&__use_tsc)) {
		/* No locking but a rare wrong value is not a big deal: */
		return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
	}

	/* read the Time Stamp Counter: */
	rdtscll(tsc_now);

	/* return the value in ns */
	return cycles_2_ns(tsc_now);
}

/* We need to define a real function for sched_clock, to override the
   weak default version */
#ifdef CONFIG_PARAVIRT
unsigned long long sched_clock(void)
{
	return paravirt_sched_clock();
}
#else
unsigned long long
sched_clock(void) __attribute__((alias("native_sched_clock")));
#endif

unsigned long long native_read_tsc(void)
{
	return __native_read_tsc();
}
EXPORT_SYMBOL(native_read_tsc);

int check_tsc_unstable(void)
{
	return tsc_unstable;
}
EXPORT_SYMBOL_GPL(check_tsc_unstable);

int check_tsc_disabled(void)
{
	return tsc_disabled;
}
EXPORT_SYMBOL_GPL(check_tsc_disabled);

#ifdef CONFIG_X86_TSC
int __init notsc_setup(char *str)
{
	pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n");
	tsc_disabled = 1;
	return 1;
}
#else
/*
 * disable flag for tsc.  Takes effect by clearing the TSC cpu flag
 * in cpu/common.c
 */
int __init notsc_setup(char *str)
{
	setup_clear_cpu_cap(X86_FEATURE_TSC);
	return 1;
}
#endif

__setup("notsc", notsc_setup);

static int no_sched_irq_time;

static int __init tsc_setup(char *str)
{
	if (!strcmp(str, "reliable"))
		tsc_clocksource_reliable = 1;
	if (!strncmp(str, "noirqtime", 9))
		no_sched_irq_time = 1;
	return 1;
}

__setup("tsc=", tsc_setup);

#define MAX_RETRIES	5
#define SMI_TRESHOLD	50000

/*
 * Read TSC and the reference counters. Take care of SMI disturbance
 */
static u64 tsc_read_refs(u64 *p, int hpet)
{
	u64 t1, t2;
	int i;

	for (i = 0; i < MAX_RETRIES; i++) {
		t1 = get_cycles();
		if (hpet)
			*p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
		else
			*p = acpi_pm_read_early();
		t2 = get_cycles();
		if ((t2 - t1) < SMI_TRESHOLD)
			return t2;
	}
	return ULLONG_MAX;
}

/*
 * Calculate the TSC frequency from HPET reference
 */
static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
{
	u64 tmp;

	if (hpet2 < hpet1)
		hpet2 += 0x100000000ULL;
	hpet2 -= hpet1;
	tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
	do_div(tmp, 1000000);
	do_div(deltatsc, tmp);

	return (unsigned long) deltatsc;
}
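/*
 * Unit check (illustrative numbers added for clarity, not computed by the
 * code): deltatsc is passed in as elapsed-TSC-cycles * 10^6 and
 * hpet_readl(HPET_PERIOD) returns the HPET tick period in femtoseconds, so
 * tmp ends up as the elapsed time in nanoseconds and the result is in kHz.
 * E.g. a 2 GHz TSC over a ~50 ms window gives deltatsc = 10^8 * 10^6 = 10^14,
 * tmp ~= 5 * 10^7 ns, and 10^14 / (5 * 10^7) = 2 * 10^6 kHz = 2 GHz.
 */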

/*
 * Calculate the TSC frequency from PMTimer reference
 */
static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
{
	u64 tmp;

	if (!pm1 && !pm2)
		return ULONG_MAX;

	if (pm2 < pm1)
		pm2 += (u64)ACPI_PM_OVRRUN;
	pm2 -= pm1;
	tmp = pm2 * 1000000000LL;
	do_div(tmp, PMTMR_TICKS_PER_SEC);
	do_div(deltatsc, tmp);

	return (unsigned long) deltatsc;
}

#define CAL_MS		10
#define CAL_LATCH	(PIT_TICK_RATE / (1000 / CAL_MS))
#define CAL_PIT_LOOPS	1000

#define CAL2_MS		50
#define CAL2_LATCH	(PIT_TICK_RATE / (1000 / CAL2_MS))
#define CAL2_PIT_LOOPS	5000

/*
 * Try to calibrate the TSC against the Programmable
 * Interrupt Timer and return the frequency of the TSC
 * in kHz.
 *
 * Return ULONG_MAX on failure to calibrate.
 */
static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
{
	u64 tsc, t1, t2, delta;
	unsigned long tscmin, tscmax;
	int pitcnt;

	/* Set the Gate high, disable speaker */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);

	/*
	 * Setup CTC channel 2* for mode 0, (interrupt on terminal
	 * count mode), binary count. Set the latch register to 50ms
	 * (LSB then MSB) to begin countdown.
	 */
	outb(0xb0, 0x43);
	outb(latch & 0xff, 0x42);
	outb(latch >> 8, 0x42);

	tsc = t1 = t2 = get_cycles();

	pitcnt = 0;
	tscmax = 0;
	tscmin = ULONG_MAX;
	while ((inb(0x61) & 0x20) == 0) {
		t2 = get_cycles();
		delta = t2 - tsc;
		tsc = t2;
		if ((unsigned long) delta < tscmin)
			tscmin = (unsigned int) delta;
		if ((unsigned long) delta > tscmax)
			tscmax = (unsigned int) delta;
		pitcnt++;
	}

	/*
	 * Sanity checks:
	 *
	 * If we were not able to read the PIT more than loopmin
	 * times, then we have been hit by a massive SMI
	 *
	 * If the maximum is 10 times larger than the minimum,
	 * then we got hit by an SMI as well.
	 */
	if (pitcnt < loopmin || tscmax > 10 * tscmin)
		return ULONG_MAX;

	/* Calculate the PIT value */
	delta = t2 - t1;
	do_div(delta, ms);
	return delta;
}

/*
 * This reads the current MSB of the PIT counter, and
 * checks if we are running on sufficiently fast and
 * non-virtualized hardware.
 *
 * Our expectations are:
 *
 *  - the PIT is running at roughly 1.19MHz
 *
 *  - each IO is going to take about 1us on real hardware,
 *    but we allow it to be much faster (by a factor of 10) or
 *    _slightly_ slower (ie we allow up to a 2us read+counter
 *    update - anything else implies an unacceptably slow CPU
 *    or PIT for the fast calibration to work.
 *
 *  - with 256 PIT ticks to read the value, we have 214us to
 *    see the same MSB (and overhead like doing a single TSC
 *    read per MSB value etc).
 *
 *  - We're doing 2 reads per loop (LSB, MSB), and we expect
 *    them each to take about a microsecond on real hardware.
 *    So we expect a count value of around 100. But we'll be
 *    generous, and accept anything over 50.
 *
 *  - if the PIT is stuck, and we see *many* more reads, we
 *    return early (and the next caller of pit_expect_msb()
 *    will then consider it a failure when it doesn't see the
 *    next expected value).
 *
 * These expectations mean that we know that we have seen the
 * transition from one expected value to another with a fairly
 * high accuracy, and we didn't miss any events. We can thus
 * use the TSC value at the transitions to calculate a pretty
 * good value for the TSC frequency.
 */
static inline int pit_verify_msb(unsigned char val)
{
	/* Ignore LSB */
	inb(0x42);
	return inb(0x42) == val;
}

static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
{
	int count;
	u64 tsc = 0, prev_tsc = 0;

	for (count = 0; count < 50000; count++) {
		if (!pit_verify_msb(val))
			break;
		prev_tsc = tsc;
		tsc = get_cycles();
	}
	*deltap = get_cycles() - prev_tsc;
	*tscp = tsc;

	/*
	 * We require _some_ success, but the quality control
	 * will be based on the error terms on the TSC values.
	 */
	return count > 5;
}

/*
 * How many MSB values do we want to see? We aim for
 * a maximum error rate of 500ppm (in practice the
 * real error is much smaller), but refuse to spend
 * more than 50ms on it.
 */
#define MAX_QUICK_PIT_MS 50
#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
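/*
 * Illustrative arithmetic (added for clarity): PIT_TICK_RATE is 1193182 Hz,
 * so 50 ms corresponds to roughly 59659 PIT ticks, i.e. about 233 steps of
 * the 256-tick MSB counter -- which is what the macro above works out to.
 */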

static unsigned long quick_pit_calibrate(void)
{
	int i;
	u64 tsc, delta;
	unsigned long d1, d2;

	/* Set the Gate high, disable speaker */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);

	/*
	 * Counter 2, mode 0 (one-shot), binary count
	 *
	 * NOTE! Mode 2 decrements by two (and then the
	 * output is flipped each time, giving the same
	 * final output frequency as a decrement-by-one),
	 * so mode 0 is much better when looking at the
	 * individual counts.
	 */
	outb(0xb0, 0x43);

	/* Start at 0xffff */
	outb(0xff, 0x42);
	outb(0xff, 0x42);

	/*
	 * The PIT starts counting at the next edge, so we
	 * need to delay for a microsecond. The easiest way
	 * to do that is to just read back the 16-bit counter
	 * once from the PIT.
	 */
	pit_verify_msb(0);

	if (pit_expect_msb(0xff, &tsc, &d1)) {
		for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
			if (!pit_expect_msb(0xff-i, &delta, &d2))
				break;

			/*
			 * Iterate until the error is less than 500 ppm;
			 * delta >> 11 is delta/2048, i.e. roughly 488 ppm
			 * of the elapsed cycles.
			 */
			delta -= tsc;
			if (d1+d2 >= delta >> 11)
				continue;

			/*
			 * Check the PIT one more time to verify that
			 * all TSC reads were stable wrt the PIT.
			 *
			 * This also guarantees serialization of the
			 * last cycle read ('d2') in pit_expect_msb.
			 */
			if (!pit_verify_msb(0xfe - i))
				break;
			goto success;
		}
	}
	pr_err("Fast TSC calibration failed\n");
	return 0;

success:
	/*
	 * Ok, if we get here, then we've seen the
	 * MSB of the PIT decrement 'i' times, and the
	 * error has shrunk to less than 500 ppm.
	 *
	 * As a result, we can depend on there not being
	 * any odd delays anywhere, and the TSC reads are
	 * reliable (within the error).
	 *
	 * kHz = ticks / time-in-seconds / 1000;
	 * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000
	 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000)
	 */
	delta *= PIT_TICK_RATE;
	do_div(delta, i*256*1000);
	pr_info("Fast TSC calibration using PIT\n");
	return delta;
}
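/*
 * Worked example (illustrative, assuming a hypothetical ~3 GHz TSC): each
 * MSB step is 256 PIT ticks ~= 214.6 us, i.e. roughly 644000 TSC cycles.
 * If the loop above succeeds after i = 233 steps, delta ~= 1.5 * 10^8 cycles
 * and delta * PIT_TICK_RATE / (i * 256 * 1000) ~= 3.0 * 10^6, i.e. ~3 GHz
 * expressed in kHz.
 */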

/**
 * native_calibrate_tsc - calibrate the tsc on boot
 */
unsigned long native_calibrate_tsc(void)
{
	u64 tsc1, tsc2, delta, ref1, ref2;
	unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
	unsigned long flags, latch, ms, fast_calibrate;
	int hpet = is_hpet_enabled(), i, loopmin;

	/* Calibrate TSC using MSR for Intel Atom SoCs */
	local_irq_save(flags);
	i = try_msr_calibrate_tsc(&fast_calibrate);
	local_irq_restore(flags);
	if (i >= 0) {
		if (i == 0)
			pr_warn("Fast TSC calibration using MSR failed\n");
		return fast_calibrate;
	}

	local_irq_save(flags);
	fast_calibrate = quick_pit_calibrate();
	local_irq_restore(flags);
	if (fast_calibrate)
		return fast_calibrate;

	/*
	 * Run 5 calibration loops to get the lowest frequency value
	 * (the best estimate). We use two different calibration modes
	 * here:
	 *
	 * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and
	 * load a timeout of 50ms. We read the time right after we
	 * started the timer and wait until the PIT count down reaches
	 * zero. In each wait loop iteration we read the TSC and check
	 * the delta to the previous read. We keep track of the min
	 * and max values of that delta. The delta is mostly defined
	 * by the IO time of the PIT access, so we can detect when an
	 * SMI/SMM disturbance happened between the two reads. If the
	 * maximum time is significantly larger than the minimum time,
	 * then we discard the result and have another try.
	 *
	 * 2) Reference counter. If available we use the HPET or the
	 * PMTIMER as a reference to check the sanity of that value.
	 * We use separate TSC readouts and check inside of the
	 * reference read for an SMI/SMM disturbance. We discard
	 * disturbed values here as well. We do that around the PIT
	 * calibration delay loop as we have to wait for a certain
	 * amount of time anyway.
	 */

	/* Preset PIT loop values */
	latch = CAL_LATCH;
	ms = CAL_MS;
	loopmin = CAL_PIT_LOOPS;

	for (i = 0; i < 3; i++) {
		unsigned long tsc_pit_khz;

		/*
		 * Read the start value and the reference count of
		 * hpet/pmtimer when available. Then do the PIT
		 * calibration, which will take at least 50ms, and
		 * read the end value.
		 */
		local_irq_save(flags);
		tsc1 = tsc_read_refs(&ref1, hpet);
		tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin);
		tsc2 = tsc_read_refs(&ref2, hpet);
		local_irq_restore(flags);

		/* Pick the lowest PIT TSC calibration so far */
		tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);

		/* hpet or pmtimer available ? */
		if (ref1 == ref2)
			continue;

		/* Check whether the sampling was disturbed by an SMI */
		if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
			continue;

		tsc2 = (tsc2 - tsc1) * 1000000LL;
		if (hpet)
			tsc2 = calc_hpet_ref(tsc2, ref1, ref2);
		else
			tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2);

		tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);

		/* Check the reference deviation */
		delta = ((u64) tsc_pit_min) * 100;
		do_div(delta, tsc_ref_min);

		/*
		 * If both calibration results are inside a 10% window
		 * then we can be sure that the calibration
		 * succeeded. We break out of the loop right away. We
		 * use the reference value, as it is more precise.
		 */
		if (delta >= 90 && delta <= 110) {
			pr_info("PIT calibration matches %s. %d loops\n",
				hpet ? "HPET" : "PMTIMER", i + 1);
			return tsc_ref_min;
		}

		/*
		 * Check whether PIT failed more than once. This
		 * happens in virtualized environments. We need to
		 * give the virtual PC a slightly longer timeframe for
		 * the HPET/PMTIMER to make the result precise.
		 */
		if (i == 1 && tsc_pit_min == ULONG_MAX) {
			latch = CAL2_LATCH;
			ms = CAL2_MS;
			loopmin = CAL2_PIT_LOOPS;
		}
	}

	/*
	 * Now check the results.
	 */
	if (tsc_pit_min == ULONG_MAX) {
		/* PIT gave no useful value */
		pr_warn("Unable to calibrate against PIT\n");

		/* We don't have an alternative source, disable TSC */
		if (!hpet && !ref1 && !ref2) {
			pr_notice("No reference (HPET/PMTIMER) available\n");
			return 0;
		}

		/* The alternative source failed as well, disable TSC */
		if (tsc_ref_min == ULONG_MAX) {
			pr_warn("HPET/PMTIMER calibration failed\n");
			return 0;
		}

		/* Use the alternative source */
		pr_info("using %s reference calibration\n",
			hpet ? "HPET" : "PMTIMER");

		return tsc_ref_min;
	}

	/* We don't have an alternative source, use the PIT calibration value */
	if (!hpet && !ref1 && !ref2) {
		pr_info("Using PIT calibration value\n");
		return tsc_pit_min;
	}

	/* The alternative source failed, use the PIT calibration value */
	if (tsc_ref_min == ULONG_MAX) {
		pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n");
		return tsc_pit_min;
	}

	/*
	 * The calibration values differ too much. When in doubt, we use
	 * the PIT value as we know that there are PMTIMERs around
	 * running at double speed. At least we let the user know:
	 */
	pr_warn("PIT calibration deviates from %s: %lu %lu\n",
		hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
	pr_info("Using PIT calibration value\n");
	return tsc_pit_min;
}

int recalibrate_cpu_khz(void)
{
#ifndef CONFIG_SMP
	unsigned long cpu_khz_old = cpu_khz;

	if (cpu_has_tsc) {
		tsc_khz = x86_platform.calibrate_tsc();
		cpu_khz = tsc_khz;
		cpu_data(0).loops_per_jiffy =
			cpufreq_scale(cpu_data(0).loops_per_jiffy,
					cpu_khz_old, cpu_khz);
		return 0;
	} else
		return -ENODEV;
#else
	return -ENODEV;
#endif
}

EXPORT_SYMBOL(recalibrate_cpu_khz);


static unsigned long long cyc2ns_suspend;

void tsc_save_sched_clock_state(void)
{
	if (!sched_clock_stable())
		return;

	cyc2ns_suspend = sched_clock();
}

/*
 * Even on processors with invariant TSC, TSC gets reset in some of the
 * ACPI system sleep states. And in some systems the BIOS seems to reinit
 * the TSC to an arbitrary value (still sync'd across cpu's) during resume
 * from such sleep states. To cope with this, recompute the cyc2ns_offset
 * for each cpu so that sched_clock() continues from the point where it
 * was left off during suspend.
 */
void tsc_restore_sched_clock_state(void)
{
	unsigned long long offset;
	unsigned long flags;
	int cpu;

	if (!sched_clock_stable())
		return;

	local_irq_save(flags);

	/*
	 * We're coming out of suspend, there's no concurrency yet; don't
	 * bother being nice about the RCU stuff, just write to both
	 * data fields.
	 */

	this_cpu_write(cyc2ns.data[0].cyc2ns_offset, 0);
	this_cpu_write(cyc2ns.data[1].cyc2ns_offset, 0);

	offset = cyc2ns_suspend - sched_clock();

	for_each_possible_cpu(cpu) {
		per_cpu(cyc2ns.data[0].cyc2ns_offset, cpu) = offset;
		per_cpu(cyc2ns.data[1].cyc2ns_offset, cpu) = offset;
	}

	local_irq_restore(flags);
}

#ifdef CONFIG_CPU_FREQ

/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
 * changes.
 *
 * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
 * not that important because current Opteron setups do not support
 * scaling on SMP anyway.
 *
 * Should fix up last_tsc too. Currently gettimeofday in the
 * first tick after the change will be slightly wrong.
 */

static unsigned int  ref_freq;
static unsigned long loops_per_jiffy_ref;
static unsigned long tsc_khz_ref;

static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
				void *data)
{
	struct cpufreq_freqs *freq = data;
	unsigned long *lpj;

	if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
		return 0;

	lpj = &boot_cpu_data.loops_per_jiffy;
#ifdef CONFIG_SMP
	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
		lpj = &cpu_data(freq->cpu).loops_per_jiffy;
#endif

	if (!ref_freq) {
		ref_freq = freq->old;
		loops_per_jiffy_ref = *lpj;
		tsc_khz_ref = tsc_khz;
	}
	if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
			(val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
			(val == CPUFREQ_RESUMECHANGE)) {
		*lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);

		tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
			mark_tsc_unstable("cpufreq changes");
	}

	set_cyc2ns_scale(tsc_khz, freq->cpu);

	return 0;
}

static struct notifier_block time_cpufreq_notifier_block = {
	.notifier_call = time_cpufreq_notifier
};
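/*
 * Illustrative example (hypothetical numbers, added for clarity): if the
 * reference state was captured at ref_freq = 2000000 kHz with
 * tsc_khz_ref = 2000000, and the governor later switches the CPU to
 * freq->new = 1000000 kHz, the notifier above rescales
 * tsc_khz = 2000000 * 1000000 / 2000000 = 1000000 kHz, adjusts
 * loops_per_jiffy by the same ratio, and then recomputes the cyc2ns scale.
 */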

static int __init cpufreq_tsc(void)
{
	if (!cpu_has_tsc)
		return 0;
	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
		return 0;
	cpufreq_register_notifier(&time_cpufreq_notifier_block,
				CPUFREQ_TRANSITION_NOTIFIER);
	return 0;
}

core_initcall(cpufreq_tsc);

#endif /* CONFIG_CPU_FREQ */

/* clocksource code */

static struct clocksource clocksource_tsc;

/*
 * We compare the TSC to the cycle_last value in the clocksource
 * structure to avoid a nasty time-warp. This can be observed in a
 * very small window right after one CPU updated cycle_last under
 * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
 * is smaller than the cycle_last reference value due to a TSC which
 * is slightly behind. This delta is nowhere else observable, but in
 * that case it results in a forward time jump in the range of hours
 * due to the unsigned delta calculation of the time keeping core
 * code, which is necessary to support wrapping clocksources like pm
 * timer.
 */
static cycle_t read_tsc(struct clocksource *cs)
{
	cycle_t ret = (cycle_t)get_cycles();

	return ret >= clocksource_tsc.cycle_last ?
		ret : clocksource_tsc.cycle_last;
}

static void resume_tsc(struct clocksource *cs)
{
	if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
		clocksource_tsc.cycle_last = 0;
}

static struct clocksource clocksource_tsc = {
	.name		= "tsc",
	.rating		= 300,
	.read		= read_tsc,
	.resume		= resume_tsc,
	.mask		= CLOCKSOURCE_MASK(64),
	.flags		= CLOCK_SOURCE_IS_CONTINUOUS |
			  CLOCK_SOURCE_MUST_VERIFY,
#ifdef CONFIG_X86_64
	.archdata	= { .vclock_mode = VCLOCK_TSC },
#endif
};

void mark_tsc_unstable(char *reason)
{
	if (!tsc_unstable) {
		tsc_unstable = 1;
		clear_sched_clock_stable();
		disable_sched_clock_irqtime();
		pr_info("Marking TSC unstable due to %s\n", reason);
		/* Change only the rating, when not registered */
		if (clocksource_tsc.mult)
			clocksource_mark_unstable(&clocksource_tsc);
		else {
			clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
			clocksource_tsc.rating = 0;
		}
	}
}

EXPORT_SYMBOL_GPL(mark_tsc_unstable);

static void __init check_system_tsc_reliable(void)
{
#ifdef CONFIG_MGEODE_LX
	/* RTSC counts during suspend */
#define RTSC_SUSP 0x100
	unsigned long res_low, res_high;

	rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
	/* Geode_LX - the OLPC CPU has a very reliable TSC */
	if (res_low & RTSC_SUSP)
		tsc_clocksource_reliable = 1;
#endif
	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
		tsc_clocksource_reliable = 1;
}

/*
 * Make an educated guess if the TSC is trustworthy and synchronized
 * over all CPUs.
 */
int unsynchronized_tsc(void)
{
	if (!cpu_has_tsc || tsc_unstable)
		return 1;

#ifdef CONFIG_SMP
	if (apic_is_clustered_box())
		return 1;
#endif

	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
		return 0;

	if (tsc_clocksource_reliable)
		return 0;
	/*
	 * Intel systems are normally all synchronized.
	 * Exceptions must mark TSC as unstable:
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
		/* assume multi socket systems are not synchronized: */
		if (num_possible_cpus() > 1)
			return 1;
	}

	return 0;
}


static void tsc_refine_calibration_work(struct work_struct *work);
static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
/**
 * tsc_refine_calibration_work - Further refine tsc freq calibration
 * @work - ignored.
 *
 * This function uses delayed work over a period of a
 * second to further refine the TSC freq value. Since this is
 * timer based, instead of loop based, we don't block the boot
 * process while this longer calibration is done.
 *
 * If there are any calibration anomalies (too many SMIs, etc),
 * or the refined calibration is off by 1% of the fast early
 * calibration, we throw out the new calibration and use the
 * early calibration.
 */
static void tsc_refine_calibration_work(struct work_struct *work)
{
	static u64 tsc_start = -1, ref_start;
	static int hpet;
	u64 tsc_stop, ref_stop, delta;
	unsigned long freq;

	/* Don't bother refining TSC on unstable systems */
	if (check_tsc_unstable())
		goto out;

	/*
	 * Since the work is started early in boot, we may be
	 * delayed the first time we expire. So set the workqueue
	 * again once we know timers are working.
	 */
	if (tsc_start == -1) {
		/*
		 * Only set hpet once, to avoid mixing hardware
		 * if the hpet becomes enabled later.
		 */
		hpet = is_hpet_enabled();
		schedule_delayed_work(&tsc_irqwork, HZ);
		tsc_start = tsc_read_refs(&ref_start, hpet);
		return;
	}

	tsc_stop = tsc_read_refs(&ref_stop, hpet);

	/* hpet or pmtimer available ? */
	if (ref_start == ref_stop)
		goto out;

	/* Check whether the sampling was disturbed by an SMI */
	if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
		goto out;

	delta = tsc_stop - tsc_start;
	delta *= 1000000LL;
	if (hpet)
		freq = calc_hpet_ref(delta, ref_start, ref_stop);
	else
		freq = calc_pmtimer_ref(delta, ref_start, ref_stop);

	/* Make sure we're within 1% */
	if (abs(tsc_khz - freq) > tsc_khz/100)
		goto out;

	tsc_khz = freq;
	pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n",
		(unsigned long)tsc_khz / 1000,
		(unsigned long)tsc_khz % 1000);

out:
	clocksource_register_khz(&clocksource_tsc, tsc_khz);
}


static int __init init_tsc_clocksource(void)
{
	if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz)
		return 0;

	if (tsc_clocksource_reliable)
		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
	/* lower the rating if we already know it's unstable: */
	if (check_tsc_unstable()) {
		clocksource_tsc.rating = 0;
		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
	}

	if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
		clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;

	/*
	 * Trust the results of the earlier calibration on systems
	 * exporting a reliable TSC.
	 */
	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
		clocksource_register_khz(&clocksource_tsc, tsc_khz);
		return 0;
	}

	schedule_delayed_work(&tsc_irqwork, 0);
	return 0;
}
/*
 * We use device_initcall here, to ensure we run after the hpet
 * is fully initialized, which may occur at fs_initcall time.
 */
device_initcall(init_tsc_clocksource);

void __init tsc_init(void)
{
	u64 lpj;
	int cpu;

	x86_init.timers.tsc_pre_init();

	if (!cpu_has_tsc)
		return;

	tsc_khz = x86_platform.calibrate_tsc();
	cpu_khz = tsc_khz;

	if (!tsc_khz) {
		mark_tsc_unstable("could not calculate TSC khz");
		return;
	}

	pr_info("Detected %lu.%03lu MHz processor\n",
		(unsigned long)cpu_khz / 1000,
		(unsigned long)cpu_khz % 1000);

	/*
	 * Secondary CPUs do not run through tsc_init(), so set up
	 * all the scale factors for all CPUs, assuming the same
	 * speed as the bootup CPU. (cpufreq notifiers will fix this
	 * up if their speed diverges)
	 */
	for_each_possible_cpu(cpu) {
		cyc2ns_init(cpu);
		set_cyc2ns_scale(cpu_khz, cpu);
	}

	if (tsc_disabled > 0)
		return;

	/* now allow native_sched_clock() to use rdtsc */

	tsc_disabled = 0;
	static_key_slow_inc(&__use_tsc);

	if (!no_sched_irq_time)
		enable_sched_clock_irqtime();

	lpj = ((u64)tsc_khz * 1000);
	do_div(lpj, HZ);
	lpj_fine = lpj;

	use_tsc_delay();

	if (unsynchronized_tsc())
		mark_tsc_unstable("TSCs unsynchronized");

	check_system_tsc_reliable();
}

#ifdef CONFIG_SMP
/*
 * If we have a constant TSC and are using the TSC for the delay loop,
 * we can skip clock calibration if another cpu in the same socket has already
 * been calibrated. This assumes that CONSTANT_TSC applies to all
 * cpus in the socket - this should be a safe assumption.
 */
unsigned long calibrate_delay_is_known(void)
{
	int i, cpu = smp_processor_id();

	if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
		return 0;

	for_each_online_cpu(i)
		if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id)
			return cpu_data(i).loops_per_jiffy;
	return 0;
}
#endif