1c767a54bSJoe Perches #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 2c767a54bSJoe Perches 3bfc0f594SAlok Kataria #include <linux/kernel.h> 40ef95533SAlok Kataria #include <linux/sched.h> 50ef95533SAlok Kataria #include <linux/init.h> 60ef95533SAlok Kataria #include <linux/module.h> 70ef95533SAlok Kataria #include <linux/timer.h> 8bfc0f594SAlok Kataria #include <linux/acpi_pmtmr.h> 92dbe06faSAlok Kataria #include <linux/cpufreq.h> 108fbbc4b4SAlok Kataria #include <linux/delay.h> 118fbbc4b4SAlok Kataria #include <linux/clocksource.h> 128fbbc4b4SAlok Kataria #include <linux/percpu.h> 1308604bd9SArnd Bergmann #include <linux/timex.h> 1410b033d4SPeter Zijlstra #include <linux/static_key.h> 15bfc0f594SAlok Kataria 16bfc0f594SAlok Kataria #include <asm/hpet.h> 178fbbc4b4SAlok Kataria #include <asm/timer.h> 188fbbc4b4SAlok Kataria #include <asm/vgtod.h> 198fbbc4b4SAlok Kataria #include <asm/time.h> 208fbbc4b4SAlok Kataria #include <asm/delay.h> 2188b094fbSAlok Kataria #include <asm/hypervisor.h> 2208047c4fSThomas Gleixner #include <asm/nmi.h> 232d826404SThomas Gleixner #include <asm/x86_init.h> 240ef95533SAlok Kataria 25f24ade3aSIngo Molnar unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ 260ef95533SAlok Kataria EXPORT_SYMBOL(cpu_khz); 27f24ade3aSIngo Molnar 28f24ade3aSIngo Molnar unsigned int __read_mostly tsc_khz; 290ef95533SAlok Kataria EXPORT_SYMBOL(tsc_khz); 300ef95533SAlok Kataria 310ef95533SAlok Kataria /* 320ef95533SAlok Kataria * TSC can be unstable due to cpufreq or due to unsynced TSCs 330ef95533SAlok Kataria */ 34f24ade3aSIngo Molnar static int __read_mostly tsc_unstable; 350ef95533SAlok Kataria 360ef95533SAlok Kataria /* native_sched_clock() is called before tsc_init(), so 370ef95533SAlok Kataria we must start with the TSC soft disabled to prevent 380ef95533SAlok Kataria erroneous rdtsc usage on !cpu_has_tsc processors */ 39f24ade3aSIngo Molnar static int __read_mostly tsc_disabled = -1; 400ef95533SAlok Kataria 4110b033d4SPeter Zijlstra 
static struct static_key __use_tsc = STATIC_KEY_INIT; 4210b033d4SPeter Zijlstra 4328a00184SSuresh Siddha int tsc_clocksource_reliable; 4457c67da2SPeter Zijlstra 4520d1c86aSPeter Zijlstra /* 4620d1c86aSPeter Zijlstra * Use a ring-buffer like data structure, where a writer advances the head by 4720d1c86aSPeter Zijlstra * writing a new data entry and a reader advances the tail when it observes a 4820d1c86aSPeter Zijlstra * new entry. 4920d1c86aSPeter Zijlstra * 5020d1c86aSPeter Zijlstra * Writers are made to wait on readers until there's space to write a new 5120d1c86aSPeter Zijlstra * entry. 5220d1c86aSPeter Zijlstra * 5320d1c86aSPeter Zijlstra * This means that we can always use an {offset, mul} pair to compute a ns 5420d1c86aSPeter Zijlstra * value that is 'roughly' in the right direction, even if we're writing a new 5520d1c86aSPeter Zijlstra * {offset, mul} pair during the clock read. 5620d1c86aSPeter Zijlstra * 5720d1c86aSPeter Zijlstra * The down-side is that we can no longer guarantee strict monotonicity anymore 5820d1c86aSPeter Zijlstra * (assuming the TSC was that to begin with), because while we compute the 5920d1c86aSPeter Zijlstra * intersection point of the two clock slopes and make sure the time is 6020d1c86aSPeter Zijlstra * continuous at the point of switching; we can no longer guarantee a reader is 6120d1c86aSPeter Zijlstra * strictly before or after the switch point. 6220d1c86aSPeter Zijlstra * 6320d1c86aSPeter Zijlstra * It does mean a reader no longer needs to disable IRQs in order to avoid 6420d1c86aSPeter Zijlstra * CPU-Freq updates messing with his times, and similarly an NMI reader will 6520d1c86aSPeter Zijlstra * no longer run the risk of hitting half-written state. 
6620d1c86aSPeter Zijlstra */ 6720d1c86aSPeter Zijlstra 6820d1c86aSPeter Zijlstra struct cyc2ns { 6920d1c86aSPeter Zijlstra struct cyc2ns_data data[2]; /* 0 + 2*24 = 48 */ 7020d1c86aSPeter Zijlstra struct cyc2ns_data *head; /* 48 + 8 = 56 */ 7120d1c86aSPeter Zijlstra struct cyc2ns_data *tail; /* 56 + 8 = 64 */ 7220d1c86aSPeter Zijlstra }; /* exactly fits one cacheline */ 7320d1c86aSPeter Zijlstra 7420d1c86aSPeter Zijlstra static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); 7520d1c86aSPeter Zijlstra 7620d1c86aSPeter Zijlstra struct cyc2ns_data *cyc2ns_read_begin(void) 7720d1c86aSPeter Zijlstra { 7820d1c86aSPeter Zijlstra struct cyc2ns_data *head; 7920d1c86aSPeter Zijlstra 8020d1c86aSPeter Zijlstra preempt_disable(); 8120d1c86aSPeter Zijlstra 8220d1c86aSPeter Zijlstra head = this_cpu_read(cyc2ns.head); 8320d1c86aSPeter Zijlstra /* 8420d1c86aSPeter Zijlstra * Ensure we observe the entry when we observe the pointer to it. 8520d1c86aSPeter Zijlstra * matches the wmb from cyc2ns_write_end(). 8620d1c86aSPeter Zijlstra */ 8720d1c86aSPeter Zijlstra smp_read_barrier_depends(); 8820d1c86aSPeter Zijlstra head->__count++; 8920d1c86aSPeter Zijlstra barrier(); 9020d1c86aSPeter Zijlstra 9120d1c86aSPeter Zijlstra return head; 9220d1c86aSPeter Zijlstra } 9320d1c86aSPeter Zijlstra 9420d1c86aSPeter Zijlstra void cyc2ns_read_end(struct cyc2ns_data *head) 9520d1c86aSPeter Zijlstra { 9620d1c86aSPeter Zijlstra barrier(); 9720d1c86aSPeter Zijlstra /* 9820d1c86aSPeter Zijlstra * If we're the outer most nested read; update the tail pointer 9920d1c86aSPeter Zijlstra * when we're done. This notifies possible pending writers 10020d1c86aSPeter Zijlstra * that we've observed the head pointer and that the other 10120d1c86aSPeter Zijlstra * entry is now free. 
10220d1c86aSPeter Zijlstra */ 10320d1c86aSPeter Zijlstra if (!--head->__count) { 10420d1c86aSPeter Zijlstra /* 10520d1c86aSPeter Zijlstra * x86-TSO does not reorder writes with older reads; 10620d1c86aSPeter Zijlstra * therefore once this write becomes visible to another 10720d1c86aSPeter Zijlstra * cpu, we must be finished reading the cyc2ns_data. 10820d1c86aSPeter Zijlstra * 10920d1c86aSPeter Zijlstra * matches with cyc2ns_write_begin(). 11020d1c86aSPeter Zijlstra */ 11120d1c86aSPeter Zijlstra this_cpu_write(cyc2ns.tail, head); 11220d1c86aSPeter Zijlstra } 11320d1c86aSPeter Zijlstra preempt_enable(); 11420d1c86aSPeter Zijlstra } 11520d1c86aSPeter Zijlstra 11620d1c86aSPeter Zijlstra /* 11720d1c86aSPeter Zijlstra * Begin writing a new @data entry for @cpu. 11820d1c86aSPeter Zijlstra * 11920d1c86aSPeter Zijlstra * Assumes some sort of write side lock; currently 'provided' by the assumption 12020d1c86aSPeter Zijlstra * that cpufreq will call its notifiers sequentially. 12120d1c86aSPeter Zijlstra */ 12220d1c86aSPeter Zijlstra static struct cyc2ns_data *cyc2ns_write_begin(int cpu) 12320d1c86aSPeter Zijlstra { 12420d1c86aSPeter Zijlstra struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); 12520d1c86aSPeter Zijlstra struct cyc2ns_data *data = c2n->data; 12620d1c86aSPeter Zijlstra 12720d1c86aSPeter Zijlstra if (data == c2n->head) 12820d1c86aSPeter Zijlstra data++; 12920d1c86aSPeter Zijlstra 13020d1c86aSPeter Zijlstra /* XXX send an IPI to @cpu in order to guarantee a read? */ 13120d1c86aSPeter Zijlstra 13220d1c86aSPeter Zijlstra /* 13320d1c86aSPeter Zijlstra * When we observe the tail write from cyc2ns_read_end(), 13420d1c86aSPeter Zijlstra * the cpu must be done with that entry and its safe 13520d1c86aSPeter Zijlstra * to start writing to it. 
13620d1c86aSPeter Zijlstra */ 13720d1c86aSPeter Zijlstra while (c2n->tail == data) 13820d1c86aSPeter Zijlstra cpu_relax(); 13920d1c86aSPeter Zijlstra 14020d1c86aSPeter Zijlstra return data; 14120d1c86aSPeter Zijlstra } 14220d1c86aSPeter Zijlstra 14320d1c86aSPeter Zijlstra static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data) 14420d1c86aSPeter Zijlstra { 14520d1c86aSPeter Zijlstra struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); 14620d1c86aSPeter Zijlstra 14720d1c86aSPeter Zijlstra /* 14820d1c86aSPeter Zijlstra * Ensure the @data writes are visible before we publish the 14920d1c86aSPeter Zijlstra * entry. Matches the data-depencency in cyc2ns_read_begin(). 15020d1c86aSPeter Zijlstra */ 15120d1c86aSPeter Zijlstra smp_wmb(); 15220d1c86aSPeter Zijlstra 15320d1c86aSPeter Zijlstra ACCESS_ONCE(c2n->head) = data; 15420d1c86aSPeter Zijlstra } 15520d1c86aSPeter Zijlstra 15620d1c86aSPeter Zijlstra /* 15720d1c86aSPeter Zijlstra * Accelerators for sched_clock() 15857c67da2SPeter Zijlstra * convert from cycles(64bits) => nanoseconds (64bits) 15957c67da2SPeter Zijlstra * basic equation: 16057c67da2SPeter Zijlstra * ns = cycles / (freq / ns_per_sec) 16157c67da2SPeter Zijlstra * ns = cycles * (ns_per_sec / freq) 16257c67da2SPeter Zijlstra * ns = cycles * (10^9 / (cpu_khz * 10^3)) 16357c67da2SPeter Zijlstra * ns = cycles * (10^6 / cpu_khz) 16457c67da2SPeter Zijlstra * 16557c67da2SPeter Zijlstra * Then we use scaling math (suggested by george@mvista.com) to get: 16657c67da2SPeter Zijlstra * ns = cycles * (10^6 * SC / cpu_khz) / SC 16757c67da2SPeter Zijlstra * ns = cycles * cyc2ns_scale / SC 16857c67da2SPeter Zijlstra * 16957c67da2SPeter Zijlstra * And since SC is a constant power of two, we can convert the div 17057c67da2SPeter Zijlstra * into a shift. 17157c67da2SPeter Zijlstra * 17257c67da2SPeter Zijlstra * We can use khz divisor instead of mhz to keep a better precision, since 17357c67da2SPeter Zijlstra * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. 
17457c67da2SPeter Zijlstra * (mathieu.desnoyers@polymtl.ca) 17557c67da2SPeter Zijlstra * 17657c67da2SPeter Zijlstra * -johnstul@us.ibm.com "math is hard, lets go shopping!" 17757c67da2SPeter Zijlstra */ 17857c67da2SPeter Zijlstra 17957c67da2SPeter Zijlstra #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ 18057c67da2SPeter Zijlstra 18120d1c86aSPeter Zijlstra static void cyc2ns_data_init(struct cyc2ns_data *data) 18220d1c86aSPeter Zijlstra { 1835e3c1afdSPeter Zijlstra data->cyc2ns_mul = 0; 18420d1c86aSPeter Zijlstra data->cyc2ns_shift = CYC2NS_SCALE_FACTOR; 18520d1c86aSPeter Zijlstra data->cyc2ns_offset = 0; 18620d1c86aSPeter Zijlstra data->__count = 0; 18720d1c86aSPeter Zijlstra } 18820d1c86aSPeter Zijlstra 18920d1c86aSPeter Zijlstra static void cyc2ns_init(int cpu) 19020d1c86aSPeter Zijlstra { 19120d1c86aSPeter Zijlstra struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); 19220d1c86aSPeter Zijlstra 19320d1c86aSPeter Zijlstra cyc2ns_data_init(&c2n->data[0]); 19420d1c86aSPeter Zijlstra cyc2ns_data_init(&c2n->data[1]); 19520d1c86aSPeter Zijlstra 19620d1c86aSPeter Zijlstra c2n->head = c2n->data; 19720d1c86aSPeter Zijlstra c2n->tail = c2n->data; 19820d1c86aSPeter Zijlstra } 19920d1c86aSPeter Zijlstra 20057c67da2SPeter Zijlstra static inline unsigned long long cycles_2_ns(unsigned long long cyc) 20157c67da2SPeter Zijlstra { 20220d1c86aSPeter Zijlstra struct cyc2ns_data *data, *tail; 20320d1c86aSPeter Zijlstra unsigned long long ns; 20420d1c86aSPeter Zijlstra 20520d1c86aSPeter Zijlstra /* 20620d1c86aSPeter Zijlstra * See cyc2ns_read_*() for details; replicated in order to avoid 20720d1c86aSPeter Zijlstra * an extra few instructions that came with the abstraction. 20820d1c86aSPeter Zijlstra * Notable, it allows us to only do the __count and tail update 20920d1c86aSPeter Zijlstra * dance when its actually needed. 
21020d1c86aSPeter Zijlstra */ 21120d1c86aSPeter Zijlstra 212569d6557SSteven Rostedt preempt_disable_notrace(); 21320d1c86aSPeter Zijlstra data = this_cpu_read(cyc2ns.head); 21420d1c86aSPeter Zijlstra tail = this_cpu_read(cyc2ns.tail); 21520d1c86aSPeter Zijlstra 21620d1c86aSPeter Zijlstra if (likely(data == tail)) { 21720d1c86aSPeter Zijlstra ns = data->cyc2ns_offset; 21820d1c86aSPeter Zijlstra ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); 21920d1c86aSPeter Zijlstra } else { 22020d1c86aSPeter Zijlstra data->__count++; 22120d1c86aSPeter Zijlstra 22220d1c86aSPeter Zijlstra barrier(); 22320d1c86aSPeter Zijlstra 22420d1c86aSPeter Zijlstra ns = data->cyc2ns_offset; 22520d1c86aSPeter Zijlstra ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); 22620d1c86aSPeter Zijlstra 22720d1c86aSPeter Zijlstra barrier(); 22820d1c86aSPeter Zijlstra 22920d1c86aSPeter Zijlstra if (!--data->__count) 23020d1c86aSPeter Zijlstra this_cpu_write(cyc2ns.tail, data); 23120d1c86aSPeter Zijlstra } 232569d6557SSteven Rostedt preempt_enable_notrace(); 23320d1c86aSPeter Zijlstra 23457c67da2SPeter Zijlstra return ns; 23557c67da2SPeter Zijlstra } 23657c67da2SPeter Zijlstra 23757c67da2SPeter Zijlstra static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) 23857c67da2SPeter Zijlstra { 23920d1c86aSPeter Zijlstra unsigned long long tsc_now, ns_now; 24020d1c86aSPeter Zijlstra struct cyc2ns_data *data; 24120d1c86aSPeter Zijlstra unsigned long flags; 24257c67da2SPeter Zijlstra 24357c67da2SPeter Zijlstra local_irq_save(flags); 24457c67da2SPeter Zijlstra sched_clock_idle_sleep_event(); 24557c67da2SPeter Zijlstra 24620d1c86aSPeter Zijlstra if (!cpu_khz) 24720d1c86aSPeter Zijlstra goto done; 24820d1c86aSPeter Zijlstra 24920d1c86aSPeter Zijlstra data = cyc2ns_write_begin(cpu); 25057c67da2SPeter Zijlstra 25157c67da2SPeter Zijlstra rdtscll(tsc_now); 25257c67da2SPeter Zijlstra ns_now = cycles_2_ns(tsc_now); 25357c67da2SPeter Zijlstra 25420d1c86aSPeter Zijlstra /* 
25520d1c86aSPeter Zijlstra * Compute a new multiplier as per the above comment and ensure our 25620d1c86aSPeter Zijlstra * time function is continuous; see the comment near struct 25720d1c86aSPeter Zijlstra * cyc2ns_data. 25820d1c86aSPeter Zijlstra */ 25989171579SMichal Nazarewicz data->cyc2ns_mul = 26089171579SMichal Nazarewicz DIV_ROUND_CLOSEST(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR, 26189171579SMichal Nazarewicz cpu_khz); 26220d1c86aSPeter Zijlstra data->cyc2ns_shift = CYC2NS_SCALE_FACTOR; 26320d1c86aSPeter Zijlstra data->cyc2ns_offset = ns_now - 26420d1c86aSPeter Zijlstra mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); 26557c67da2SPeter Zijlstra 26620d1c86aSPeter Zijlstra cyc2ns_write_end(cpu, data); 26720d1c86aSPeter Zijlstra 26820d1c86aSPeter Zijlstra done: 26957c67da2SPeter Zijlstra sched_clock_idle_wakeup_event(0); 27057c67da2SPeter Zijlstra local_irq_restore(flags); 27157c67da2SPeter Zijlstra } 2720ef95533SAlok Kataria /* 2730ef95533SAlok Kataria * Scheduler clock - returns current time in nanosec units. 2740ef95533SAlok Kataria */ 2750ef95533SAlok Kataria u64 native_sched_clock(void) 2760ef95533SAlok Kataria { 27720d1c86aSPeter Zijlstra u64 tsc_now; 2780ef95533SAlok Kataria 2790ef95533SAlok Kataria /* 2800ef95533SAlok Kataria * Fall back to jiffies if there's no TSC available: 2810ef95533SAlok Kataria * ( But note that we still use it if the TSC is marked 2820ef95533SAlok Kataria * unstable. We do this because unlike Time Of Day, 2830ef95533SAlok Kataria * the scheduler clock tolerates small errors and it's 2840ef95533SAlok Kataria * very important for it to be as fast as the platform 2853ad2f3fbSDaniel Mack * can achieve it. 
) 2860ef95533SAlok Kataria */ 28710b033d4SPeter Zijlstra if (!static_key_false(&__use_tsc)) { 2880ef95533SAlok Kataria /* No locking but a rare wrong value is not a big deal: */ 2890ef95533SAlok Kataria return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); 2900ef95533SAlok Kataria } 2910ef95533SAlok Kataria 2920ef95533SAlok Kataria /* read the Time Stamp Counter: */ 29320d1c86aSPeter Zijlstra rdtscll(tsc_now); 2940ef95533SAlok Kataria 2950ef95533SAlok Kataria /* return the value in ns */ 29620d1c86aSPeter Zijlstra return cycles_2_ns(tsc_now); 2970ef95533SAlok Kataria } 2980ef95533SAlok Kataria 2990ef95533SAlok Kataria /* We need to define a real function for sched_clock, to override the 3000ef95533SAlok Kataria weak default version */ 3010ef95533SAlok Kataria #ifdef CONFIG_PARAVIRT 3020ef95533SAlok Kataria unsigned long long sched_clock(void) 3030ef95533SAlok Kataria { 3040ef95533SAlok Kataria return paravirt_sched_clock(); 3050ef95533SAlok Kataria } 3060ef95533SAlok Kataria #else 3070ef95533SAlok Kataria unsigned long long 3080ef95533SAlok Kataria sched_clock(void) __attribute__((alias("native_sched_clock"))); 3090ef95533SAlok Kataria #endif 3100ef95533SAlok Kataria 311ce37f400SDavid Vrabel unsigned long long native_read_tsc(void) 312ce37f400SDavid Vrabel { 313ce37f400SDavid Vrabel return __native_read_tsc(); 314ce37f400SDavid Vrabel } 315ce37f400SDavid Vrabel EXPORT_SYMBOL(native_read_tsc); 316ce37f400SDavid Vrabel 3170ef95533SAlok Kataria int check_tsc_unstable(void) 3180ef95533SAlok Kataria { 3190ef95533SAlok Kataria return tsc_unstable; 3200ef95533SAlok Kataria } 3210ef95533SAlok Kataria EXPORT_SYMBOL_GPL(check_tsc_unstable); 3220ef95533SAlok Kataria 323c73deb6aSAdrian Hunter int check_tsc_disabled(void) 324c73deb6aSAdrian Hunter { 325c73deb6aSAdrian Hunter return tsc_disabled; 326c73deb6aSAdrian Hunter } 327c73deb6aSAdrian Hunter EXPORT_SYMBOL_GPL(check_tsc_disabled); 328c73deb6aSAdrian Hunter 3290ef95533SAlok Kataria #ifdef CONFIG_X86_TSC 
3300ef95533SAlok Kataria int __init notsc_setup(char *str) 3310ef95533SAlok Kataria { 332c767a54bSJoe Perches pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n"); 3330ef95533SAlok Kataria tsc_disabled = 1; 3340ef95533SAlok Kataria return 1; 3350ef95533SAlok Kataria } 3360ef95533SAlok Kataria #else 3370ef95533SAlok Kataria /* 3380ef95533SAlok Kataria * disable flag for tsc. Takes effect by clearing the TSC cpu flag 3390ef95533SAlok Kataria * in cpu/common.c 3400ef95533SAlok Kataria */ 3410ef95533SAlok Kataria int __init notsc_setup(char *str) 3420ef95533SAlok Kataria { 3430ef95533SAlok Kataria setup_clear_cpu_cap(X86_FEATURE_TSC); 3440ef95533SAlok Kataria return 1; 3450ef95533SAlok Kataria } 3460ef95533SAlok Kataria #endif 3470ef95533SAlok Kataria 3480ef95533SAlok Kataria __setup("notsc", notsc_setup); 349bfc0f594SAlok Kataria 350e82b8e4eSVenkatesh Pallipadi static int no_sched_irq_time; 351e82b8e4eSVenkatesh Pallipadi 352395628efSAlok Kataria static int __init tsc_setup(char *str) 353395628efSAlok Kataria { 354395628efSAlok Kataria if (!strcmp(str, "reliable")) 355395628efSAlok Kataria tsc_clocksource_reliable = 1; 356e82b8e4eSVenkatesh Pallipadi if (!strncmp(str, "noirqtime", 9)) 357e82b8e4eSVenkatesh Pallipadi no_sched_irq_time = 1; 358395628efSAlok Kataria return 1; 359395628efSAlok Kataria } 360395628efSAlok Kataria 361395628efSAlok Kataria __setup("tsc=", tsc_setup); 362395628efSAlok Kataria 363bfc0f594SAlok Kataria #define MAX_RETRIES 5 364bfc0f594SAlok Kataria #define SMI_TRESHOLD 50000 365bfc0f594SAlok Kataria 366bfc0f594SAlok Kataria /* 367bfc0f594SAlok Kataria * Read TSC and the reference counters. 
Take care of SMI disturbance 368bfc0f594SAlok Kataria */ 369827014beSThomas Gleixner static u64 tsc_read_refs(u64 *p, int hpet) 370bfc0f594SAlok Kataria { 371bfc0f594SAlok Kataria u64 t1, t2; 372bfc0f594SAlok Kataria int i; 373bfc0f594SAlok Kataria 374bfc0f594SAlok Kataria for (i = 0; i < MAX_RETRIES; i++) { 375bfc0f594SAlok Kataria t1 = get_cycles(); 376bfc0f594SAlok Kataria if (hpet) 377827014beSThomas Gleixner *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; 378bfc0f594SAlok Kataria else 379827014beSThomas Gleixner *p = acpi_pm_read_early(); 380bfc0f594SAlok Kataria t2 = get_cycles(); 381bfc0f594SAlok Kataria if ((t2 - t1) < SMI_TRESHOLD) 382bfc0f594SAlok Kataria return t2; 383bfc0f594SAlok Kataria } 384bfc0f594SAlok Kataria return ULLONG_MAX; 385bfc0f594SAlok Kataria } 386bfc0f594SAlok Kataria 387ec0c15afSLinus Torvalds /* 388d683ef7aSThomas Gleixner * Calculate the TSC frequency from HPET reference 389d683ef7aSThomas Gleixner */ 390d683ef7aSThomas Gleixner static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2) 391d683ef7aSThomas Gleixner { 392d683ef7aSThomas Gleixner u64 tmp; 393d683ef7aSThomas Gleixner 394d683ef7aSThomas Gleixner if (hpet2 < hpet1) 395d683ef7aSThomas Gleixner hpet2 += 0x100000000ULL; 396d683ef7aSThomas Gleixner hpet2 -= hpet1; 397d683ef7aSThomas Gleixner tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); 398d683ef7aSThomas Gleixner do_div(tmp, 1000000); 399d683ef7aSThomas Gleixner do_div(deltatsc, tmp); 400d683ef7aSThomas Gleixner 401d683ef7aSThomas Gleixner return (unsigned long) deltatsc; 402d683ef7aSThomas Gleixner } 403d683ef7aSThomas Gleixner 404d683ef7aSThomas Gleixner /* 405d683ef7aSThomas Gleixner * Calculate the TSC frequency from PMTimer reference 406d683ef7aSThomas Gleixner */ 407d683ef7aSThomas Gleixner static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) 408d683ef7aSThomas Gleixner { 409d683ef7aSThomas Gleixner u64 tmp; 410d683ef7aSThomas Gleixner 411d683ef7aSThomas Gleixner if (!pm1 && !pm2) 
412d683ef7aSThomas Gleixner return ULONG_MAX; 413d683ef7aSThomas Gleixner 414d683ef7aSThomas Gleixner if (pm2 < pm1) 415d683ef7aSThomas Gleixner pm2 += (u64)ACPI_PM_OVRRUN; 416d683ef7aSThomas Gleixner pm2 -= pm1; 417d683ef7aSThomas Gleixner tmp = pm2 * 1000000000LL; 418d683ef7aSThomas Gleixner do_div(tmp, PMTMR_TICKS_PER_SEC); 419d683ef7aSThomas Gleixner do_div(deltatsc, tmp); 420d683ef7aSThomas Gleixner 421d683ef7aSThomas Gleixner return (unsigned long) deltatsc; 422d683ef7aSThomas Gleixner } 423d683ef7aSThomas Gleixner 424a977c400SThomas Gleixner #define CAL_MS 10 425b7743970SDeepak Saxena #define CAL_LATCH (PIT_TICK_RATE / (1000 / CAL_MS)) 426a977c400SThomas Gleixner #define CAL_PIT_LOOPS 1000 427a977c400SThomas Gleixner 428a977c400SThomas Gleixner #define CAL2_MS 50 429b7743970SDeepak Saxena #define CAL2_LATCH (PIT_TICK_RATE / (1000 / CAL2_MS)) 430a977c400SThomas Gleixner #define CAL2_PIT_LOOPS 5000 431a977c400SThomas Gleixner 432cce3e057SThomas Gleixner 433ec0c15afSLinus Torvalds /* 434ec0c15afSLinus Torvalds * Try to calibrate the TSC against the Programmable 435ec0c15afSLinus Torvalds * Interrupt Timer and return the frequency of the TSC 436ec0c15afSLinus Torvalds * in kHz. 437ec0c15afSLinus Torvalds * 438ec0c15afSLinus Torvalds * Return ULONG_MAX on failure to calibrate. 439ec0c15afSLinus Torvalds */ 440a977c400SThomas Gleixner static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) 441ec0c15afSLinus Torvalds { 442ec0c15afSLinus Torvalds u64 tsc, t1, t2, delta; 443ec0c15afSLinus Torvalds unsigned long tscmin, tscmax; 444ec0c15afSLinus Torvalds int pitcnt; 445ec0c15afSLinus Torvalds 446ec0c15afSLinus Torvalds /* Set the Gate high, disable speaker */ 447ec0c15afSLinus Torvalds outb((inb(0x61) & ~0x02) | 0x01, 0x61); 448ec0c15afSLinus Torvalds 449ec0c15afSLinus Torvalds /* 450ec0c15afSLinus Torvalds * Setup CTC channel 2* for mode 0, (interrupt on terminal 451ec0c15afSLinus Torvalds * count mode), binary count. 
Set the latch register to 50ms 452ec0c15afSLinus Torvalds * (LSB then MSB) to begin countdown. 453ec0c15afSLinus Torvalds */ 454ec0c15afSLinus Torvalds outb(0xb0, 0x43); 455a977c400SThomas Gleixner outb(latch & 0xff, 0x42); 456a977c400SThomas Gleixner outb(latch >> 8, 0x42); 457ec0c15afSLinus Torvalds 458ec0c15afSLinus Torvalds tsc = t1 = t2 = get_cycles(); 459ec0c15afSLinus Torvalds 460ec0c15afSLinus Torvalds pitcnt = 0; 461ec0c15afSLinus Torvalds tscmax = 0; 462ec0c15afSLinus Torvalds tscmin = ULONG_MAX; 463ec0c15afSLinus Torvalds while ((inb(0x61) & 0x20) == 0) { 464ec0c15afSLinus Torvalds t2 = get_cycles(); 465ec0c15afSLinus Torvalds delta = t2 - tsc; 466ec0c15afSLinus Torvalds tsc = t2; 467ec0c15afSLinus Torvalds if ((unsigned long) delta < tscmin) 468ec0c15afSLinus Torvalds tscmin = (unsigned int) delta; 469ec0c15afSLinus Torvalds if ((unsigned long) delta > tscmax) 470ec0c15afSLinus Torvalds tscmax = (unsigned int) delta; 471ec0c15afSLinus Torvalds pitcnt++; 472ec0c15afSLinus Torvalds } 473ec0c15afSLinus Torvalds 474ec0c15afSLinus Torvalds /* 475ec0c15afSLinus Torvalds * Sanity checks: 476ec0c15afSLinus Torvalds * 477a977c400SThomas Gleixner * If we were not able to read the PIT more than loopmin 478ec0c15afSLinus Torvalds * times, then we have been hit by a massive SMI 479ec0c15afSLinus Torvalds * 480ec0c15afSLinus Torvalds * If the maximum is 10 times larger than the minimum, 481ec0c15afSLinus Torvalds * then we got hit by an SMI as well. 
482ec0c15afSLinus Torvalds */ 483a977c400SThomas Gleixner if (pitcnt < loopmin || tscmax > 10 * tscmin) 484ec0c15afSLinus Torvalds return ULONG_MAX; 485ec0c15afSLinus Torvalds 486ec0c15afSLinus Torvalds /* Calculate the PIT value */ 487ec0c15afSLinus Torvalds delta = t2 - t1; 488a977c400SThomas Gleixner do_div(delta, ms); 489ec0c15afSLinus Torvalds return delta; 490ec0c15afSLinus Torvalds } 491ec0c15afSLinus Torvalds 4926ac40ed0SLinus Torvalds /* 4936ac40ed0SLinus Torvalds * This reads the current MSB of the PIT counter, and 4946ac40ed0SLinus Torvalds * checks if we are running on sufficiently fast and 4956ac40ed0SLinus Torvalds * non-virtualized hardware. 4966ac40ed0SLinus Torvalds * 4976ac40ed0SLinus Torvalds * Our expectations are: 4986ac40ed0SLinus Torvalds * 4996ac40ed0SLinus Torvalds * - the PIT is running at roughly 1.19MHz 5006ac40ed0SLinus Torvalds * 5016ac40ed0SLinus Torvalds * - each IO is going to take about 1us on real hardware, 5026ac40ed0SLinus Torvalds * but we allow it to be much faster (by a factor of 10) or 5036ac40ed0SLinus Torvalds * _slightly_ slower (ie we allow up to a 2us read+counter 5046ac40ed0SLinus Torvalds * update - anything else implies a unacceptably slow CPU 5056ac40ed0SLinus Torvalds * or PIT for the fast calibration to work. 5066ac40ed0SLinus Torvalds * 5076ac40ed0SLinus Torvalds * - with 256 PIT ticks to read the value, we have 214us to 5086ac40ed0SLinus Torvalds * see the same MSB (and overhead like doing a single TSC 5096ac40ed0SLinus Torvalds * read per MSB value etc). 5106ac40ed0SLinus Torvalds * 5116ac40ed0SLinus Torvalds * - We're doing 2 reads per loop (LSB, MSB), and we expect 5126ac40ed0SLinus Torvalds * them each to take about a microsecond on real hardware. 5136ac40ed0SLinus Torvalds * So we expect a count value of around 100. But we'll be 5146ac40ed0SLinus Torvalds * generous, and accept anything over 50. 
5156ac40ed0SLinus Torvalds * 5166ac40ed0SLinus Torvalds * - if the PIT is stuck, and we see *many* more reads, we 5176ac40ed0SLinus Torvalds * return early (and the next caller of pit_expect_msb() 5186ac40ed0SLinus Torvalds * then consider it a failure when they don't see the 5196ac40ed0SLinus Torvalds * next expected value). 5206ac40ed0SLinus Torvalds * 5216ac40ed0SLinus Torvalds * These expectations mean that we know that we have seen the 5226ac40ed0SLinus Torvalds * transition from one expected value to another with a fairly 5236ac40ed0SLinus Torvalds * high accuracy, and we didn't miss any events. We can thus 5246ac40ed0SLinus Torvalds * use the TSC value at the transitions to calculate a pretty 5256ac40ed0SLinus Torvalds * good value for the TSC frequencty. 5266ac40ed0SLinus Torvalds */ 527b6e61eefSLinus Torvalds static inline int pit_verify_msb(unsigned char val) 528b6e61eefSLinus Torvalds { 529b6e61eefSLinus Torvalds /* Ignore LSB */ 530b6e61eefSLinus Torvalds inb(0x42); 531b6e61eefSLinus Torvalds return inb(0x42) == val; 532b6e61eefSLinus Torvalds } 533b6e61eefSLinus Torvalds 5349e8912e0SLinus Torvalds static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) 5356ac40ed0SLinus Torvalds { 5369e8912e0SLinus Torvalds int count; 53768f30fbeSLinus Torvalds u64 tsc = 0, prev_tsc = 0; 5386ac40ed0SLinus Torvalds 5396ac40ed0SLinus Torvalds for (count = 0; count < 50000; count++) { 540b6e61eefSLinus Torvalds if (!pit_verify_msb(val)) 5416ac40ed0SLinus Torvalds break; 54268f30fbeSLinus Torvalds prev_tsc = tsc; 5439e8912e0SLinus Torvalds tsc = get_cycles(); 5446ac40ed0SLinus Torvalds } 54568f30fbeSLinus Torvalds *deltap = get_cycles() - prev_tsc; 5469e8912e0SLinus Torvalds *tscp = tsc; 5479e8912e0SLinus Torvalds 5489e8912e0SLinus Torvalds /* 5499e8912e0SLinus Torvalds * We require _some_ success, but the quality control 5509e8912e0SLinus Torvalds * will be based on the error terms on the TSC values. 
5519e8912e0SLinus Torvalds */ 5529e8912e0SLinus Torvalds return count > 5; 5536ac40ed0SLinus Torvalds } 5546ac40ed0SLinus Torvalds 5556ac40ed0SLinus Torvalds /* 5569e8912e0SLinus Torvalds * How many MSB values do we want to see? We aim for 5579e8912e0SLinus Torvalds * a maximum error rate of 500ppm (in practice the 5589e8912e0SLinus Torvalds * real error is much smaller), but refuse to spend 55968f30fbeSLinus Torvalds * more than 50ms on it. 5606ac40ed0SLinus Torvalds */ 56168f30fbeSLinus Torvalds #define MAX_QUICK_PIT_MS 50 5629e8912e0SLinus Torvalds #define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) 5636ac40ed0SLinus Torvalds 5646ac40ed0SLinus Torvalds static unsigned long quick_pit_calibrate(void) 5656ac40ed0SLinus Torvalds { 5669e8912e0SLinus Torvalds int i; 5679e8912e0SLinus Torvalds u64 tsc, delta; 5689e8912e0SLinus Torvalds unsigned long d1, d2; 5699e8912e0SLinus Torvalds 5706ac40ed0SLinus Torvalds /* Set the Gate high, disable speaker */ 5716ac40ed0SLinus Torvalds outb((inb(0x61) & ~0x02) | 0x01, 0x61); 5726ac40ed0SLinus Torvalds 5736ac40ed0SLinus Torvalds /* 5746ac40ed0SLinus Torvalds * Counter 2, mode 0 (one-shot), binary count 5756ac40ed0SLinus Torvalds * 5766ac40ed0SLinus Torvalds * NOTE! Mode 2 decrements by two (and then the 5776ac40ed0SLinus Torvalds * output is flipped each time, giving the same 5786ac40ed0SLinus Torvalds * final output frequency as a decrement-by-one), 5796ac40ed0SLinus Torvalds * so mode 0 is much better when looking at the 5806ac40ed0SLinus Torvalds * individual counts. 5816ac40ed0SLinus Torvalds */ 5826ac40ed0SLinus Torvalds outb(0xb0, 0x43); 5836ac40ed0SLinus Torvalds 5846ac40ed0SLinus Torvalds /* Start at 0xffff */ 5856ac40ed0SLinus Torvalds outb(0xff, 0x42); 5866ac40ed0SLinus Torvalds outb(0xff, 0x42); 5876ac40ed0SLinus Torvalds 588a6a80e1dSLinus Torvalds /* 589a6a80e1dSLinus Torvalds * The PIT starts counting at the next edge, so we 590a6a80e1dSLinus Torvalds * need to delay for a microsecond. 
The easiest way 591a6a80e1dSLinus Torvalds * to do that is to just read back the 16-bit counter 592a6a80e1dSLinus Torvalds * once from the PIT. 593a6a80e1dSLinus Torvalds */ 594b6e61eefSLinus Torvalds pit_verify_msb(0); 595a6a80e1dSLinus Torvalds 5969e8912e0SLinus Torvalds if (pit_expect_msb(0xff, &tsc, &d1)) { 5979e8912e0SLinus Torvalds for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { 5989e8912e0SLinus Torvalds if (!pit_expect_msb(0xff-i, &delta, &d2)) 5999e8912e0SLinus Torvalds break; 6006ac40ed0SLinus Torvalds 6016ac40ed0SLinus Torvalds /* 6029e8912e0SLinus Torvalds * Iterate until the error is less than 500 ppm 6034156e9a8SIngo Molnar */ 6049e8912e0SLinus Torvalds delta -= tsc; 605b6e61eefSLinus Torvalds if (d1+d2 >= delta >> 11) 606b6e61eefSLinus Torvalds continue; 607b6e61eefSLinus Torvalds 608b6e61eefSLinus Torvalds /* 609b6e61eefSLinus Torvalds * Check the PIT one more time to verify that 610b6e61eefSLinus Torvalds * all TSC reads were stable wrt the PIT. 611b6e61eefSLinus Torvalds * 612b6e61eefSLinus Torvalds * This also guarantees serialization of the 613b6e61eefSLinus Torvalds * last cycle read ('d2') in pit_expect_msb. 614b6e61eefSLinus Torvalds */ 615b6e61eefSLinus Torvalds if (!pit_verify_msb(0xfe - i)) 616b6e61eefSLinus Torvalds break; 6179e8912e0SLinus Torvalds goto success; 6189e8912e0SLinus Torvalds } 6199e8912e0SLinus Torvalds } 620c767a54bSJoe Perches pr_err("Fast TSC calibration failed\n"); 6219e8912e0SLinus Torvalds return 0; 6224156e9a8SIngo Molnar 6239e8912e0SLinus Torvalds success: 6244156e9a8SIngo Molnar /* 6256ac40ed0SLinus Torvalds * Ok, if we get here, then we've seen the 6269e8912e0SLinus Torvalds * MSB of the PIT decrement 'i' times, and the 6279e8912e0SLinus Torvalds * error has shrunk to less than 500 ppm. 
 *
 * As a result, we can depend on there not being
 * any odd delays anywhere, and the TSC reads are
 * reliable (within the error).
 *
 * kHz = ticks / time-in-seconds / 1000;
 * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000
 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000)
 */
	delta *= PIT_TICK_RATE;
	do_div(delta, i*256*1000);
	pr_info("Fast TSC calibration using PIT\n");
	return delta;
}

/**
 * native_calibrate_tsc - calibrate the tsc on boot
 *
 * Returns the TSC frequency in kHz, or 0 if calibration failed entirely
 * (in which case the caller must treat the TSC as unusable).
 *
 * Tries the cheap methods first (MSR-based for Intel Atom SoCs, then the
 * quick PIT method) and falls back to the slow cross-checked PIT vs.
 * HPET/PMTIMER calibration loop described in the comment below.
 */
unsigned long native_calibrate_tsc(void)
{
	u64 tsc1, tsc2, delta, ref1, ref2;
	unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
	unsigned long flags, latch, ms, fast_calibrate;
	int hpet = is_hpet_enabled(), i, loopmin;

	/* Calibrate TSC using MSR for Intel Atom SoCs */
	local_irq_save(flags);
	fast_calibrate = try_msr_calibrate_tsc();
	local_irq_restore(flags);
	if (fast_calibrate)
		return fast_calibrate;

	/* Fast PIT-based calibration; returns 0 when disturbed (e.g. by SMIs) */
	local_irq_save(flags);
	fast_calibrate = quick_pit_calibrate();
	local_irq_restore(flags);
	if (fast_calibrate)
		return fast_calibrate;

	/*
	 * Run 5 calibration loops to get the lowest frequency value
	 * (the best estimate). We use two different calibration modes
	 * here:
	 *
	 * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and
	 * load a timeout of 50ms. We read the time right after we
	 * started the timer and wait until the PIT count down reaches
	 * zero. In each wait loop iteration we read the TSC and check
	 * the delta to the previous read. We keep track of the min
	 * and max values of that delta. The delta is mostly defined
	 * by the IO time of the PIT access, so we can detect when a
	 * SMI/SMM disturbance happened between the two reads. If the
	 * maximum time is significantly larger than the minimum time,
	 * then we discard the result and have another try.
	 *
	 * 2) Reference counter. If available we use the HPET or the
	 * PMTIMER as a reference to check the sanity of that value.
	 * We use separate TSC readouts and check inside of the
	 * reference read for a SMI/SMM disturbance. We discard
	 * disturbed values here as well. We do that around the PIT
	 * calibration delay loop as we have to wait for a certain
	 * amount of time anyway.
	 */

	/* Preset PIT loop values */
	latch = CAL_LATCH;
	ms = CAL_MS;
	loopmin = CAL_PIT_LOOPS;

	for (i = 0; i < 3; i++) {
		unsigned long tsc_pit_khz;

		/*
		 * Read the start value and the reference count of
		 * hpet/pmtimer when available. Then do the PIT
		 * calibration, which will take at least 50ms, and
		 * read the end value.
		 */
		local_irq_save(flags);
		tsc1 = tsc_read_refs(&ref1, hpet);
		tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin);
		tsc2 = tsc_read_refs(&ref2, hpet);
		local_irq_restore(flags);

		/* Pick the lowest PIT TSC calibration so far */
		tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);

		/* hpet or pmtimer available ? */
		if (ref1 == ref2)
			continue;

		/* Check, whether the sampling was disturbed by an SMI */
		if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
			continue;

		/* Convert the TSC delta into kHz against the reference timer */
		tsc2 = (tsc2 - tsc1) * 1000000LL;
		if (hpet)
			tsc2 = calc_hpet_ref(tsc2, ref1, ref2);
		else
			tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2);

		tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);

		/* Check the reference deviation */
		delta = ((u64) tsc_pit_min) * 100;
		do_div(delta, tsc_ref_min);

		/*
		 * If both calibration results are inside a 10% window
		 * then we can be sure, that the calibration
		 * succeeded. We break out of the loop right away. We
		 * use the reference value, as it is more precise.
		 */
		if (delta >= 90 && delta <= 110) {
			pr_info("PIT calibration matches %s. %d loops\n",
				hpet ? "HPET" : "PMTIMER", i + 1);
			return tsc_ref_min;
		}

		/*
		 * Check whether PIT failed more than once. This
		 * happens in virtualized environments. We need to
		 * give the virtual PC a slightly longer timeframe for
		 * the HPET/PMTIMER to make the result precise.
		 */
		if (i == 1 && tsc_pit_min == ULONG_MAX) {
			latch = CAL2_LATCH;
			ms = CAL2_MS;
			loopmin = CAL2_PIT_LOOPS;
		}
	}

	/*
	 * Now check the results.
	 */
	if (tsc_pit_min == ULONG_MAX) {
		/* PIT gave no useful value */
		pr_warn("Unable to calibrate against PIT\n");

		/* We don't have an alternative source, disable TSC */
		if (!hpet && !ref1 && !ref2) {
			pr_notice("No reference (HPET/PMTIMER) available\n");
			return 0;
		}

		/* The alternative source failed as well, disable TSC */
		if (tsc_ref_min == ULONG_MAX) {
			pr_warn("HPET/PMTIMER calibration failed\n");
			return 0;
		}

		/* Use the alternative source */
		pr_info("using %s reference calibration\n",
			hpet ? "HPET" : "PMTIMER");

		return tsc_ref_min;
	}

	/* We don't have an alternative source, use the PIT calibration value */
	if (!hpet && !ref1 && !ref2) {
		pr_info("Using PIT calibration value\n");
		return tsc_pit_min;
	}

	/* The alternative source failed, use the PIT calibration value */
	if (tsc_ref_min == ULONG_MAX) {
		pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n");
		return tsc_pit_min;
	}

	/*
	 * The calibration values differ too much. In doubt, we use
	 * the PIT value as we know that there are PMTIMERs around
	 * running at double speed. At least we let the user know:
	 */
	pr_warn("PIT calibration deviates from %s: %lu %lu\n",
		hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
	pr_info("Using PIT calibration value\n");
	return tsc_pit_min;
}

/*
 * recalibrate_cpu_khz - re-run TSC calibration and rescale the boot CPU's
 * loops_per_jiffy accordingly.
 *
 * Only supported on UP kernels; returns -ENODEV on SMP builds or when the
 * CPU has no TSC, 0 on success.
 */
int recalibrate_cpu_khz(void)
{
#ifndef CONFIG_SMP
	unsigned long cpu_khz_old = cpu_khz;

	if (cpu_has_tsc) {
		tsc_khz = x86_platform.calibrate_tsc();
		cpu_khz = tsc_khz;
		cpu_data(0).loops_per_jiffy =
			cpufreq_scale(cpu_data(0).loops_per_jiffy,
					cpu_khz_old, cpu_khz);
		return 0;
	} else
		return -ENODEV;
#else
	return -ENODEV;
#endif
}

EXPORT_SYMBOL(recalibrate_cpu_khz);


/* sched_clock() value sampled at suspend; consumed by the resume path below */
static unsigned long long cyc2ns_suspend;

/*
 * Snapshot sched_clock() before suspend so that
 * tsc_restore_sched_clock_state() can make the clock continue from the
 * same point after resume. Only meaningful when sched_clock is stable.
 */
void tsc_save_sched_clock_state(void)
{
	if (!sched_clock_stable())
		return;

	cyc2ns_suspend = sched_clock();
}

/*
 * Even on processors with invariant TSC, TSC gets reset in some of the
 * ACPI system sleep states. And in some systems BIOS seem to reinit TSC to
 * arbitrary value (still sync'd across cpu's) during resume from such sleep
 * states.
 * To cope with this, recompute the cyc2ns_offset for each cpu so
 * that sched_clock() continues from the point where it was left off during
 * suspend.
 */
void tsc_restore_sched_clock_state(void)
{
	unsigned long long offset;
	unsigned long flags;
	int cpu;

	if (!sched_clock_stable())
		return;

	local_irq_save(flags);

	/*
	 * We're coming out of suspend, there's no concurrency yet; don't
	 * bother being nice about the RCU stuff, just write to both
	 * data fields.
	 */

	/* Zero this CPU's offsets first so the sched_clock() read below is raw */
	this_cpu_write(cyc2ns.data[0].cyc2ns_offset, 0);
	this_cpu_write(cyc2ns.data[1].cyc2ns_offset, 0);

	/* Offset needed so sched_clock() resumes at the pre-suspend value */
	offset = cyc2ns_suspend - sched_clock();

	for_each_possible_cpu(cpu) {
		per_cpu(cyc2ns.data[0].cyc2ns_offset, cpu) = offset;
		per_cpu(cyc2ns.data[1].cyc2ns_offset, cpu) = offset;
	}

	local_irq_restore(flags);
}

#ifdef CONFIG_CPU_FREQ

/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
 * changes.
 *
 * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
 * not that important because current Opteron setups do not support
 * scaling on SMP anyway.
 *
 * Should fix up last_tsc too. Currently gettimeofday in the
 * first tick after the change will be slightly wrong.
 */

/* Reference values captured on the first frequency transition */
static unsigned int ref_freq;
static unsigned long loops_per_jiffy_ref;
static unsigned long tsc_khz_ref;

/*
 * cpufreq transition notifier: rescale tsc_khz and loops_per_jiffy from the
 * reference values whenever the CPU frequency changes, and mark the TSC
 * unstable when it actually varies with frequency (!CPUFREQ_CONST_LOOPS).
 * Scaling is done on PRECHANGE when speeding up and POSTCHANGE when slowing
 * down, so the values never overshoot the real frequency.
 */
static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
				void *data)
{
	struct cpufreq_freqs *freq = data;
	unsigned long *lpj;

	/* Constant-rate TSC: nothing to adjust */
	if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
		return 0;

	lpj = &boot_cpu_data.loops_per_jiffy;
#ifdef CONFIG_SMP
	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
		lpj = &cpu_data(freq->cpu).loops_per_jiffy;
#endif

	/* Latch reference values on the first notification */
	if (!ref_freq) {
		ref_freq = freq->old;
		loops_per_jiffy_ref = *lpj;
		tsc_khz_ref = tsc_khz;
	}
	if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
			(val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
		*lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);

		tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
			mark_tsc_unstable("cpufreq changes");
	}

	set_cyc2ns_scale(tsc_khz, freq->cpu);

	return 0;
}

static struct notifier_block time_cpufreq_notifier_block = {
	.notifier_call  = time_cpufreq_notifier
};

/*
 * Register the cpufreq transition notifier above, unless the TSC is absent
 * or constant-rate (in which case no adjustment is ever needed).
 */
static int __init cpufreq_tsc(void)
{
	if (!cpu_has_tsc)
		return 0;
	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
		return 0;
	cpufreq_register_notifier(&time_cpufreq_notifier_block,
				CPUFREQ_TRANSITION_NOTIFIER);
	return 0;
}

core_initcall(cpufreq_tsc);

#endif /* CONFIG_CPU_FREQ */

/* clocksource code */

static struct clocksource clocksource_tsc;

/*
 * We compare the TSC to the cycle_last value in the clocksource
 * structure to avoid a nasty time-warp. This can be observed in a
 * very small window right after one CPU updated cycle_last under
 * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
 * is smaller than the cycle_last reference value due to a TSC which
 * is slightly behind.
 * This delta is nowhere else observable, but in
 * that case it results in a forward time jump in the range of hours
 * due to the unsigned delta calculation of the time keeping core
 * code, which is necessary to support wrapping clocksources like pm
 * timer.
 */
static cycle_t read_tsc(struct clocksource *cs)
{
	cycle_t ret = (cycle_t)get_cycles();

	/* Clamp to cycle_last to avoid the time-warp described above */
	return ret >= clocksource_tsc.cycle_last ?
		ret : clocksource_tsc.cycle_last;
}

/*
 * Resume callback: reset cycle_last after a sleep state unless the TSC is
 * known to keep counting across S3 (NONSTOP_TSC_S3).
 */
static void resume_tsc(struct clocksource *cs)
{
	if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
		clocksource_tsc.cycle_last = 0;
}

static struct clocksource clocksource_tsc = {
	.name                   = "tsc",
	.rating                 = 300,
	.read                   = read_tsc,
	.resume			= resume_tsc,
	.mask                   = CLOCKSOURCE_MASK(64),
	.flags                  = CLOCK_SOURCE_IS_CONTINUOUS |
				  CLOCK_SOURCE_MUST_VERIFY,
	.archdata               = { .vclock_mode = VCLOCK_TSC },
};

/*
 * Mark the TSC unstable (once): flag it for sched_clock and irqtime
 * accounting, and demote the TSC clocksource. @reason is only used for
 * the log message.
 */
void mark_tsc_unstable(char *reason)
{
	if (!tsc_unstable) {
		tsc_unstable = 1;
		clear_sched_clock_stable();
		disable_sched_clock_irqtime();
		pr_info("Marking TSC unstable due to %s\n", reason);
		/* Change only the rating, when not registered */
		if (clocksource_tsc.mult)
			clocksource_mark_unstable(&clocksource_tsc);
		else {
			clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
			clocksource_tsc.rating = 0;
		}
	}
}

EXPORT_SYMBOL_GPL(mark_tsc_unstable);

/*
 * Set tsc_clocksource_reliable when the platform vouches for the TSC:
 * either the Geode LX RTSC that counts during suspend, or the
 * X86_FEATURE_TSC_RELIABLE synthetic flag.
 */
static void __init check_system_tsc_reliable(void)
{
#ifdef CONFIG_MGEODE_LX
	/* RTSC counts during suspend */
#define RTSC_SUSP 0x100
	unsigned long res_low, res_high;

	rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
	/* Geode_LX - the OLPC CPU has a very reliable TSC */
	if (res_low & RTSC_SUSP)
		tsc_clocksource_reliable = 1;
#endif
	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
		tsc_clocksource_reliable = 1;
}

/*
 * Make an educated guess if the TSC is trustworthy and synchronized
 * over all CPUs.
 */
int unsynchronized_tsc(void)
{
	if (!cpu_has_tsc || tsc_unstable)
		return 1;

#ifdef CONFIG_SMP
	if (apic_is_clustered_box())
		return 1;
#endif

	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
		return 0;

	if (tsc_clocksource_reliable)
		return 0;
	/*
	 * Intel systems are normally all synchronized.
	 * Exceptions must mark TSC as unstable:
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
		/* assume multi socket systems are not synchronized: */
		if (num_possible_cpus() > 1)
			return 1;
	}

	return 0;
}


static void tsc_refine_calibration_work(struct work_struct *work);
static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
/**
 * tsc_refine_calibration_work - Further refine tsc freq calibration
 * @work: ignored.
 *
 * This functions uses delayed work over a period of a
 * second to further refine the TSC freq value. Since this is
 * timer based, instead of loop based, we don't block the boot
 * process while this longer calibration is done.
 *
 * If there are any calibration anomalies (too many SMIs, etc),
 * or the refined calibration is off by 1% of the fast early
 * calibration, we throw out the new calibration and use the
 * early calibration.
 */
static void tsc_refine_calibration_work(struct work_struct *work)
{
	/* State carried between the two invocations (start/stop samples) */
	static u64 tsc_start = -1, ref_start;
	static int hpet;
	u64 tsc_stop, ref_stop, delta;
	unsigned long freq;

	/* Don't bother refining TSC on unstable systems */
	if (check_tsc_unstable())
		goto out;

	/*
	 * Since the work is started early in boot, we may be
	 * delayed the first time we expire. So set the workqueue
	 * again once we know timers are working.
	 */
	if (tsc_start == -1) {
		/*
		 * Only set hpet once, to avoid mixing hardware
		 * if the hpet becomes enabled later.
		 */
		hpet = is_hpet_enabled();
		schedule_delayed_work(&tsc_irqwork, HZ);
		tsc_start = tsc_read_refs(&ref_start, hpet);
		return;
	}

	tsc_stop = tsc_read_refs(&ref_stop, hpet);

	/* hpet or pmtimer available ? */
	if (ref_start == ref_stop)
		goto out;

	/* Check, whether the sampling was disturbed by an SMI */
	if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
		goto out;

	delta = tsc_stop - tsc_start;
	delta *= 1000000LL;
	if (hpet)
		freq = calc_hpet_ref(delta, ref_start, ref_stop);
	else
		freq = calc_pmtimer_ref(delta, ref_start, ref_stop);

	/* Make sure we're within 1% */
	if (abs(tsc_khz - freq) > tsc_khz/100)
		goto out;

	tsc_khz = freq;
	pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n",
		(unsigned long)tsc_khz / 1000,
		(unsigned long)tsc_khz % 1000);

out:
	/* Register with whichever tsc_khz survived the checks above */
	clocksource_register_khz(&clocksource_tsc, tsc_khz);
}


/*
 * Register the TSC clocksource. Reliable-TSC systems register immediately
 * with the early calibration; everyone else defers registration to
 * tsc_refine_calibration_work() via the delayed work.
 */
static int __init init_tsc_clocksource(void)
{
	if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz)
		return 0;

	if (tsc_clocksource_reliable)
		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
	/* lower the rating if we already know its unstable: */
	if (check_tsc_unstable()) {
		clocksource_tsc.rating = 0;
		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
	}

	if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
		clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;

	/*
	 * Trust the results of the earlier calibration on systems
	 * exporting a reliable TSC.
	 */
	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
		clocksource_register_khz(&clocksource_tsc, tsc_khz);
		return 0;
	}

	schedule_delayed_work(&tsc_irqwork, 0);
	return 0;
}
/*
 * We use device_initcall here, to ensure we run after the hpet
 * is fully initialized, which may occur at fs_initcall time.
 */
device_initcall(init_tsc_clocksource);

/*
 * Boot-time TSC setup: calibrate the frequency, initialize the per-cpu
 * cyc2ns scale factors, and enable TSC-based sched_clock()/delay unless
 * the TSC is disabled or unusable.
 */
void __init tsc_init(void)
{
	u64 lpj;
	int cpu;

	x86_init.timers.tsc_pre_init();

	if (!cpu_has_tsc)
		return;

	tsc_khz = x86_platform.calibrate_tsc();
	cpu_khz = tsc_khz;

	if (!tsc_khz) {
		mark_tsc_unstable("could not calculate TSC khz");
		return;
	}

	pr_info("Detected %lu.%03lu MHz processor\n",
		(unsigned long)cpu_khz / 1000,
		(unsigned long)cpu_khz % 1000);
	/*
	 * Secondary CPUs do not run through tsc_init(), so set up
	 * all the scale factors for all CPUs, assuming the same
	 * speed as the bootup CPU. (cpufreq notifiers will fix this
	 * up if their speed diverges)
	 */
	for_each_possible_cpu(cpu) {
		cyc2ns_init(cpu);
		set_cyc2ns_scale(cpu_khz, cpu);
	}

	if (tsc_disabled > 0)
		return;

	/* now allow native_sched_clock() to use rdtsc */

	tsc_disabled = 0;
	static_key_slow_inc(&__use_tsc);

	if (!no_sched_irq_time)
		enable_sched_clock_irqtime();

	/* Seed the fine-grained loops-per-jiffy value from the TSC rate */
	lpj = ((u64)tsc_khz * 1000);
	do_div(lpj, HZ);
	lpj_fine = lpj;

	use_tsc_delay();

	if (unsynchronized_tsc())
		mark_tsc_unstable("TSCs unsynchronized");

	check_system_tsc_reliable();
}

#ifdef CONFIG_SMP
/*
 * If we have a constant TSC and are using the TSC for the delay loop,
 * we can skip clock calibration if another cpu in the same socket has already
 * been calibrated. This assumes that CONSTANT_TSC applies to all
 * cpus in the socket - this should be a safe assumption.
 */
unsigned long calibrate_delay_is_known(void)
{
	int i, cpu = smp_processor_id();

	/* Without a constant TSC (or with TSC disabled) we must calibrate */
	if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
		return 0;

	/* Reuse the loops_per_jiffy of any online cpu in the same package */
	for_each_online_cpu(i)
		if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id)
			return cpu_data(i).loops_per_jiffy;
	return 0;
}
#endif