1c767a54bSJoe Perches #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 2c767a54bSJoe Perches 3bfc0f594SAlok Kataria #include <linux/kernel.h> 40ef95533SAlok Kataria #include <linux/sched.h> 50ef95533SAlok Kataria #include <linux/init.h> 60ef95533SAlok Kataria #include <linux/module.h> 70ef95533SAlok Kataria #include <linux/timer.h> 8bfc0f594SAlok Kataria #include <linux/acpi_pmtmr.h> 92dbe06faSAlok Kataria #include <linux/cpufreq.h> 108fbbc4b4SAlok Kataria #include <linux/delay.h> 118fbbc4b4SAlok Kataria #include <linux/clocksource.h> 128fbbc4b4SAlok Kataria #include <linux/percpu.h> 1308604bd9SArnd Bergmann #include <linux/timex.h> 1410b033d4SPeter Zijlstra #include <linux/static_key.h> 15bfc0f594SAlok Kataria 16bfc0f594SAlok Kataria #include <asm/hpet.h> 178fbbc4b4SAlok Kataria #include <asm/timer.h> 188fbbc4b4SAlok Kataria #include <asm/vgtod.h> 198fbbc4b4SAlok Kataria #include <asm/time.h> 208fbbc4b4SAlok Kataria #include <asm/delay.h> 2188b094fbSAlok Kataria #include <asm/hypervisor.h> 2208047c4fSThomas Gleixner #include <asm/nmi.h> 232d826404SThomas Gleixner #include <asm/x86_init.h> 240ef95533SAlok Kataria 25f24ade3aSIngo Molnar unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ 260ef95533SAlok Kataria EXPORT_SYMBOL(cpu_khz); 27f24ade3aSIngo Molnar 28f24ade3aSIngo Molnar unsigned int __read_mostly tsc_khz; 290ef95533SAlok Kataria EXPORT_SYMBOL(tsc_khz); 300ef95533SAlok Kataria 310ef95533SAlok Kataria /* 320ef95533SAlok Kataria * TSC can be unstable due to cpufreq or due to unsynced TSCs 330ef95533SAlok Kataria */ 34f24ade3aSIngo Molnar static int __read_mostly tsc_unstable; 350ef95533SAlok Kataria 360ef95533SAlok Kataria /* native_sched_clock() is called before tsc_init(), so 370ef95533SAlok Kataria we must start with the TSC soft disabled to prevent 380ef95533SAlok Kataria erroneous rdtsc usage on !cpu_has_tsc processors */ 39f24ade3aSIngo Molnar static int __read_mostly tsc_disabled = -1; 400ef95533SAlok Kataria 4110b033d4SPeter Zijlstra 
static struct static_key __use_tsc = STATIC_KEY_INIT; 4210b033d4SPeter Zijlstra 4328a00184SSuresh Siddha int tsc_clocksource_reliable; 4457c67da2SPeter Zijlstra 4520d1c86aSPeter Zijlstra /* 4620d1c86aSPeter Zijlstra * Use a ring-buffer like data structure, where a writer advances the head by 4720d1c86aSPeter Zijlstra * writing a new data entry and a reader advances the tail when it observes a 4820d1c86aSPeter Zijlstra * new entry. 4920d1c86aSPeter Zijlstra * 5020d1c86aSPeter Zijlstra * Writers are made to wait on readers until there's space to write a new 5120d1c86aSPeter Zijlstra * entry. 5220d1c86aSPeter Zijlstra * 5320d1c86aSPeter Zijlstra * This means that we can always use an {offset, mul} pair to compute a ns 5420d1c86aSPeter Zijlstra * value that is 'roughly' in the right direction, even if we're writing a new 5520d1c86aSPeter Zijlstra * {offset, mul} pair during the clock read. 5620d1c86aSPeter Zijlstra * 5720d1c86aSPeter Zijlstra * The down-side is that we can no longer guarantee strict monotonicity anymore 5820d1c86aSPeter Zijlstra * (assuming the TSC was that to begin with), because while we compute the 5920d1c86aSPeter Zijlstra * intersection point of the two clock slopes and make sure the time is 6020d1c86aSPeter Zijlstra * continuous at the point of switching; we can no longer guarantee a reader is 6120d1c86aSPeter Zijlstra * strictly before or after the switch point. 6220d1c86aSPeter Zijlstra * 6320d1c86aSPeter Zijlstra * It does mean a reader no longer needs to disable IRQs in order to avoid 6420d1c86aSPeter Zijlstra * CPU-Freq updates messing with his times, and similarly an NMI reader will 6520d1c86aSPeter Zijlstra * no longer run the risk of hitting half-written state. 
6620d1c86aSPeter Zijlstra */ 6720d1c86aSPeter Zijlstra 6820d1c86aSPeter Zijlstra struct cyc2ns { 6920d1c86aSPeter Zijlstra struct cyc2ns_data data[2]; /* 0 + 2*24 = 48 */ 7020d1c86aSPeter Zijlstra struct cyc2ns_data *head; /* 48 + 8 = 56 */ 7120d1c86aSPeter Zijlstra struct cyc2ns_data *tail; /* 56 + 8 = 64 */ 7220d1c86aSPeter Zijlstra }; /* exactly fits one cacheline */ 7320d1c86aSPeter Zijlstra 7420d1c86aSPeter Zijlstra static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); 7520d1c86aSPeter Zijlstra 7620d1c86aSPeter Zijlstra struct cyc2ns_data *cyc2ns_read_begin(void) 7720d1c86aSPeter Zijlstra { 7820d1c86aSPeter Zijlstra struct cyc2ns_data *head; 7920d1c86aSPeter Zijlstra 8020d1c86aSPeter Zijlstra preempt_disable(); 8120d1c86aSPeter Zijlstra 8220d1c86aSPeter Zijlstra head = this_cpu_read(cyc2ns.head); 8320d1c86aSPeter Zijlstra /* 8420d1c86aSPeter Zijlstra * Ensure we observe the entry when we observe the pointer to it. 8520d1c86aSPeter Zijlstra * matches the wmb from cyc2ns_write_end(). 8620d1c86aSPeter Zijlstra */ 8720d1c86aSPeter Zijlstra smp_read_barrier_depends(); 8820d1c86aSPeter Zijlstra head->__count++; 8920d1c86aSPeter Zijlstra barrier(); 9020d1c86aSPeter Zijlstra 9120d1c86aSPeter Zijlstra return head; 9220d1c86aSPeter Zijlstra } 9320d1c86aSPeter Zijlstra 9420d1c86aSPeter Zijlstra void cyc2ns_read_end(struct cyc2ns_data *head) 9520d1c86aSPeter Zijlstra { 9620d1c86aSPeter Zijlstra barrier(); 9720d1c86aSPeter Zijlstra /* 9820d1c86aSPeter Zijlstra * If we're the outer most nested read; update the tail pointer 9920d1c86aSPeter Zijlstra * when we're done. This notifies possible pending writers 10020d1c86aSPeter Zijlstra * that we've observed the head pointer and that the other 10120d1c86aSPeter Zijlstra * entry is now free. 
10220d1c86aSPeter Zijlstra */ 10320d1c86aSPeter Zijlstra if (!--head->__count) { 10420d1c86aSPeter Zijlstra /* 10520d1c86aSPeter Zijlstra * x86-TSO does not reorder writes with older reads; 10620d1c86aSPeter Zijlstra * therefore once this write becomes visible to another 10720d1c86aSPeter Zijlstra * cpu, we must be finished reading the cyc2ns_data. 10820d1c86aSPeter Zijlstra * 10920d1c86aSPeter Zijlstra * matches with cyc2ns_write_begin(). 11020d1c86aSPeter Zijlstra */ 11120d1c86aSPeter Zijlstra this_cpu_write(cyc2ns.tail, head); 11220d1c86aSPeter Zijlstra } 11320d1c86aSPeter Zijlstra preempt_enable(); 11420d1c86aSPeter Zijlstra } 11520d1c86aSPeter Zijlstra 11620d1c86aSPeter Zijlstra /* 11720d1c86aSPeter Zijlstra * Begin writing a new @data entry for @cpu. 11820d1c86aSPeter Zijlstra * 11920d1c86aSPeter Zijlstra * Assumes some sort of write side lock; currently 'provided' by the assumption 12020d1c86aSPeter Zijlstra * that cpufreq will call its notifiers sequentially. 12120d1c86aSPeter Zijlstra */ 12220d1c86aSPeter Zijlstra static struct cyc2ns_data *cyc2ns_write_begin(int cpu) 12320d1c86aSPeter Zijlstra { 12420d1c86aSPeter Zijlstra struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); 12520d1c86aSPeter Zijlstra struct cyc2ns_data *data = c2n->data; 12620d1c86aSPeter Zijlstra 12720d1c86aSPeter Zijlstra if (data == c2n->head) 12820d1c86aSPeter Zijlstra data++; 12920d1c86aSPeter Zijlstra 13020d1c86aSPeter Zijlstra /* XXX send an IPI to @cpu in order to guarantee a read? */ 13120d1c86aSPeter Zijlstra 13220d1c86aSPeter Zijlstra /* 13320d1c86aSPeter Zijlstra * When we observe the tail write from cyc2ns_read_end(), 13420d1c86aSPeter Zijlstra * the cpu must be done with that entry and its safe 13520d1c86aSPeter Zijlstra * to start writing to it. 
13620d1c86aSPeter Zijlstra */ 13720d1c86aSPeter Zijlstra while (c2n->tail == data) 13820d1c86aSPeter Zijlstra cpu_relax(); 13920d1c86aSPeter Zijlstra 14020d1c86aSPeter Zijlstra return data; 14120d1c86aSPeter Zijlstra } 14220d1c86aSPeter Zijlstra 14320d1c86aSPeter Zijlstra static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data) 14420d1c86aSPeter Zijlstra { 14520d1c86aSPeter Zijlstra struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); 14620d1c86aSPeter Zijlstra 14720d1c86aSPeter Zijlstra /* 14820d1c86aSPeter Zijlstra * Ensure the @data writes are visible before we publish the 14920d1c86aSPeter Zijlstra * entry. Matches the data-depencency in cyc2ns_read_begin(). 15020d1c86aSPeter Zijlstra */ 15120d1c86aSPeter Zijlstra smp_wmb(); 15220d1c86aSPeter Zijlstra 15320d1c86aSPeter Zijlstra ACCESS_ONCE(c2n->head) = data; 15420d1c86aSPeter Zijlstra } 15520d1c86aSPeter Zijlstra 15620d1c86aSPeter Zijlstra /* 15720d1c86aSPeter Zijlstra * Accelerators for sched_clock() 15857c67da2SPeter Zijlstra * convert from cycles(64bits) => nanoseconds (64bits) 15957c67da2SPeter Zijlstra * basic equation: 16057c67da2SPeter Zijlstra * ns = cycles / (freq / ns_per_sec) 16157c67da2SPeter Zijlstra * ns = cycles * (ns_per_sec / freq) 16257c67da2SPeter Zijlstra * ns = cycles * (10^9 / (cpu_khz * 10^3)) 16357c67da2SPeter Zijlstra * ns = cycles * (10^6 / cpu_khz) 16457c67da2SPeter Zijlstra * 16557c67da2SPeter Zijlstra * Then we use scaling math (suggested by george@mvista.com) to get: 16657c67da2SPeter Zijlstra * ns = cycles * (10^6 * SC / cpu_khz) / SC 16757c67da2SPeter Zijlstra * ns = cycles * cyc2ns_scale / SC 16857c67da2SPeter Zijlstra * 16957c67da2SPeter Zijlstra * And since SC is a constant power of two, we can convert the div 17057c67da2SPeter Zijlstra * into a shift. 17157c67da2SPeter Zijlstra * 17257c67da2SPeter Zijlstra * We can use khz divisor instead of mhz to keep a better precision, since 17357c67da2SPeter Zijlstra * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. 
17457c67da2SPeter Zijlstra * (mathieu.desnoyers@polymtl.ca) 17557c67da2SPeter Zijlstra * 17657c67da2SPeter Zijlstra * -johnstul@us.ibm.com "math is hard, lets go shopping!" 17757c67da2SPeter Zijlstra */ 17857c67da2SPeter Zijlstra 17957c67da2SPeter Zijlstra #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ 18057c67da2SPeter Zijlstra 18120d1c86aSPeter Zijlstra static void cyc2ns_data_init(struct cyc2ns_data *data) 18220d1c86aSPeter Zijlstra { 1835e3c1afdSPeter Zijlstra data->cyc2ns_mul = 0; 18420d1c86aSPeter Zijlstra data->cyc2ns_shift = CYC2NS_SCALE_FACTOR; 18520d1c86aSPeter Zijlstra data->cyc2ns_offset = 0; 18620d1c86aSPeter Zijlstra data->__count = 0; 18720d1c86aSPeter Zijlstra } 18820d1c86aSPeter Zijlstra 18920d1c86aSPeter Zijlstra static void cyc2ns_init(int cpu) 19020d1c86aSPeter Zijlstra { 19120d1c86aSPeter Zijlstra struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); 19220d1c86aSPeter Zijlstra 19320d1c86aSPeter Zijlstra cyc2ns_data_init(&c2n->data[0]); 19420d1c86aSPeter Zijlstra cyc2ns_data_init(&c2n->data[1]); 19520d1c86aSPeter Zijlstra 19620d1c86aSPeter Zijlstra c2n->head = c2n->data; 19720d1c86aSPeter Zijlstra c2n->tail = c2n->data; 19820d1c86aSPeter Zijlstra } 19920d1c86aSPeter Zijlstra 20057c67da2SPeter Zijlstra static inline unsigned long long cycles_2_ns(unsigned long long cyc) 20157c67da2SPeter Zijlstra { 20220d1c86aSPeter Zijlstra struct cyc2ns_data *data, *tail; 20320d1c86aSPeter Zijlstra unsigned long long ns; 20420d1c86aSPeter Zijlstra 20520d1c86aSPeter Zijlstra /* 20620d1c86aSPeter Zijlstra * See cyc2ns_read_*() for details; replicated in order to avoid 20720d1c86aSPeter Zijlstra * an extra few instructions that came with the abstraction. 20820d1c86aSPeter Zijlstra * Notable, it allows us to only do the __count and tail update 20920d1c86aSPeter Zijlstra * dance when its actually needed. 
21020d1c86aSPeter Zijlstra */ 21120d1c86aSPeter Zijlstra 212569d6557SSteven Rostedt preempt_disable_notrace(); 21320d1c86aSPeter Zijlstra data = this_cpu_read(cyc2ns.head); 21420d1c86aSPeter Zijlstra tail = this_cpu_read(cyc2ns.tail); 21520d1c86aSPeter Zijlstra 21620d1c86aSPeter Zijlstra if (likely(data == tail)) { 21720d1c86aSPeter Zijlstra ns = data->cyc2ns_offset; 21820d1c86aSPeter Zijlstra ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); 21920d1c86aSPeter Zijlstra } else { 22020d1c86aSPeter Zijlstra data->__count++; 22120d1c86aSPeter Zijlstra 22220d1c86aSPeter Zijlstra barrier(); 22320d1c86aSPeter Zijlstra 22420d1c86aSPeter Zijlstra ns = data->cyc2ns_offset; 22520d1c86aSPeter Zijlstra ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); 22620d1c86aSPeter Zijlstra 22720d1c86aSPeter Zijlstra barrier(); 22820d1c86aSPeter Zijlstra 22920d1c86aSPeter Zijlstra if (!--data->__count) 23020d1c86aSPeter Zijlstra this_cpu_write(cyc2ns.tail, data); 23120d1c86aSPeter Zijlstra } 232569d6557SSteven Rostedt preempt_enable_notrace(); 23320d1c86aSPeter Zijlstra 23457c67da2SPeter Zijlstra return ns; 23557c67da2SPeter Zijlstra } 23657c67da2SPeter Zijlstra 23757c67da2SPeter Zijlstra static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) 23857c67da2SPeter Zijlstra { 23920d1c86aSPeter Zijlstra unsigned long long tsc_now, ns_now; 24020d1c86aSPeter Zijlstra struct cyc2ns_data *data; 24120d1c86aSPeter Zijlstra unsigned long flags; 24257c67da2SPeter Zijlstra 24357c67da2SPeter Zijlstra local_irq_save(flags); 24457c67da2SPeter Zijlstra sched_clock_idle_sleep_event(); 24557c67da2SPeter Zijlstra 24620d1c86aSPeter Zijlstra if (!cpu_khz) 24720d1c86aSPeter Zijlstra goto done; 24820d1c86aSPeter Zijlstra 24920d1c86aSPeter Zijlstra data = cyc2ns_write_begin(cpu); 25057c67da2SPeter Zijlstra 2514ea1636bSAndy Lutomirski tsc_now = rdtsc(); 25257c67da2SPeter Zijlstra ns_now = cycles_2_ns(tsc_now); 25357c67da2SPeter Zijlstra 25420d1c86aSPeter Zijlstra /* 
25520d1c86aSPeter Zijlstra * Compute a new multiplier as per the above comment and ensure our 25620d1c86aSPeter Zijlstra * time function is continuous; see the comment near struct 25720d1c86aSPeter Zijlstra * cyc2ns_data. 25820d1c86aSPeter Zijlstra */ 25989171579SMichal Nazarewicz data->cyc2ns_mul = 26089171579SMichal Nazarewicz DIV_ROUND_CLOSEST(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR, 26189171579SMichal Nazarewicz cpu_khz); 26220d1c86aSPeter Zijlstra data->cyc2ns_shift = CYC2NS_SCALE_FACTOR; 26320d1c86aSPeter Zijlstra data->cyc2ns_offset = ns_now - 26420d1c86aSPeter Zijlstra mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); 26557c67da2SPeter Zijlstra 26620d1c86aSPeter Zijlstra cyc2ns_write_end(cpu, data); 26720d1c86aSPeter Zijlstra 26820d1c86aSPeter Zijlstra done: 26957c67da2SPeter Zijlstra sched_clock_idle_wakeup_event(0); 27057c67da2SPeter Zijlstra local_irq_restore(flags); 27157c67da2SPeter Zijlstra } 2720ef95533SAlok Kataria /* 2730ef95533SAlok Kataria * Scheduler clock - returns current time in nanosec units. 2740ef95533SAlok Kataria */ 2750ef95533SAlok Kataria u64 native_sched_clock(void) 2760ef95533SAlok Kataria { 27720d1c86aSPeter Zijlstra u64 tsc_now; 2780ef95533SAlok Kataria 2790ef95533SAlok Kataria /* 2800ef95533SAlok Kataria * Fall back to jiffies if there's no TSC available: 2810ef95533SAlok Kataria * ( But note that we still use it if the TSC is marked 2820ef95533SAlok Kataria * unstable. We do this because unlike Time Of Day, 2830ef95533SAlok Kataria * the scheduler clock tolerates small errors and it's 2840ef95533SAlok Kataria * very important for it to be as fast as the platform 2853ad2f3fbSDaniel Mack * can achieve it. 
) 2860ef95533SAlok Kataria */ 28710b033d4SPeter Zijlstra if (!static_key_false(&__use_tsc)) { 2880ef95533SAlok Kataria /* No locking but a rare wrong value is not a big deal: */ 2890ef95533SAlok Kataria return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); 2900ef95533SAlok Kataria } 2910ef95533SAlok Kataria 2920ef95533SAlok Kataria /* read the Time Stamp Counter: */ 2934ea1636bSAndy Lutomirski tsc_now = rdtsc(); 2940ef95533SAlok Kataria 2950ef95533SAlok Kataria /* return the value in ns */ 29620d1c86aSPeter Zijlstra return cycles_2_ns(tsc_now); 2970ef95533SAlok Kataria } 2980ef95533SAlok Kataria 2990ef95533SAlok Kataria /* We need to define a real function for sched_clock, to override the 3000ef95533SAlok Kataria weak default version */ 3010ef95533SAlok Kataria #ifdef CONFIG_PARAVIRT 3020ef95533SAlok Kataria unsigned long long sched_clock(void) 3030ef95533SAlok Kataria { 3040ef95533SAlok Kataria return paravirt_sched_clock(); 3050ef95533SAlok Kataria } 3060ef95533SAlok Kataria #else 3070ef95533SAlok Kataria unsigned long long 3080ef95533SAlok Kataria sched_clock(void) __attribute__((alias("native_sched_clock"))); 3090ef95533SAlok Kataria #endif 3100ef95533SAlok Kataria 3110ef95533SAlok Kataria int check_tsc_unstable(void) 3120ef95533SAlok Kataria { 3130ef95533SAlok Kataria return tsc_unstable; 3140ef95533SAlok Kataria } 3150ef95533SAlok Kataria EXPORT_SYMBOL_GPL(check_tsc_unstable); 3160ef95533SAlok Kataria 317c73deb6aSAdrian Hunter int check_tsc_disabled(void) 318c73deb6aSAdrian Hunter { 319c73deb6aSAdrian Hunter return tsc_disabled; 320c73deb6aSAdrian Hunter } 321c73deb6aSAdrian Hunter EXPORT_SYMBOL_GPL(check_tsc_disabled); 322c73deb6aSAdrian Hunter 3230ef95533SAlok Kataria #ifdef CONFIG_X86_TSC 3240ef95533SAlok Kataria int __init notsc_setup(char *str) 3250ef95533SAlok Kataria { 326c767a54bSJoe Perches pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n"); 3270ef95533SAlok Kataria tsc_disabled = 1; 3280ef95533SAlok Kataria return 
1; 3290ef95533SAlok Kataria } 3300ef95533SAlok Kataria #else 3310ef95533SAlok Kataria /* 3320ef95533SAlok Kataria * disable flag for tsc. Takes effect by clearing the TSC cpu flag 3330ef95533SAlok Kataria * in cpu/common.c 3340ef95533SAlok Kataria */ 3350ef95533SAlok Kataria int __init notsc_setup(char *str) 3360ef95533SAlok Kataria { 3370ef95533SAlok Kataria setup_clear_cpu_cap(X86_FEATURE_TSC); 3380ef95533SAlok Kataria return 1; 3390ef95533SAlok Kataria } 3400ef95533SAlok Kataria #endif 3410ef95533SAlok Kataria 3420ef95533SAlok Kataria __setup("notsc", notsc_setup); 343bfc0f594SAlok Kataria 344e82b8e4eSVenkatesh Pallipadi static int no_sched_irq_time; 345e82b8e4eSVenkatesh Pallipadi 346395628efSAlok Kataria static int __init tsc_setup(char *str) 347395628efSAlok Kataria { 348395628efSAlok Kataria if (!strcmp(str, "reliable")) 349395628efSAlok Kataria tsc_clocksource_reliable = 1; 350e82b8e4eSVenkatesh Pallipadi if (!strncmp(str, "noirqtime", 9)) 351e82b8e4eSVenkatesh Pallipadi no_sched_irq_time = 1; 352395628efSAlok Kataria return 1; 353395628efSAlok Kataria } 354395628efSAlok Kataria 355395628efSAlok Kataria __setup("tsc=", tsc_setup); 356395628efSAlok Kataria 357bfc0f594SAlok Kataria #define MAX_RETRIES 5 358bfc0f594SAlok Kataria #define SMI_TRESHOLD 50000 359bfc0f594SAlok Kataria 360bfc0f594SAlok Kataria /* 361bfc0f594SAlok Kataria * Read TSC and the reference counters. 
Take care of SMI disturbance 362bfc0f594SAlok Kataria */ 363827014beSThomas Gleixner static u64 tsc_read_refs(u64 *p, int hpet) 364bfc0f594SAlok Kataria { 365bfc0f594SAlok Kataria u64 t1, t2; 366bfc0f594SAlok Kataria int i; 367bfc0f594SAlok Kataria 368bfc0f594SAlok Kataria for (i = 0; i < MAX_RETRIES; i++) { 369bfc0f594SAlok Kataria t1 = get_cycles(); 370bfc0f594SAlok Kataria if (hpet) 371827014beSThomas Gleixner *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; 372bfc0f594SAlok Kataria else 373827014beSThomas Gleixner *p = acpi_pm_read_early(); 374bfc0f594SAlok Kataria t2 = get_cycles(); 375bfc0f594SAlok Kataria if ((t2 - t1) < SMI_TRESHOLD) 376bfc0f594SAlok Kataria return t2; 377bfc0f594SAlok Kataria } 378bfc0f594SAlok Kataria return ULLONG_MAX; 379bfc0f594SAlok Kataria } 380bfc0f594SAlok Kataria 381ec0c15afSLinus Torvalds /* 382d683ef7aSThomas Gleixner * Calculate the TSC frequency from HPET reference 383d683ef7aSThomas Gleixner */ 384d683ef7aSThomas Gleixner static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2) 385d683ef7aSThomas Gleixner { 386d683ef7aSThomas Gleixner u64 tmp; 387d683ef7aSThomas Gleixner 388d683ef7aSThomas Gleixner if (hpet2 < hpet1) 389d683ef7aSThomas Gleixner hpet2 += 0x100000000ULL; 390d683ef7aSThomas Gleixner hpet2 -= hpet1; 391d683ef7aSThomas Gleixner tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); 392d683ef7aSThomas Gleixner do_div(tmp, 1000000); 393d683ef7aSThomas Gleixner do_div(deltatsc, tmp); 394d683ef7aSThomas Gleixner 395d683ef7aSThomas Gleixner return (unsigned long) deltatsc; 396d683ef7aSThomas Gleixner } 397d683ef7aSThomas Gleixner 398d683ef7aSThomas Gleixner /* 399d683ef7aSThomas Gleixner * Calculate the TSC frequency from PMTimer reference 400d683ef7aSThomas Gleixner */ 401d683ef7aSThomas Gleixner static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) 402d683ef7aSThomas Gleixner { 403d683ef7aSThomas Gleixner u64 tmp; 404d683ef7aSThomas Gleixner 405d683ef7aSThomas Gleixner if (!pm1 && !pm2) 
406d683ef7aSThomas Gleixner return ULONG_MAX; 407d683ef7aSThomas Gleixner 408d683ef7aSThomas Gleixner if (pm2 < pm1) 409d683ef7aSThomas Gleixner pm2 += (u64)ACPI_PM_OVRRUN; 410d683ef7aSThomas Gleixner pm2 -= pm1; 411d683ef7aSThomas Gleixner tmp = pm2 * 1000000000LL; 412d683ef7aSThomas Gleixner do_div(tmp, PMTMR_TICKS_PER_SEC); 413d683ef7aSThomas Gleixner do_div(deltatsc, tmp); 414d683ef7aSThomas Gleixner 415d683ef7aSThomas Gleixner return (unsigned long) deltatsc; 416d683ef7aSThomas Gleixner } 417d683ef7aSThomas Gleixner 418a977c400SThomas Gleixner #define CAL_MS 10 419b7743970SDeepak Saxena #define CAL_LATCH (PIT_TICK_RATE / (1000 / CAL_MS)) 420a977c400SThomas Gleixner #define CAL_PIT_LOOPS 1000 421a977c400SThomas Gleixner 422a977c400SThomas Gleixner #define CAL2_MS 50 423b7743970SDeepak Saxena #define CAL2_LATCH (PIT_TICK_RATE / (1000 / CAL2_MS)) 424a977c400SThomas Gleixner #define CAL2_PIT_LOOPS 5000 425a977c400SThomas Gleixner 426cce3e057SThomas Gleixner 427ec0c15afSLinus Torvalds /* 428ec0c15afSLinus Torvalds * Try to calibrate the TSC against the Programmable 429ec0c15afSLinus Torvalds * Interrupt Timer and return the frequency of the TSC 430ec0c15afSLinus Torvalds * in kHz. 431ec0c15afSLinus Torvalds * 432ec0c15afSLinus Torvalds * Return ULONG_MAX on failure to calibrate. 433ec0c15afSLinus Torvalds */ 434a977c400SThomas Gleixner static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) 435ec0c15afSLinus Torvalds { 436ec0c15afSLinus Torvalds u64 tsc, t1, t2, delta; 437ec0c15afSLinus Torvalds unsigned long tscmin, tscmax; 438ec0c15afSLinus Torvalds int pitcnt; 439ec0c15afSLinus Torvalds 440ec0c15afSLinus Torvalds /* Set the Gate high, disable speaker */ 441ec0c15afSLinus Torvalds outb((inb(0x61) & ~0x02) | 0x01, 0x61); 442ec0c15afSLinus Torvalds 443ec0c15afSLinus Torvalds /* 444ec0c15afSLinus Torvalds * Setup CTC channel 2* for mode 0, (interrupt on terminal 445ec0c15afSLinus Torvalds * count mode), binary count. 
Set the latch register to 50ms 446ec0c15afSLinus Torvalds * (LSB then MSB) to begin countdown. 447ec0c15afSLinus Torvalds */ 448ec0c15afSLinus Torvalds outb(0xb0, 0x43); 449a977c400SThomas Gleixner outb(latch & 0xff, 0x42); 450a977c400SThomas Gleixner outb(latch >> 8, 0x42); 451ec0c15afSLinus Torvalds 452ec0c15afSLinus Torvalds tsc = t1 = t2 = get_cycles(); 453ec0c15afSLinus Torvalds 454ec0c15afSLinus Torvalds pitcnt = 0; 455ec0c15afSLinus Torvalds tscmax = 0; 456ec0c15afSLinus Torvalds tscmin = ULONG_MAX; 457ec0c15afSLinus Torvalds while ((inb(0x61) & 0x20) == 0) { 458ec0c15afSLinus Torvalds t2 = get_cycles(); 459ec0c15afSLinus Torvalds delta = t2 - tsc; 460ec0c15afSLinus Torvalds tsc = t2; 461ec0c15afSLinus Torvalds if ((unsigned long) delta < tscmin) 462ec0c15afSLinus Torvalds tscmin = (unsigned int) delta; 463ec0c15afSLinus Torvalds if ((unsigned long) delta > tscmax) 464ec0c15afSLinus Torvalds tscmax = (unsigned int) delta; 465ec0c15afSLinus Torvalds pitcnt++; 466ec0c15afSLinus Torvalds } 467ec0c15afSLinus Torvalds 468ec0c15afSLinus Torvalds /* 469ec0c15afSLinus Torvalds * Sanity checks: 470ec0c15afSLinus Torvalds * 471a977c400SThomas Gleixner * If we were not able to read the PIT more than loopmin 472ec0c15afSLinus Torvalds * times, then we have been hit by a massive SMI 473ec0c15afSLinus Torvalds * 474ec0c15afSLinus Torvalds * If the maximum is 10 times larger than the minimum, 475ec0c15afSLinus Torvalds * then we got hit by an SMI as well. 
476ec0c15afSLinus Torvalds */ 477a977c400SThomas Gleixner if (pitcnt < loopmin || tscmax > 10 * tscmin) 478ec0c15afSLinus Torvalds return ULONG_MAX; 479ec0c15afSLinus Torvalds 480ec0c15afSLinus Torvalds /* Calculate the PIT value */ 481ec0c15afSLinus Torvalds delta = t2 - t1; 482a977c400SThomas Gleixner do_div(delta, ms); 483ec0c15afSLinus Torvalds return delta; 484ec0c15afSLinus Torvalds } 485ec0c15afSLinus Torvalds 4866ac40ed0SLinus Torvalds /* 4876ac40ed0SLinus Torvalds * This reads the current MSB of the PIT counter, and 4886ac40ed0SLinus Torvalds * checks if we are running on sufficiently fast and 4896ac40ed0SLinus Torvalds * non-virtualized hardware. 4906ac40ed0SLinus Torvalds * 4916ac40ed0SLinus Torvalds * Our expectations are: 4926ac40ed0SLinus Torvalds * 4936ac40ed0SLinus Torvalds * - the PIT is running at roughly 1.19MHz 4946ac40ed0SLinus Torvalds * 4956ac40ed0SLinus Torvalds * - each IO is going to take about 1us on real hardware, 4966ac40ed0SLinus Torvalds * but we allow it to be much faster (by a factor of 10) or 4976ac40ed0SLinus Torvalds * _slightly_ slower (ie we allow up to a 2us read+counter 4986ac40ed0SLinus Torvalds * update - anything else implies a unacceptably slow CPU 4996ac40ed0SLinus Torvalds * or PIT for the fast calibration to work. 5006ac40ed0SLinus Torvalds * 5016ac40ed0SLinus Torvalds * - with 256 PIT ticks to read the value, we have 214us to 5026ac40ed0SLinus Torvalds * see the same MSB (and overhead like doing a single TSC 5036ac40ed0SLinus Torvalds * read per MSB value etc). 5046ac40ed0SLinus Torvalds * 5056ac40ed0SLinus Torvalds * - We're doing 2 reads per loop (LSB, MSB), and we expect 5066ac40ed0SLinus Torvalds * them each to take about a microsecond on real hardware. 5076ac40ed0SLinus Torvalds * So we expect a count value of around 100. But we'll be 5086ac40ed0SLinus Torvalds * generous, and accept anything over 50. 
5096ac40ed0SLinus Torvalds * 5106ac40ed0SLinus Torvalds * - if the PIT is stuck, and we see *many* more reads, we 5116ac40ed0SLinus Torvalds * return early (and the next caller of pit_expect_msb() 5126ac40ed0SLinus Torvalds * then consider it a failure when they don't see the 5136ac40ed0SLinus Torvalds * next expected value). 5146ac40ed0SLinus Torvalds * 5156ac40ed0SLinus Torvalds * These expectations mean that we know that we have seen the 5166ac40ed0SLinus Torvalds * transition from one expected value to another with a fairly 5176ac40ed0SLinus Torvalds * high accuracy, and we didn't miss any events. We can thus 5186ac40ed0SLinus Torvalds * use the TSC value at the transitions to calculate a pretty 5196ac40ed0SLinus Torvalds * good value for the TSC frequencty. 5206ac40ed0SLinus Torvalds */ 521b6e61eefSLinus Torvalds static inline int pit_verify_msb(unsigned char val) 522b6e61eefSLinus Torvalds { 523b6e61eefSLinus Torvalds /* Ignore LSB */ 524b6e61eefSLinus Torvalds inb(0x42); 525b6e61eefSLinus Torvalds return inb(0x42) == val; 526b6e61eefSLinus Torvalds } 527b6e61eefSLinus Torvalds 5289e8912e0SLinus Torvalds static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) 5296ac40ed0SLinus Torvalds { 5309e8912e0SLinus Torvalds int count; 53168f30fbeSLinus Torvalds u64 tsc = 0, prev_tsc = 0; 5326ac40ed0SLinus Torvalds 5336ac40ed0SLinus Torvalds for (count = 0; count < 50000; count++) { 534b6e61eefSLinus Torvalds if (!pit_verify_msb(val)) 5356ac40ed0SLinus Torvalds break; 53668f30fbeSLinus Torvalds prev_tsc = tsc; 5379e8912e0SLinus Torvalds tsc = get_cycles(); 5386ac40ed0SLinus Torvalds } 53968f30fbeSLinus Torvalds *deltap = get_cycles() - prev_tsc; 5409e8912e0SLinus Torvalds *tscp = tsc; 5419e8912e0SLinus Torvalds 5429e8912e0SLinus Torvalds /* 5439e8912e0SLinus Torvalds * We require _some_ success, but the quality control 5449e8912e0SLinus Torvalds * will be based on the error terms on the TSC values. 
5459e8912e0SLinus Torvalds */ 5469e8912e0SLinus Torvalds return count > 5; 5476ac40ed0SLinus Torvalds } 5486ac40ed0SLinus Torvalds 5496ac40ed0SLinus Torvalds /* 5509e8912e0SLinus Torvalds * How many MSB values do we want to see? We aim for 5519e8912e0SLinus Torvalds * a maximum error rate of 500ppm (in practice the 5529e8912e0SLinus Torvalds * real error is much smaller), but refuse to spend 55368f30fbeSLinus Torvalds * more than 50ms on it. 5546ac40ed0SLinus Torvalds */ 55568f30fbeSLinus Torvalds #define MAX_QUICK_PIT_MS 50 5569e8912e0SLinus Torvalds #define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) 5576ac40ed0SLinus Torvalds 5586ac40ed0SLinus Torvalds static unsigned long quick_pit_calibrate(void) 5596ac40ed0SLinus Torvalds { 5609e8912e0SLinus Torvalds int i; 5619e8912e0SLinus Torvalds u64 tsc, delta; 5629e8912e0SLinus Torvalds unsigned long d1, d2; 5639e8912e0SLinus Torvalds 5646ac40ed0SLinus Torvalds /* Set the Gate high, disable speaker */ 5656ac40ed0SLinus Torvalds outb((inb(0x61) & ~0x02) | 0x01, 0x61); 5666ac40ed0SLinus Torvalds 5676ac40ed0SLinus Torvalds /* 5686ac40ed0SLinus Torvalds * Counter 2, mode 0 (one-shot), binary count 5696ac40ed0SLinus Torvalds * 5706ac40ed0SLinus Torvalds * NOTE! Mode 2 decrements by two (and then the 5716ac40ed0SLinus Torvalds * output is flipped each time, giving the same 5726ac40ed0SLinus Torvalds * final output frequency as a decrement-by-one), 5736ac40ed0SLinus Torvalds * so mode 0 is much better when looking at the 5746ac40ed0SLinus Torvalds * individual counts. 5756ac40ed0SLinus Torvalds */ 5766ac40ed0SLinus Torvalds outb(0xb0, 0x43); 5776ac40ed0SLinus Torvalds 5786ac40ed0SLinus Torvalds /* Start at 0xffff */ 5796ac40ed0SLinus Torvalds outb(0xff, 0x42); 5806ac40ed0SLinus Torvalds outb(0xff, 0x42); 5816ac40ed0SLinus Torvalds 582a6a80e1dSLinus Torvalds /* 583a6a80e1dSLinus Torvalds * The PIT starts counting at the next edge, so we 584a6a80e1dSLinus Torvalds * need to delay for a microsecond. 
The easiest way 585a6a80e1dSLinus Torvalds * to do that is to just read back the 16-bit counter 586a6a80e1dSLinus Torvalds * once from the PIT. 587a6a80e1dSLinus Torvalds */ 588b6e61eefSLinus Torvalds pit_verify_msb(0); 589a6a80e1dSLinus Torvalds 5909e8912e0SLinus Torvalds if (pit_expect_msb(0xff, &tsc, &d1)) { 5919e8912e0SLinus Torvalds for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { 5929e8912e0SLinus Torvalds if (!pit_expect_msb(0xff-i, &delta, &d2)) 5939e8912e0SLinus Torvalds break; 5946ac40ed0SLinus Torvalds 5956ac40ed0SLinus Torvalds /* 5969e8912e0SLinus Torvalds * Iterate until the error is less than 500 ppm 5974156e9a8SIngo Molnar */ 5989e8912e0SLinus Torvalds delta -= tsc; 599b6e61eefSLinus Torvalds if (d1+d2 >= delta >> 11) 600b6e61eefSLinus Torvalds continue; 601b6e61eefSLinus Torvalds 602b6e61eefSLinus Torvalds /* 603b6e61eefSLinus Torvalds * Check the PIT one more time to verify that 604b6e61eefSLinus Torvalds * all TSC reads were stable wrt the PIT. 605b6e61eefSLinus Torvalds * 606b6e61eefSLinus Torvalds * This also guarantees serialization of the 607b6e61eefSLinus Torvalds * last cycle read ('d2') in pit_expect_msb. 608b6e61eefSLinus Torvalds */ 609b6e61eefSLinus Torvalds if (!pit_verify_msb(0xfe - i)) 610b6e61eefSLinus Torvalds break; 6119e8912e0SLinus Torvalds goto success; 6129e8912e0SLinus Torvalds } 6139e8912e0SLinus Torvalds } 61452045217SAlexandre Demers pr_info("Fast TSC calibration failed\n"); 6159e8912e0SLinus Torvalds return 0; 6164156e9a8SIngo Molnar 6179e8912e0SLinus Torvalds success: 6184156e9a8SIngo Molnar /* 6196ac40ed0SLinus Torvalds * Ok, if we get here, then we've seen the 6209e8912e0SLinus Torvalds * MSB of the PIT decrement 'i' times, and the 6219e8912e0SLinus Torvalds * error has shrunk to less than 500 ppm. 
6226ac40ed0SLinus Torvalds * 6236ac40ed0SLinus Torvalds * As a result, we can depend on there not being 6246ac40ed0SLinus Torvalds * any odd delays anywhere, and the TSC reads are 62568f30fbeSLinus Torvalds * reliable (within the error). 6266ac40ed0SLinus Torvalds * 6276ac40ed0SLinus Torvalds * kHz = ticks / time-in-seconds / 1000; 6289e8912e0SLinus Torvalds * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000 6299e8912e0SLinus Torvalds * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000) 6306ac40ed0SLinus Torvalds */ 6319e8912e0SLinus Torvalds delta *= PIT_TICK_RATE; 6329e8912e0SLinus Torvalds do_div(delta, i*256*1000); 633c767a54bSJoe Perches pr_info("Fast TSC calibration using PIT\n"); 6346ac40ed0SLinus Torvalds return delta; 6356ac40ed0SLinus Torvalds } 636ec0c15afSLinus Torvalds 637bfc0f594SAlok Kataria /** 638e93ef949SAlok Kataria * native_calibrate_tsc - calibrate the tsc on boot 639bfc0f594SAlok Kataria */ 640e93ef949SAlok Kataria unsigned long native_calibrate_tsc(void) 641bfc0f594SAlok Kataria { 642827014beSThomas Gleixner u64 tsc1, tsc2, delta, ref1, ref2; 643fbb16e24SThomas Gleixner unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; 6442d826404SThomas Gleixner unsigned long flags, latch, ms, fast_calibrate; 645a977c400SThomas Gleixner int hpet = is_hpet_enabled(), i, loopmin; 646bfc0f594SAlok Kataria 6477da7c156SBin Gao /* Calibrate TSC using MSR for Intel Atom SoCs */ 6487da7c156SBin Gao local_irq_save(flags); 6495f0e0309SThomas Gleixner fast_calibrate = try_msr_calibrate_tsc(); 6507da7c156SBin Gao local_irq_restore(flags); 6515f0e0309SThomas Gleixner if (fast_calibrate) 6527da7c156SBin Gao return fast_calibrate; 6537da7c156SBin Gao 654bfc0f594SAlok Kataria local_irq_save(flags); 6556ac40ed0SLinus Torvalds fast_calibrate = quick_pit_calibrate(); 656bfc0f594SAlok Kataria local_irq_restore(flags); 6576ac40ed0SLinus Torvalds if (fast_calibrate) 6586ac40ed0SLinus Torvalds return fast_calibrate; 659fbb16e24SThomas Gleixner 660fbb16e24SThomas 
Gleixner /* 661fbb16e24SThomas Gleixner * Run up to 3 calibration loops to get the lowest frequency value 662fbb16e24SThomas Gleixner * (the best estimate). We use two different calibration modes 663fbb16e24SThomas Gleixner * here: 664fbb16e24SThomas Gleixner * 665fbb16e24SThomas Gleixner * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and 666fbb16e24SThomas Gleixner * load a timeout of 50ms. We read the time right after we 667fbb16e24SThomas Gleixner * started the timer and wait until the PIT count down reaches 668fbb16e24SThomas Gleixner * zero. In each wait loop iteration we read the TSC and check 669fbb16e24SThomas Gleixner * the delta to the previous read. We keep track of the min 670fbb16e24SThomas Gleixner * and max values of that delta. The delta is mostly defined 671fbb16e24SThomas Gleixner * by the IO time of the PIT access, so we can detect when a 6720d2eb44fSLucas De Marchi * SMI/SMM disturbance happened between the two reads. If the 673fbb16e24SThomas Gleixner * maximum time is significantly larger than the minimum time, 674fbb16e24SThomas Gleixner * then we discard the result and have another try. 675fbb16e24SThomas Gleixner * 676fbb16e24SThomas Gleixner * 2) Reference counter. If available we use the HPET or the 677fbb16e24SThomas Gleixner * PMTIMER as a reference to check the sanity of that value. 678fbb16e24SThomas Gleixner * We use separate TSC readouts and check inside of the 679fbb16e24SThomas Gleixner * reference read for a SMI/SMM disturbance. We discard 680fbb16e24SThomas Gleixner * disturbed values here as well. We do that around the PIT 681fbb16e24SThomas Gleixner * calibration delay loop as we have to wait for a certain 682fbb16e24SThomas Gleixner * amount of time anyway.
683fbb16e24SThomas Gleixner */ 684a977c400SThomas Gleixner 685a977c400SThomas Gleixner /* Preset PIT loop values */ 686a977c400SThomas Gleixner latch = CAL_LATCH; 687a977c400SThomas Gleixner ms = CAL_MS; 688a977c400SThomas Gleixner loopmin = CAL_PIT_LOOPS; 689a977c400SThomas Gleixner 690a977c400SThomas Gleixner for (i = 0; i < 3; i++) { 691ec0c15afSLinus Torvalds unsigned long tsc_pit_khz; 692bfc0f594SAlok Kataria 693fbb16e24SThomas Gleixner /* 694fbb16e24SThomas Gleixner * Read the start value and the reference count of 695ec0c15afSLinus Torvalds * hpet/pmtimer when available. Then do the PIT 696ec0c15afSLinus Torvalds * calibration, which will take at least 50ms, and 697ec0c15afSLinus Torvalds * read the end value. 698fbb16e24SThomas Gleixner */ 699ec0c15afSLinus Torvalds local_irq_save(flags); 700827014beSThomas Gleixner tsc1 = tsc_read_refs(&ref1, hpet); 701a977c400SThomas Gleixner tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin); 702827014beSThomas Gleixner tsc2 = tsc_read_refs(&ref2, hpet); 703bfc0f594SAlok Kataria local_irq_restore(flags); 704bfc0f594SAlok Kataria 705ec0c15afSLinus Torvalds /* Pick the lowest PIT TSC calibration so far */ 706ec0c15afSLinus Torvalds tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); 707bfc0f594SAlok Kataria 708bfc0f594SAlok Kataria /* hpet or pmtimer available ? 
*/ 70962627becSJohn Stultz if (ref1 == ref2) 710fbb16e24SThomas Gleixner continue; 711bfc0f594SAlok Kataria 712bfc0f594SAlok Kataria /* Check, whether the sampling was disturbed by an SMI */ 713fbb16e24SThomas Gleixner if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) 714fbb16e24SThomas Gleixner continue; 715bfc0f594SAlok Kataria 716bfc0f594SAlok Kataria tsc2 = (tsc2 - tsc1) * 1000000LL; 717d683ef7aSThomas Gleixner if (hpet) 718827014beSThomas Gleixner tsc2 = calc_hpet_ref(tsc2, ref1, ref2); 719d683ef7aSThomas Gleixner else 720827014beSThomas Gleixner tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2); 721bfc0f594SAlok Kataria 722fbb16e24SThomas Gleixner tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); 723a977c400SThomas Gleixner 724a977c400SThomas Gleixner /* Check the reference deviation */ 725a977c400SThomas Gleixner delta = ((u64) tsc_pit_min) * 100; 726a977c400SThomas Gleixner do_div(delta, tsc_ref_min); 727a977c400SThomas Gleixner 728a977c400SThomas Gleixner /* 729a977c400SThomas Gleixner * If both calibration results are inside a 10% window 730a977c400SThomas Gleixner * then we can be sure, that the calibration 731a977c400SThomas Gleixner * succeeded. We break out of the loop right away. We 732a977c400SThomas Gleixner * use the reference value, as it is more precise. 733a977c400SThomas Gleixner */ 734a977c400SThomas Gleixner if (delta >= 90 && delta <= 110) { 735c767a54bSJoe Perches pr_info("PIT calibration matches %s. %d loops\n", 736a977c400SThomas Gleixner hpet ? "HPET" : "PMTIMER", i + 1); 737a977c400SThomas Gleixner return tsc_ref_min; 738bfc0f594SAlok Kataria } 739bfc0f594SAlok Kataria 740a977c400SThomas Gleixner /* 741a977c400SThomas Gleixner * Check whether PIT failed more than once. This 742a977c400SThomas Gleixner * happens in virtualized environments. We need to 743a977c400SThomas Gleixner * give the virtual PC a slightly longer timeframe for 744a977c400SThomas Gleixner * the HPET/PMTIMER to make the result precise. 
745a977c400SThomas Gleixner */ 746a977c400SThomas Gleixner if (i == 1 && tsc_pit_min == ULONG_MAX) { 747a977c400SThomas Gleixner latch = CAL2_LATCH; 748a977c400SThomas Gleixner ms = CAL2_MS; 749a977c400SThomas Gleixner loopmin = CAL2_PIT_LOOPS; 750a977c400SThomas Gleixner } 751bfc0f594SAlok Kataria } 752bfc0f594SAlok Kataria 753fbb16e24SThomas Gleixner /* 754fbb16e24SThomas Gleixner * Now check the results. 755fbb16e24SThomas Gleixner */ 756fbb16e24SThomas Gleixner if (tsc_pit_min == ULONG_MAX) { 757fbb16e24SThomas Gleixner /* PIT gave no useful value */ 758c767a54bSJoe Perches pr_warn("Unable to calibrate against PIT\n"); 759fbb16e24SThomas Gleixner 760fbb16e24SThomas Gleixner /* We don't have an alternative source, disable TSC */ 761827014beSThomas Gleixner if (!hpet && !ref1 && !ref2) { 762c767a54bSJoe Perches pr_notice("No reference (HPET/PMTIMER) available\n"); 763fbb16e24SThomas Gleixner return 0; 764fbb16e24SThomas Gleixner } 765fbb16e24SThomas Gleixner 766fbb16e24SThomas Gleixner /* The alternative source failed as well, disable TSC */ 767fbb16e24SThomas Gleixner if (tsc_ref_min == ULONG_MAX) { 768c767a54bSJoe Perches pr_warn("HPET/PMTIMER calibration failed\n"); 769fbb16e24SThomas Gleixner return 0; 770fbb16e24SThomas Gleixner } 771fbb16e24SThomas Gleixner 772fbb16e24SThomas Gleixner /* Use the alternative source */ 773c767a54bSJoe Perches pr_info("using %s reference calibration\n", 774fbb16e24SThomas Gleixner hpet ? 
"HPET" : "PMTIMER"); 775fbb16e24SThomas Gleixner 776fbb16e24SThomas Gleixner return tsc_ref_min; 777fbb16e24SThomas Gleixner } 778fbb16e24SThomas Gleixner 779fbb16e24SThomas Gleixner /* We don't have an alternative source, use the PIT calibration value */ 780827014beSThomas Gleixner if (!hpet && !ref1 && !ref2) { 781c767a54bSJoe Perches pr_info("Using PIT calibration value\n"); 782fbb16e24SThomas Gleixner return tsc_pit_min; 783fbb16e24SThomas Gleixner } 784fbb16e24SThomas Gleixner 785fbb16e24SThomas Gleixner /* The alternative source failed, use the PIT calibration value */ 786fbb16e24SThomas Gleixner if (tsc_ref_min == ULONG_MAX) { 787c767a54bSJoe Perches pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n"); 788fbb16e24SThomas Gleixner return tsc_pit_min; 789fbb16e24SThomas Gleixner } 790fbb16e24SThomas Gleixner 791fbb16e24SThomas Gleixner /* 792fbb16e24SThomas Gleixner * The calibration values differ too much. In doubt, we use 793fbb16e24SThomas Gleixner * the PIT value as we know that there are PMTIMERs around 794a977c400SThomas Gleixner * running at double speed. At least we let the user know: 795fbb16e24SThomas Gleixner */ 796c767a54bSJoe Perches pr_warn("PIT calibration deviates from %s: %lu %lu\n", 797a977c400SThomas Gleixner hpet ? 
"HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); 798c767a54bSJoe Perches pr_info("Using PIT calibration value\n"); 799fbb16e24SThomas Gleixner return tsc_pit_min; 800fbb16e24SThomas Gleixner } 801bfc0f594SAlok Kataria 802bfc0f594SAlok Kataria int recalibrate_cpu_khz(void) 803bfc0f594SAlok Kataria { 804bfc0f594SAlok Kataria #ifndef CONFIG_SMP 805bfc0f594SAlok Kataria unsigned long cpu_khz_old = cpu_khz; 806bfc0f594SAlok Kataria 807bfc0f594SAlok Kataria if (cpu_has_tsc) { 8082d826404SThomas Gleixner tsc_khz = x86_platform.calibrate_tsc(); 809e93ef949SAlok Kataria cpu_khz = tsc_khz; 810bfc0f594SAlok Kataria cpu_data(0).loops_per_jiffy = 811bfc0f594SAlok Kataria cpufreq_scale(cpu_data(0).loops_per_jiffy, 812bfc0f594SAlok Kataria cpu_khz_old, cpu_khz); 813bfc0f594SAlok Kataria return 0; 814bfc0f594SAlok Kataria } else 815bfc0f594SAlok Kataria return -ENODEV; 816bfc0f594SAlok Kataria #else 817bfc0f594SAlok Kataria return -ENODEV; 818bfc0f594SAlok Kataria #endif 819bfc0f594SAlok Kataria } 820bfc0f594SAlok Kataria 821bfc0f594SAlok Kataria EXPORT_SYMBOL(recalibrate_cpu_khz); 822bfc0f594SAlok Kataria 8232dbe06faSAlok Kataria 824cd7240c0SSuresh Siddha static unsigned long long cyc2ns_suspend; 825cd7240c0SSuresh Siddha 826b74f05d6SMarcelo Tosatti void tsc_save_sched_clock_state(void) 827cd7240c0SSuresh Siddha { 82835af99e6SPeter Zijlstra if (!sched_clock_stable()) 829cd7240c0SSuresh Siddha return; 830cd7240c0SSuresh Siddha 831cd7240c0SSuresh Siddha cyc2ns_suspend = sched_clock(); 832cd7240c0SSuresh Siddha } 833cd7240c0SSuresh Siddha 834cd7240c0SSuresh Siddha /* 835cd7240c0SSuresh Siddha * Even on processors with invariant TSC, TSC gets reset in some the 836cd7240c0SSuresh Siddha * ACPI system sleep states. And in some systems BIOS seem to reinit TSC to 837cd7240c0SSuresh Siddha * arbitrary value (still sync'd across cpu's) during resume from such sleep 838cd7240c0SSuresh Siddha * states. 
To cope up with this, recompute the cyc2ns_offset for each cpu so 839cd7240c0SSuresh Siddha * that sched_clock() continues from the point where it was left off during 840cd7240c0SSuresh Siddha * suspend. 841cd7240c0SSuresh Siddha */ 842b74f05d6SMarcelo Tosatti void tsc_restore_sched_clock_state(void) 843cd7240c0SSuresh Siddha { 844cd7240c0SSuresh Siddha unsigned long long offset; 845cd7240c0SSuresh Siddha unsigned long flags; 846cd7240c0SSuresh Siddha int cpu; 847cd7240c0SSuresh Siddha 84835af99e6SPeter Zijlstra if (!sched_clock_stable()) 849cd7240c0SSuresh Siddha return; 850cd7240c0SSuresh Siddha 851cd7240c0SSuresh Siddha local_irq_save(flags); 852cd7240c0SSuresh Siddha 85320d1c86aSPeter Zijlstra /* 85420d1c86aSPeter Zijlstra * We're comming out of suspend, there's no concurrency yet; don't 85520d1c86aSPeter Zijlstra * bother being nice about the RCU stuff, just write to both 85620d1c86aSPeter Zijlstra * data fields. 85720d1c86aSPeter Zijlstra */ 85820d1c86aSPeter Zijlstra 85920d1c86aSPeter Zijlstra this_cpu_write(cyc2ns.data[0].cyc2ns_offset, 0); 86020d1c86aSPeter Zijlstra this_cpu_write(cyc2ns.data[1].cyc2ns_offset, 0); 86120d1c86aSPeter Zijlstra 862cd7240c0SSuresh Siddha offset = cyc2ns_suspend - sched_clock(); 863cd7240c0SSuresh Siddha 86420d1c86aSPeter Zijlstra for_each_possible_cpu(cpu) { 86520d1c86aSPeter Zijlstra per_cpu(cyc2ns.data[0].cyc2ns_offset, cpu) = offset; 86620d1c86aSPeter Zijlstra per_cpu(cyc2ns.data[1].cyc2ns_offset, cpu) = offset; 86720d1c86aSPeter Zijlstra } 868cd7240c0SSuresh Siddha 869cd7240c0SSuresh Siddha local_irq_restore(flags); 870cd7240c0SSuresh Siddha } 871cd7240c0SSuresh Siddha 8722dbe06faSAlok Kataria #ifdef CONFIG_CPU_FREQ 8732dbe06faSAlok Kataria 8742dbe06faSAlok Kataria /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency 8752dbe06faSAlok Kataria * changes. 8762dbe06faSAlok Kataria * 8772dbe06faSAlok Kataria * RED-PEN: On SMP we assume all CPUs run with the same frequency. 
It's 8782dbe06faSAlok Kataria * not that important because current Opteron setups do not support 8792dbe06faSAlok Kataria * scaling on SMP anyroads. 8802dbe06faSAlok Kataria * 8812dbe06faSAlok Kataria * Should fix up last_tsc too. Currently gettimeofday in the 8822dbe06faSAlok Kataria * first tick after the change will be slightly wrong. 8832dbe06faSAlok Kataria */ 8842dbe06faSAlok Kataria 8852dbe06faSAlok Kataria static unsigned int ref_freq; 8862dbe06faSAlok Kataria static unsigned long loops_per_jiffy_ref; 8872dbe06faSAlok Kataria static unsigned long tsc_khz_ref; 8882dbe06faSAlok Kataria 8892dbe06faSAlok Kataria static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, 8902dbe06faSAlok Kataria void *data) 8912dbe06faSAlok Kataria { 8922dbe06faSAlok Kataria struct cpufreq_freqs *freq = data; 893931db6a3SDave Jones unsigned long *lpj; 8942dbe06faSAlok Kataria 8952dbe06faSAlok Kataria if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) 8962dbe06faSAlok Kataria return 0; 8972dbe06faSAlok Kataria 8982dbe06faSAlok Kataria lpj = &boot_cpu_data.loops_per_jiffy; 899931db6a3SDave Jones #ifdef CONFIG_SMP 900931db6a3SDave Jones if (!(freq->flags & CPUFREQ_CONST_LOOPS)) 901931db6a3SDave Jones lpj = &cpu_data(freq->cpu).loops_per_jiffy; 9022dbe06faSAlok Kataria #endif 9032dbe06faSAlok Kataria 9042dbe06faSAlok Kataria if (!ref_freq) { 9052dbe06faSAlok Kataria ref_freq = freq->old; 9062dbe06faSAlok Kataria loops_per_jiffy_ref = *lpj; 9072dbe06faSAlok Kataria tsc_khz_ref = tsc_khz; 9082dbe06faSAlok Kataria } 9092dbe06faSAlok Kataria if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || 9100b443eadSViresh Kumar (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { 9112dbe06faSAlok Kataria *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); 9122dbe06faSAlok Kataria 9132dbe06faSAlok Kataria tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); 9142dbe06faSAlok Kataria if (!(freq->flags & CPUFREQ_CONST_LOOPS)) 9152dbe06faSAlok 
Kataria mark_tsc_unstable("cpufreq changes"); 9162dbe06faSAlok Kataria 91752a8968cSPeter Zijlstra set_cyc2ns_scale(tsc_khz, freq->cpu); 9183896c329SPeter Zijlstra } 9192dbe06faSAlok Kataria 9202dbe06faSAlok Kataria return 0; 9212dbe06faSAlok Kataria } 9222dbe06faSAlok Kataria 9232dbe06faSAlok Kataria static struct notifier_block time_cpufreq_notifier_block = { 9242dbe06faSAlok Kataria .notifier_call = time_cpufreq_notifier 9252dbe06faSAlok Kataria }; 9262dbe06faSAlok Kataria 9272dbe06faSAlok Kataria static int __init cpufreq_tsc(void) 9282dbe06faSAlok Kataria { 929060700b5SLinus Torvalds if (!cpu_has_tsc) 930060700b5SLinus Torvalds return 0; 931060700b5SLinus Torvalds if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) 932060700b5SLinus Torvalds return 0; 9332dbe06faSAlok Kataria cpufreq_register_notifier(&time_cpufreq_notifier_block, 9342dbe06faSAlok Kataria CPUFREQ_TRANSITION_NOTIFIER); 9352dbe06faSAlok Kataria return 0; 9362dbe06faSAlok Kataria } 9372dbe06faSAlok Kataria 9382dbe06faSAlok Kataria core_initcall(cpufreq_tsc); 9392dbe06faSAlok Kataria 9402dbe06faSAlok Kataria #endif /* CONFIG_CPU_FREQ */ 9418fbbc4b4SAlok Kataria 9428fbbc4b4SAlok Kataria /* clocksource code */ 9438fbbc4b4SAlok Kataria 9448fbbc4b4SAlok Kataria static struct clocksource clocksource_tsc; 9458fbbc4b4SAlok Kataria 9468fbbc4b4SAlok Kataria /* 94709ec5442SThomas Gleixner * We used to compare the TSC to the cycle_last value in the clocksource 9488fbbc4b4SAlok Kataria * structure to avoid a nasty time-warp. This can be observed in a 9498fbbc4b4SAlok Kataria * very small window right after one CPU updated cycle_last under 9508fbbc4b4SAlok Kataria * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which 9518fbbc4b4SAlok Kataria * is smaller than the cycle_last reference value due to a TSC which 9528fbbc4b4SAlok Kataria * is slighty behind. 
This delta is nowhere else observable, but in 9538fbbc4b4SAlok Kataria * that case it results in a forward time jump in the range of hours 9548fbbc4b4SAlok Kataria * due to the unsigned delta calculation of the time keeping core 9558fbbc4b4SAlok Kataria * code, which is necessary to support wrapping clocksources like pm 9568fbbc4b4SAlok Kataria * timer. 95709ec5442SThomas Gleixner * 95809ec5442SThomas Gleixner * This sanity check is now done in the core timekeeping code. 95909ec5442SThomas Gleixner * checking the result of read_tsc() - cycle_last for being negative. 96009ec5442SThomas Gleixner * That works because CLOCKSOURCE_MASK(64) does not mask out any bit. 9618fbbc4b4SAlok Kataria */ 9628e19608eSMagnus Damm static cycle_t read_tsc(struct clocksource *cs) 9638fbbc4b4SAlok Kataria { 96427c63405SAndy Lutomirski return (cycle_t)rdtsc_ordered(); 9658fbbc4b4SAlok Kataria } 9668fbbc4b4SAlok Kataria 96709ec5442SThomas Gleixner /* 96809ec5442SThomas Gleixner * .mask MUST be CLOCKSOURCE_MASK(64). 
See comment above read_tsc() 96909ec5442SThomas Gleixner */ 9708fbbc4b4SAlok Kataria static struct clocksource clocksource_tsc = { 9718fbbc4b4SAlok Kataria .name = "tsc", 9728fbbc4b4SAlok Kataria .rating = 300, 9738fbbc4b4SAlok Kataria .read = read_tsc, 9748fbbc4b4SAlok Kataria .mask = CLOCKSOURCE_MASK(64), 9758fbbc4b4SAlok Kataria .flags = CLOCK_SOURCE_IS_CONTINUOUS | 9768fbbc4b4SAlok Kataria CLOCK_SOURCE_MUST_VERIFY, 97798d0ac38SAndy Lutomirski .archdata = { .vclock_mode = VCLOCK_TSC }, 9788fbbc4b4SAlok Kataria }; 9798fbbc4b4SAlok Kataria 9808fbbc4b4SAlok Kataria void mark_tsc_unstable(char *reason) 9818fbbc4b4SAlok Kataria { 9828fbbc4b4SAlok Kataria if (!tsc_unstable) { 9838fbbc4b4SAlok Kataria tsc_unstable = 1; 98435af99e6SPeter Zijlstra clear_sched_clock_stable(); 985e82b8e4eSVenkatesh Pallipadi disable_sched_clock_irqtime(); 986c767a54bSJoe Perches pr_info("Marking TSC unstable due to %s\n", reason); 9878fbbc4b4SAlok Kataria /* Change only the rating, when not registered */ 9888fbbc4b4SAlok Kataria if (clocksource_tsc.mult) 9897285dd7fSThomas Gleixner clocksource_mark_unstable(&clocksource_tsc); 9907285dd7fSThomas Gleixner else { 9917285dd7fSThomas Gleixner clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; 9928fbbc4b4SAlok Kataria clocksource_tsc.rating = 0; 9938fbbc4b4SAlok Kataria } 9948fbbc4b4SAlok Kataria } 9957285dd7fSThomas Gleixner } 9968fbbc4b4SAlok Kataria 9978fbbc4b4SAlok Kataria EXPORT_SYMBOL_GPL(mark_tsc_unstable); 9988fbbc4b4SAlok Kataria 999395628efSAlok Kataria static void __init check_system_tsc_reliable(void) 1000395628efSAlok Kataria { 10018fbbc4b4SAlok Kataria #ifdef CONFIG_MGEODE_LX 10028fbbc4b4SAlok Kataria /* RTSC counts during suspend */ 10038fbbc4b4SAlok Kataria #define RTSC_SUSP 0x100 10048fbbc4b4SAlok Kataria unsigned long res_low, res_high; 10058fbbc4b4SAlok Kataria 10068fbbc4b4SAlok Kataria rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); 100700097c4fSThadeu Lima de Souza Cascardo /* Geode_LX - the OLPC CPU has a very 
reliable TSC */ 10088fbbc4b4SAlok Kataria if (res_low & RTSC_SUSP) 1009395628efSAlok Kataria tsc_clocksource_reliable = 1; 10108fbbc4b4SAlok Kataria #endif 1011395628efSAlok Kataria if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) 1012395628efSAlok Kataria tsc_clocksource_reliable = 1; 1013395628efSAlok Kataria } 10148fbbc4b4SAlok Kataria 10158fbbc4b4SAlok Kataria /* 10168fbbc4b4SAlok Kataria * Make an educated guess if the TSC is trustworthy and synchronized 10178fbbc4b4SAlok Kataria * over all CPUs. 10188fbbc4b4SAlok Kataria */ 1019148f9bb8SPaul Gortmaker int unsynchronized_tsc(void) 10208fbbc4b4SAlok Kataria { 10218fbbc4b4SAlok Kataria if (!cpu_has_tsc || tsc_unstable) 10228fbbc4b4SAlok Kataria return 1; 10238fbbc4b4SAlok Kataria 10243e5095d1SIngo Molnar #ifdef CONFIG_SMP 10258fbbc4b4SAlok Kataria if (apic_is_clustered_box()) 10268fbbc4b4SAlok Kataria return 1; 10278fbbc4b4SAlok Kataria #endif 10288fbbc4b4SAlok Kataria 10298fbbc4b4SAlok Kataria if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) 10308fbbc4b4SAlok Kataria return 0; 1031d3b8f889Sjohn stultz 1032d3b8f889Sjohn stultz if (tsc_clocksource_reliable) 1033d3b8f889Sjohn stultz return 0; 10348fbbc4b4SAlok Kataria /* 10358fbbc4b4SAlok Kataria * Intel systems are normally all synchronized. 
10368fbbc4b4SAlok Kataria * Exceptions must mark TSC as unstable: 10378fbbc4b4SAlok Kataria */ 10388fbbc4b4SAlok Kataria if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { 10398fbbc4b4SAlok Kataria /* assume multi socket systems are not synchronized: */ 10408fbbc4b4SAlok Kataria if (num_possible_cpus() > 1) 1041d3b8f889Sjohn stultz return 1; 10428fbbc4b4SAlok Kataria } 10438fbbc4b4SAlok Kataria 1044d3b8f889Sjohn stultz return 0; 10458fbbc4b4SAlok Kataria } 10468fbbc4b4SAlok Kataria 104708ec0c58SJohn Stultz 104808ec0c58SJohn Stultz static void tsc_refine_calibration_work(struct work_struct *work); 104908ec0c58SJohn Stultz static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work); 105008ec0c58SJohn Stultz /** 105108ec0c58SJohn Stultz * tsc_refine_calibration_work - Further refine tsc freq calibration 105208ec0c58SJohn Stultz * @work - ignored. 105308ec0c58SJohn Stultz * 105408ec0c58SJohn Stultz * This functions uses delayed work over a period of a 105508ec0c58SJohn Stultz * second to further refine the TSC freq value. Since this is 105608ec0c58SJohn Stultz * timer based, instead of loop based, we don't block the boot 105708ec0c58SJohn Stultz * process while this longer calibration is done. 105808ec0c58SJohn Stultz * 10590d2eb44fSLucas De Marchi * If there are any calibration anomalies (too many SMIs, etc), 106008ec0c58SJohn Stultz * or the refined calibration is off by 1% of the fast early 106108ec0c58SJohn Stultz * calibration, we throw out the new calibration and use the 106208ec0c58SJohn Stultz * early calibration. 
106308ec0c58SJohn Stultz */ 106408ec0c58SJohn Stultz static void tsc_refine_calibration_work(struct work_struct *work) 106508ec0c58SJohn Stultz { 106608ec0c58SJohn Stultz static u64 tsc_start = -1, ref_start; 106708ec0c58SJohn Stultz static int hpet; 106808ec0c58SJohn Stultz u64 tsc_stop, ref_stop, delta; 106908ec0c58SJohn Stultz unsigned long freq; 107008ec0c58SJohn Stultz 107108ec0c58SJohn Stultz /* Don't bother refining TSC on unstable systems */ 107208ec0c58SJohn Stultz if (check_tsc_unstable()) 107308ec0c58SJohn Stultz goto out; 107408ec0c58SJohn Stultz 107508ec0c58SJohn Stultz /* 107608ec0c58SJohn Stultz * Since the work is started early in boot, we may be 107708ec0c58SJohn Stultz * delayed the first time we expire. So set the workqueue 107808ec0c58SJohn Stultz * again once we know timers are working. 107908ec0c58SJohn Stultz */ 108008ec0c58SJohn Stultz if (tsc_start == -1) { 108108ec0c58SJohn Stultz /* 108208ec0c58SJohn Stultz * Only set hpet once, to avoid mixing hardware 108308ec0c58SJohn Stultz * if the hpet becomes enabled later. 108408ec0c58SJohn Stultz */ 108508ec0c58SJohn Stultz hpet = is_hpet_enabled(); 108608ec0c58SJohn Stultz schedule_delayed_work(&tsc_irqwork, HZ); 108708ec0c58SJohn Stultz tsc_start = tsc_read_refs(&ref_start, hpet); 108808ec0c58SJohn Stultz return; 108908ec0c58SJohn Stultz } 109008ec0c58SJohn Stultz 109108ec0c58SJohn Stultz tsc_stop = tsc_read_refs(&ref_stop, hpet); 109208ec0c58SJohn Stultz 109308ec0c58SJohn Stultz /* hpet or pmtimer available ? 
*/ 109462627becSJohn Stultz if (ref_start == ref_stop) 109508ec0c58SJohn Stultz goto out; 109608ec0c58SJohn Stultz 109708ec0c58SJohn Stultz /* Check, whether the sampling was disturbed by an SMI */ 109808ec0c58SJohn Stultz if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX) 109908ec0c58SJohn Stultz goto out; 110008ec0c58SJohn Stultz 110108ec0c58SJohn Stultz delta = tsc_stop - tsc_start; 110208ec0c58SJohn Stultz delta *= 1000000LL; 110308ec0c58SJohn Stultz if (hpet) 110408ec0c58SJohn Stultz freq = calc_hpet_ref(delta, ref_start, ref_stop); 110508ec0c58SJohn Stultz else 110608ec0c58SJohn Stultz freq = calc_pmtimer_ref(delta, ref_start, ref_stop); 110708ec0c58SJohn Stultz 110808ec0c58SJohn Stultz /* Make sure we're within 1% */ 110908ec0c58SJohn Stultz if (abs(tsc_khz - freq) > tsc_khz/100) 111008ec0c58SJohn Stultz goto out; 111108ec0c58SJohn Stultz 111208ec0c58SJohn Stultz tsc_khz = freq; 1113c767a54bSJoe Perches pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n", 1114c767a54bSJoe Perches (unsigned long)tsc_khz / 1000, 111508ec0c58SJohn Stultz (unsigned long)tsc_khz % 1000); 111608ec0c58SJohn Stultz 111708ec0c58SJohn Stultz out: 111808ec0c58SJohn Stultz clocksource_register_khz(&clocksource_tsc, tsc_khz); 111908ec0c58SJohn Stultz } 112008ec0c58SJohn Stultz 112108ec0c58SJohn Stultz 112208ec0c58SJohn Stultz static int __init init_tsc_clocksource(void) 11238fbbc4b4SAlok Kataria { 112429fe359cSThomas Gleixner if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz) 1125a8760ecaSThomas Gleixner return 0; 1126a8760ecaSThomas Gleixner 1127395628efSAlok Kataria if (tsc_clocksource_reliable) 1128395628efSAlok Kataria clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; 11298fbbc4b4SAlok Kataria /* lower the rating if we already know its unstable: */ 11308fbbc4b4SAlok Kataria if (check_tsc_unstable()) { 11318fbbc4b4SAlok Kataria clocksource_tsc.rating = 0; 11328fbbc4b4SAlok Kataria clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; 11338fbbc4b4SAlok Kataria } 
113457779dc2SAlok Kataria 113582f9c080SFeng Tang if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3)) 113682f9c080SFeng Tang clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; 113782f9c080SFeng Tang 113857779dc2SAlok Kataria /* 113957779dc2SAlok Kataria * Trust the results of the earlier calibration on systems 114057779dc2SAlok Kataria * exporting a reliable TSC. 114157779dc2SAlok Kataria */ 114257779dc2SAlok Kataria if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { 114357779dc2SAlok Kataria clocksource_register_khz(&clocksource_tsc, tsc_khz); 114457779dc2SAlok Kataria return 0; 114557779dc2SAlok Kataria } 114657779dc2SAlok Kataria 114708ec0c58SJohn Stultz schedule_delayed_work(&tsc_irqwork, 0); 114808ec0c58SJohn Stultz return 0; 11498fbbc4b4SAlok Kataria } 115008ec0c58SJohn Stultz /* 115108ec0c58SJohn Stultz * We use device_initcall here, to ensure we run after the hpet 115208ec0c58SJohn Stultz * is fully initialized, which may occur at fs_initcall time. 115308ec0c58SJohn Stultz */ 115408ec0c58SJohn Stultz device_initcall(init_tsc_clocksource); 11558fbbc4b4SAlok Kataria 11568fbbc4b4SAlok Kataria void __init tsc_init(void) 11578fbbc4b4SAlok Kataria { 11588fbbc4b4SAlok Kataria u64 lpj; 11598fbbc4b4SAlok Kataria int cpu; 11608fbbc4b4SAlok Kataria 1161845b3944SThomas Gleixner x86_init.timers.tsc_pre_init(); 1162845b3944SThomas Gleixner 1163b47dcbdcSAndy Lutomirski if (!cpu_has_tsc) { 1164b47dcbdcSAndy Lutomirski setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); 11658fbbc4b4SAlok Kataria return; 1166b47dcbdcSAndy Lutomirski } 11678fbbc4b4SAlok Kataria 11682d826404SThomas Gleixner tsc_khz = x86_platform.calibrate_tsc(); 1169e93ef949SAlok Kataria cpu_khz = tsc_khz; 11708fbbc4b4SAlok Kataria 1171e93ef949SAlok Kataria if (!tsc_khz) { 11728fbbc4b4SAlok Kataria mark_tsc_unstable("could not calculate TSC khz"); 1173b47dcbdcSAndy Lutomirski setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); 11748fbbc4b4SAlok Kataria return; 11758fbbc4b4SAlok Kataria } 11768fbbc4b4SAlok Kataria 
1177c767a54bSJoe Perches pr_info("Detected %lu.%03lu MHz processor\n", 11788fbbc4b4SAlok Kataria (unsigned long)cpu_khz / 1000, 11798fbbc4b4SAlok Kataria (unsigned long)cpu_khz % 1000); 11808fbbc4b4SAlok Kataria 11818fbbc4b4SAlok Kataria /* 11828fbbc4b4SAlok Kataria * Secondary CPUs do not run through tsc_init(), so set up 11838fbbc4b4SAlok Kataria * all the scale factors for all CPUs, assuming the same 11848fbbc4b4SAlok Kataria * speed as the bootup CPU. (cpufreq notifiers will fix this 11858fbbc4b4SAlok Kataria * up if their speed diverges) 11868fbbc4b4SAlok Kataria */ 118720d1c86aSPeter Zijlstra for_each_possible_cpu(cpu) { 118820d1c86aSPeter Zijlstra cyc2ns_init(cpu); 11898fbbc4b4SAlok Kataria set_cyc2ns_scale(cpu_khz, cpu); 119020d1c86aSPeter Zijlstra } 11918fbbc4b4SAlok Kataria 11928fbbc4b4SAlok Kataria if (tsc_disabled > 0) 11938fbbc4b4SAlok Kataria return; 11948fbbc4b4SAlok Kataria 11958fbbc4b4SAlok Kataria /* now allow native_sched_clock() to use rdtsc */ 119610b033d4SPeter Zijlstra 11978fbbc4b4SAlok Kataria tsc_disabled = 0; 119810b033d4SPeter Zijlstra static_key_slow_inc(&__use_tsc); 11998fbbc4b4SAlok Kataria 1200e82b8e4eSVenkatesh Pallipadi if (!no_sched_irq_time) 1201e82b8e4eSVenkatesh Pallipadi enable_sched_clock_irqtime(); 1202e82b8e4eSVenkatesh Pallipadi 120370de9a97SAlok Kataria lpj = ((u64)tsc_khz * 1000); 120470de9a97SAlok Kataria do_div(lpj, HZ); 120570de9a97SAlok Kataria lpj_fine = lpj; 120670de9a97SAlok Kataria 12078fbbc4b4SAlok Kataria use_tsc_delay(); 12088fbbc4b4SAlok Kataria 12098fbbc4b4SAlok Kataria if (unsynchronized_tsc()) 12108fbbc4b4SAlok Kataria mark_tsc_unstable("TSCs unsynchronized"); 12118fbbc4b4SAlok Kataria 1212395628efSAlok Kataria check_system_tsc_reliable(); 12138fbbc4b4SAlok Kataria } 12148fbbc4b4SAlok Kataria 1215b565201cSJack Steiner #ifdef CONFIG_SMP 1216b565201cSJack Steiner /* 1217b565201cSJack Steiner * If we have a constant TSC and are using the TSC for the delay loop, 1218b565201cSJack Steiner * we can skip clock 
calibration if another cpu in the same socket has already 1219b565201cSJack Steiner * been calibrated. This assumes that CONSTANT_TSC applies to all 1220b565201cSJack Steiner * cpus in the socket - this should be a safe assumption. 1221b565201cSJack Steiner */ 1222148f9bb8SPaul Gortmaker unsigned long calibrate_delay_is_known(void) 1223b565201cSJack Steiner { 1224b565201cSJack Steiner int i, cpu = smp_processor_id(); 1225b565201cSJack Steiner 1226b565201cSJack Steiner if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC)) 1227b565201cSJack Steiner return 0; 1228b565201cSJack Steiner 1229b565201cSJack Steiner for_each_online_cpu(i) 1230b565201cSJack Steiner if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id) 1231b565201cSJack Steiner return cpu_data(i).loops_per_jiffy; 1232b565201cSJack Steiner return 0; 1233b565201cSJack Steiner } 1234b565201cSJack Steiner #endif 1235