// SPDX-License-Identifier: GPL-2.0
/*
 *	Precise Delay Loops for i386
 *
 *	Copyright (C) 1993 Linus Torvalds
 *	Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
 *	Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
 *
 *	The __delay function must _NOT_ be inlined as its execution time
 *	depends wildly on alignment on many x86 processors. The additional
 *	jump magic is needed to get the timing stable on all the CPUs
 *	we have to worry about.
 */

#include <linux/export.h>
#include <linux/sched.h>
#include <linux/timex.h>
#include <linux/preempt.h>
#include <linux/delay.h>

#include <asm/processor.h>
#include <asm/delay.h>
#include <asm/timer.h>
#include <asm/mwait.h>

#ifdef CONFIG_SMP
# include <asm/smp.h>
#endif

static void delay_loop(u64 __loops);

/*
 * Calibration and selection of the delay mechanism happens only once
 * during boot.
 */
static void (*delay_fn)(u64) __ro_after_init = delay_loop;
static void (*delay_halt_fn)(u64 start, u64 cycles) __ro_after_init;

/* simple loop based delay: */
static void delay_loop(u64 __loops)
{
	unsigned long loops = (unsigned long)__loops;

	asm volatile(
		"	test %0,%0	\n"
		"	jz 3f		\n"
		"	jmp 1f		\n"

		".align 16		\n"
		"1:	jmp 2f		\n"

		".align 16		\n"
		"2:	dec %0		\n"
		"	jnz 2b		\n"
		"3:	dec %0		\n"

		: /* we don't need output */
		: "a" (loops)
	);
}

/* TSC based delay: */
static void delay_tsc(u64 cycles)
{
	u64 bclock, now;
	int cpu;

	preempt_disable();
	cpu = smp_processor_id();
	bclock = rdtsc_ordered();
	for (;;) {
		now = rdtsc_ordered();
		if ((now - bclock) >= cycles)
			break;

		/* Allow RT tasks to run */
		preempt_enable();
		rep_nop();
		preempt_disable();

		/*
		 * It is possible that we moved to another CPU, and
		 * since TSCs are per-CPU we need to account for
		 * that. The delay must guarantee that we wait "at
		 * least" the amount of time. Being moved to another
		 * CPU could make the wait longer but we just need to
		 * make sure we waited long enough. Rebalance the
		 * counter for this CPU.
		 */
		if (unlikely(cpu != smp_processor_id())) {
			cycles -= (now - bclock);
			cpu = smp_processor_id();
			bclock = rdtsc_ordered();
		}
	}
	preempt_enable();
}

/*
 * On Intel the TPAUSE instruction waits until any of:
 * 1) the TSC counter exceeds the value provided in EDX:EAX
 * 2) the global timeout in IA32_UMWAIT_CONTROL is exceeded
 * 3) an external interrupt occurs
 */
static void delay_halt_tpause(u64 start, u64 cycles)
{
	u64 until = start + cycles;
	u32 eax, edx;

	eax = lower_32_bits(until);
	edx = upper_32_bits(until);

	/*
	 * Hard code the deeper (C0.2) sleep state because exit latency is
	 * small compared to the "microseconds" that usleep() will delay.
	 */
	__tpause(TPAUSE_C02_STATE, edx, eax);
}

/*
 * On some AMD platforms, MWAITX has a configurable 32-bit timer that
 * counts at TSC frequency. The input value is the number of TSC cycles
 * to wait. MWAITX will also exit when the timer expires.
 */
static void delay_halt_mwaitx(u64 unused, u64 cycles)
{
	u64 delay;

	delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
	/*
	 * Use cpu_tss_rw as a cacheline-aligned, seldom-accessed per-CPU
	 * variable as the monitor target.
	 */
	__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);

	/*
	 * AMD, like Intel, supports the EAX hint; EAX=0xf means "do not
	 * enter any deep C-state" and is used here to minimize wakeup
	 * latency.
	 */
	__mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
}

/*
 * Call a vendor-specific function to delay for a given amount of time. Because
 * these functions may return earlier than requested, check the actual elapsed
 * time and call again until done.
 */
static void delay_halt(u64 __cycles)
{
	u64 start, end, cycles = __cycles;

	/*
	 * A timer value of 0 causes MWAITX to wait indefinitely, unless there
	 * is a store on the memory monitored by MONITORX.
	 */
	if (!cycles)
		return;

	start = rdtsc_ordered();

	for (;;) {
		delay_halt_fn(start, cycles);
		end = rdtsc_ordered();

		if (cycles <= end - start)
			break;

		cycles -= end - start;
		start = end;
	}
}

void __init use_tsc_delay(void)
{
	if (delay_fn == delay_loop)
		delay_fn = delay_tsc;
}

void __init use_tpause_delay(void)
{
	delay_halt_fn = delay_halt_tpause;
	delay_fn = delay_halt;
}

void use_mwaitx_delay(void)
{
	delay_halt_fn = delay_halt_mwaitx;
	delay_fn = delay_halt;
}

int read_current_timer(unsigned long *timer_val)
{
	if (delay_fn == delay_tsc) {
		*timer_val = rdtsc();
		return 0;
	}
	return -1;
}

void __delay(unsigned long loops)
{
	delay_fn(loops);
}
EXPORT_SYMBOL(__delay);

noinline void __const_udelay(unsigned long xloops)
{
	unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy;
	int d0;

	xloops *= 4;
	asm("mull %%edx"
		: "=d" (xloops), "=&a" (d0)
		: "1" (xloops), "0" (lpj * (HZ / 4)));

	__delay(++xloops);
}
EXPORT_SYMBOL(__const_udelay);

void __udelay(unsigned long usecs)
{
	__const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
}
EXPORT_SYMBOL(__udelay);

void __ndelay(unsigned long nsecs)
{
	__const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
}
EXPORT_SYMBOL(__ndelay);
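
/*
 * For reference, the arithmetic in __const_udelay() works out as follows
 * (an illustrative sketch of the math, not additional functionality):
 *
 *	__udelay(usecs) calls __const_udelay(usecs * 0x10c7), where
 *	0x10c7 = 4295 ~= 2^32 / 10^6 (rounded up).
 *
 *	The 32x32->64 bit "mull" computes
 *
 *	    (usecs * 0x10c7 * 4) * (lpj * HZ / 4) ~= usecs * 0x10c7 * lpj * HZ
 *
 *	and keeps only the high 32 bits, i.e. implicitly divides by 2^32:
 *
 *	    loops ~= usecs * lpj * HZ / 10^6
 *
 *	which is the requested fraction of a second expressed in __delay()
 *	loop units (lpj * HZ loops per second). The final "++xloops" rounds
 *	up so the delay is never shorter than requested, and the *4 / (HZ/4)
 *	split appears intended to keep lpj * (HZ / 4) within 32 bits.
 */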