// SPDX-License-Identifier: GPL-2.0
/*
 *	Precise Delay Loops for i386
 *
 *	Copyright (C) 1993 Linus Torvalds
 *	Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
 *	Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
 *
 *	The __delay function must _NOT_ be inlined as its execution time
 *	depends wildly on alignment on many x86 processors. The additional
 *	jump magic is needed to get the timing stable on all the CPUs
 *	we have to worry about.
 */

#include <linux/export.h>
#include <linux/sched.h>
#include <linux/timex.h>
#include <linux/preempt.h>
#include <linux/delay.h>

#include <asm/processor.h>
#include <asm/delay.h>
#include <asm/timer.h>
#include <asm/mwait.h>

#ifdef CONFIG_SMP
# include <asm/smp.h>
#endif

static void delay_loop(u64 __loops);

/*
 * Calibration and selection of the delay mechanism happens only once
 * during boot.
 */
static void (*delay_fn)(u64) __ro_after_init = delay_loop;
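/*
 * Vendor specific halt based delay primitive: delay_halt_tpause() on
 * Intel (TPAUSE) or delay_halt_mwaitx() on AMD (MWAITX), invoked from
 * delay_halt().
 */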
static void (*delay_halt_fn)(u64 start, u64 cycles) __ro_after_init;

/* simple loop based delay: */
static void delay_loop(u64 __loops)
{
	unsigned long loops = (unsigned long)__loops;

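	/*
	 * Count loops down to zero. The extra jumps to 16-byte aligned
	 * targets keep the timing of the inner loop stable regardless of
	 * how this function itself ends up aligned (see the comment at
	 * the top of this file).
	 */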
	asm volatile(
		"	test %0,%0	\n"
		"	jz 3f		\n"
		"	jmp 1f		\n"

		".align 16		\n"
		"1:	jmp 2f		\n"

		".align 16		\n"
		"2:	dec %0		\n"
		"	jnz 2b		\n"
		"3:	dec %0		\n"

		: "+a" (loops)
		:
	);
}

/* TSC based delay: */
static void delay_tsc(u64 cycles)
{
	u64 bclock, now;
	int cpu;

	preempt_disable();
	cpu = smp_processor_id();
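	/*
	 * rdtsc_ordered() is a barriered TSC read, so the timestamps
	 * cannot be reordered relative to the code being timed.
	 */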
	bclock = rdtsc_ordered();
	for (;;) {
		now = rdtsc_ordered();
		if ((now - bclock) >= cycles)
			break;

		/* Allow RT tasks to run */
		preempt_enable();
		rep_nop();
		preempt_disable();

		/*
		 * It is possible that we moved to another CPU, and
		 * since TSCs are per-CPU we need to account for
		 * that. The delay must guarantee that we wait "at
		 * least" the amount of time. Being moved to another
		 * CPU could make the wait longer, but we just need to
		 * make sure we waited long enough. Rebalance the
		 * counter for this CPU.
		 */
		if (unlikely(cpu != smp_processor_id())) {
			cycles -= (now - bclock);
			cpu = smp_processor_id();
			bclock = rdtsc_ordered();
		}
	}
	preempt_enable();
}

/*
 * On Intel the TPAUSE instruction waits until any of:
 * 1) the TSC counter exceeds the value provided in EDX:EAX
 * 2) the global timeout in IA32_UMWAIT_CONTROL is exceeded
 * 3) an external interrupt occurs
 */
static void delay_halt_tpause(u64 start, u64 cycles)
{
	u64 until = start + cycles;
	u32 eax, edx;

	eax = lower_32_bits(until);
	edx = upper_32_bits(until);

	/*
	 * Hard code the deeper (C0.2) sleep state because exit latency is
	 * small compared to the "microseconds" that usleep() will delay.
	 */
	__tpause(TPAUSE_C02_STATE, edx, eax);
}

/*
 * On some AMD platforms, MWAITX has a configurable 32-bit timer that
 * counts at TSC frequency. The input value is the number of TSC cycles
 * to wait. MWAITX will also exit when the timer expires.
 */
static void delay_halt_mwaitx(u64 unused, u64 cycles)
{
	u64 delay;

	delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
	/*
	 * Use cpu_tss_rw as a cacheline-aligned, seldom-accessed per-cpu
	 * variable as the monitor target.
	 */
	__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);

	/*
	 * AMD, like Intel, supports the EAX hint, and EAX=0xf means: do not
	 * enter any deep C-state. We use it here in delay() to minimize
	 * wakeup latency.
	 */
	__mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
}

/*
 * Call a vendor specific function to delay for a given amount of time. Because
 * these functions may return earlier than requested, check the actual elapsed
 * time and call again until done.
 */
static void delay_halt(u64 __cycles)
{
	u64 start, end, cycles = __cycles;

	/*
	 * A timer value of 0 causes MWAITX to wait indefinitely, unless there
	 * is a store on the memory monitored by MONITORX.
	 */
	if (!cycles)
		return;

	start = rdtsc_ordered();

	for (;;) {
		delay_halt_fn(start, cycles);
		end = rdtsc_ordered();

		if (cycles <= end - start)
			break;

		cycles -= end - start;
		start = end;
	}
}

void __init use_tsc_delay(void)
{
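	/* Do not override a halt based delay (TPAUSE/MWAITX) selected earlier. */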
	if (delay_fn == delay_loop)
		delay_fn = delay_tsc;
}

void __init use_tpause_delay(void)
{
	delay_halt_fn = delay_halt_tpause;
	delay_fn = delay_halt;
}

void use_mwaitx_delay(void)
{
	delay_halt_fn = delay_halt_mwaitx;
	delay_fn = delay_halt;
}

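/*
 * Hand the generic delay calibration code a raw timer value. This only
 * works once the TSC based delay has been selected.
 */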
int read_current_timer(unsigned long *timer_val)
{
	if (delay_fn == delay_tsc) {
		*timer_val = rdtsc();
		return 0;
	}
	return -1;
}

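/* Delay using whichever mechanism was selected during boot (loop, TSC or halt based). */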
void __delay(unsigned long loops)
{
	delay_fn(loops);
}
EXPORT_SYMBOL(__delay);

noinline void __const_udelay(unsigned long xloops)
{
	unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy;
	int d0;

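	/*
	 * xloops is the requested delay scaled by 2^32 (e.g. for __udelay()
	 * it is usecs * 2^32 / 10^6). The MULL below keeps the upper 32 bits
	 * of xloops * lpj * HZ, which yields the loop count:
	 * usecs * lpj * HZ / 10^6. The *= 4 here and the HZ / 4 below cancel
	 * out; they merely keep lpj * (HZ / 4) within 32 bits.
	 */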
	xloops *= 4;
	asm("mull %%edx"
		:"=d" (xloops), "=&a" (d0)
		:"1" (xloops), "0" (lpj * (HZ / 4)));

	__delay(++xloops);
}
EXPORT_SYMBOL(__const_udelay);

void __udelay(unsigned long usecs)
{
	__const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
}
EXPORT_SYMBOL(__udelay);

void __ndelay(unsigned long nsecs)
{
	__const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
}
EXPORT_SYMBOL(__ndelay);