xref: /openbmc/linux/arch/x86/lib/delay.c (revision 4f2c0a4acffbec01079c28f839422e64ddeff004)
// SPDX-License-Identifier: GPL-2.0
/*
 *	Precise Delay Loops for i386
 *
 *	Copyright (C) 1993 Linus Torvalds
 *	Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
 *	Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
 *
 *	The __delay function must _NOT_ be inlined as its execution time
 *	depends wildly on alignment on many x86 processors. The additional
 *	jump magic is needed to get the timing stable on all the CPUs
 *	we have to worry about.
 */

#include <linux/export.h>
#include <linux/sched.h>
#include <linux/timex.h>
#include <linux/preempt.h>
#include <linux/delay.h>

#include <asm/processor.h>
#include <asm/delay.h>
#include <asm/timer.h>
#include <asm/mwait.h>

#ifdef CONFIG_SMP
# include <asm/smp.h>
#endif

static void delay_loop(u64 __loops);

/*
 * Calibration and selection of the delay mechanism happens only once
 * during boot.
 */
static void (*delay_fn)(u64) __ro_after_init = delay_loop;
static void (*delay_halt_fn)(u64 start, u64 cycles) __ro_after_init;

/* simple loop based delay: */
static void delay_loop(u64 __loops)
{
	unsigned long loops = (unsigned long)__loops;

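	/*
	 * The 16-byte aligned jump targets below keep the loop timing
	 * independent of where this code happens to land in memory (see the
	 * alignment note in the header comment).
	 */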
	asm volatile(
		"	test %0,%0	\n"
		"	jz 3f		\n"
		"	jmp 1f		\n"

		".align 16		\n"
		"1:	jmp 2f		\n"

		".align 16		\n"
		"2:	dec %0		\n"
		"	jnz 2b		\n"
		"3:	dec %0		\n"

		: "+a" (loops)
		:
	);
}

/* TSC based delay: */
static void delay_tsc(u64 cycles)
{
	u64 bclock, now;
	int cpu;

	preempt_disable();
	cpu = smp_processor_id();
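	/*
	 * rdtsc_ordered() pairs RDTSC with a barrier so the timestamp cannot
	 * be reordered against the surrounding instructions.
	 */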
	bclock = rdtsc_ordered();
	for (;;) {
		now = rdtsc_ordered();
		if ((now - bclock) >= cycles)
			break;

		/* Allow RT tasks to run */
		preempt_enable();
		rep_nop();
		preempt_disable();

		/*
		 * It is possible that we moved to another CPU, and
		 * since TSCs are per-CPU we need to account for
		 * that. The delay must guarantee that we wait "at
		 * least" the amount of time. Being moved to another
		 * CPU could make the wait longer but we just need to
		 * make sure we waited long enough. Rebalance the
		 * counter for this CPU.
		 */
		if (unlikely(cpu != smp_processor_id())) {
			cycles -= (now - bclock);
			cpu = smp_processor_id();
			bclock = rdtsc_ordered();
		}
	}
	preempt_enable();
}

/*
 * On Intel the TPAUSE instruction waits until any of:
 * 1) the TSC counter exceeds the value provided in EDX:EAX
 * 2) global timeout in IA32_UMWAIT_CONTROL is exceeded
 * 3) an external interrupt occurs
 */
static void delay_halt_tpause(u64 start, u64 cycles)
{
	u64 until = start + cycles;
	u32 eax, edx;

	eax = lower_32_bits(until);
	edx = upper_32_bits(until);

	/*
	 * Hard code the deeper (C0.2) sleep state because exit latency is
	 * small compared to the "microseconds" that usleep() will delay.
	 */
	__tpause(TPAUSE_C02_STATE, edx, eax);
}

/*
 * On some AMD platforms, MWAITX has a configurable 32-bit timer that
 * counts at TSC frequency. The input value is the number of TSC cycles
 * to wait. MWAITX will also exit when the timer expires.
 */
static void delay_halt_mwaitx(u64 unused, u64 cycles)
{
	u64 delay;

	delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
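	/*
	 * The MWAITX timer is only 32 bits wide, so longer waits are clamped
	 * here; delay_halt() keeps re-invoking this function until the full
	 * number of cycles has elapsed.
	 */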

	/*
	 * Use cpu_tss_rw, a cacheline-aligned and seldom accessed per-cpu
	 * variable, as the monitor target.
	 */
	__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);

	/*
	 * AMD, like Intel, supports the EAX hint; EAX=0xf means do not
	 * enter any deep C-state. We use it here in delay() to minimize
	 * wakeup latency.
	 */
	__mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
}

/*
 * Call a vendor-specific function to delay for a given amount of time. Because
 * these functions may return earlier than requested, check the actual elapsed
 * time and call again until done.
 */
static void delay_halt(u64 __cycles)
{
	u64 start, end, cycles = __cycles;

	/*
	 * A timer value of 0 causes MWAITX to wait indefinitely, unless there
	 * is a store on the memory monitored by MONITORX.
	 */
	if (!cycles)
		return;

	start = rdtsc_ordered();

	for (;;) {
		delay_halt_fn(start, cycles);
		end = rdtsc_ordered();

		if (cycles <= end - start)
			break;

		cycles -= end - start;
		start = end;
	}
}

void __init use_tsc_delay(void)
{
	if (delay_fn == delay_loop)
		delay_fn = delay_tsc;
}

void __init use_tpause_delay(void)
{
	delay_halt_fn = delay_halt_tpause;
	delay_fn = delay_halt;
}

void use_mwaitx_delay(void)
{
	delay_halt_fn = delay_halt_mwaitx;
	delay_fn = delay_halt;
}

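/*
 * Fast path for delay-loop calibration: hand back the current TSC value when
 * the TSC-based delay is active, otherwise return -1 so the caller falls back
 * to the generic calibration loop.
 */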
int read_current_timer(unsigned long *timer_val)
{
	if (delay_fn == delay_tsc) {
		*timer_val = rdtsc();
		return 0;
	}
	return -1;
}

void __delay(unsigned long loops)
{
	delay_fn(loops);
}
EXPORT_SYMBOL(__delay);

noinline void __const_udelay(unsigned long xloops)
{
	unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy;
	int d0;

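	/*
	 * Callers pass xloops pre-scaled by 2^32 per time unit (see
	 * __udelay() and __ndelay()). MULL multiplies EAX (xloops * 4) by
	 * EDX (lpj * HZ / 4) and leaves the high 32 bits of the 64-bit
	 * product in EDX, i.e. the result is (xloops * lpj * HZ) >> 32,
	 * the requested time converted into __delay() loops. The factor of
	 * four is split across the operands so that lpj * (HZ / 4) stays
	 * within 32 bits.
	 */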
	xloops *= 4;
	asm("mull %%edx"
		:"=d" (xloops), "=&a" (d0)
		:"1" (xloops), "0" (lpj * (HZ / 4)));

	__delay(++xloops);
}
EXPORT_SYMBOL(__const_udelay);

void __udelay(unsigned long usecs)
{
	__const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
}
EXPORT_SYMBOL(__udelay);

void __ndelay(unsigned long nsecs)
{
	__const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
}
EXPORT_SYMBOL(__ndelay);