xref: /openbmc/linux/arch/x86/lib/copy_user_64.S (revision a8f4fcdd8ba7d191c29ae87a2315906fe90368d6)
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
4 * Copyright 2002 Andi Kleen, SuSE Labs.
5 *
6 * Functions to copy from and to user space.
7 */
8
9#include <linux/linkage.h>
10#include <asm/current.h>
11#include <asm/asm-offsets.h>
12#include <asm/thread_info.h>
13#include <asm/cpufeatures.h>
14#include <asm/alternative.h>
15#include <asm/asm.h>
16#include <asm/smap.h>
17#include <asm/export.h>
18#include <asm/trapnr.h>
19
/*
 * ALIGN_DESTINATION - byte-copy until the destination is 8-byte aligned.
 *
 * In:
 * rdi destination
 * rsi source
 * edx byte count. Every user in this file branches away when edx < 8
 *     before expanding the macro, and at most 7 bytes are consumed here,
 *     so the "subl %ecx,%edx" below cannot wrap.
 *
 * Out:
 * rdi is 8-byte aligned; rsi and rdi have been advanced and edx reduced
 * by the number of bytes copied (0..7).
 *
 * Clobbers: al, ecx, flags.
 *
 * A fault on either byte access is routed through the exception table to
 * label 103, which adds the alignment bytes that were not copied yet (ecx)
 * back into edx and passes the total to .Lcopy_user_handle_tail.
 */
.macro ALIGN_DESTINATION
	/* ecx = offset of the destination inside its 8-byte word */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	/* ecx = 8 - offset = bytes needed to reach the next 8-byte boundary */
	subl $8,%ecx
	negl %ecx
	subl %ecx,%edx			/* those bytes are handled right here */
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:
	.section .fixup,"ax"
	/*
	 * ecx is decremented only after a byte was stored, so at fault time
	 * it still counts the faulting byte: edx + ecx = bytes left to copy.
	 */
103:	addl %ecx,%edx			/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(100b, 103b)
	_ASM_EXTABLE_CPY(101b, 103b)
	.endm
43
/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient micro
 * code for rep movsq
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count (only the low 32 bits, edx, are examined)
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 *
 * Clobbers: rcx, rdx, rsi, rdi, r8-r11, flags.
 *
 * The copy runs in up to four phases:
 *   1. ALIGN_DESTINATION  - 0..7 single bytes until rdi is 8-byte aligned
 *   2. labels 1..16       - 64 bytes per iteration through r8-r11
 *   3. labels 18..19      - 8 bytes per iteration
 *   4. labels 21..22      - remaining single bytes
 * Counts below 8 go straight to phase 4.
 *
 * .L_copy_short_string is also a jump target of
 * copy_user_enhanced_fast_string, which arrives with edx < 64 after having
 * executed its own ASM_STAC; it leaves through the ASM_CLAC/ret at label 23.
 *
 * Every user-memory access carries an exception table entry. The fixup
 * code recomputes the number of bytes not known to be copied into edx and
 * jumps to .Lcopy_user_handle_tail, which produces the return value.
 */
SYM_FUNC_START(copy_user_generic_unrolled)
	ASM_STAC			/* from <asm/smap.h>; paired with ASM_CLAC on exit */
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	/* ecx = number of 64-byte chunks, edx = count % 64 */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_copy_short_string
	/*
	 * 64-byte loop. rsi/rdi are advanced only after all sixteen moves
	 * succeeded, and ecx still counts the current chunk while it runs.
	 */
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
.L_copy_short_string:
	/* ecx = number of 8-byte words, edx = count % 8 */
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx			/* any single bytes left? */
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax			/* success: 0 bytes uncopied */
	ASM_CLAC
	ret

	.section .fixup,"ax"
	/* fault in the 64-byte loop: edx = edx + 64 * chunks outstanding */
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
	/* fault in the 8-byte loop: edx = edx + 8 * words outstanding */
40:	leal (%rdx,%rcx,8),%edx
	jmp 60f
	/* fault in the byte loop: edx = bytes outstanding */
50:	movl %ecx,%edx
60:	jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
	.previous

	_ASM_EXTABLE_CPY(1b, 30b)
	_ASM_EXTABLE_CPY(2b, 30b)
	_ASM_EXTABLE_CPY(3b, 30b)
	_ASM_EXTABLE_CPY(4b, 30b)
	_ASM_EXTABLE_CPY(5b, 30b)
	_ASM_EXTABLE_CPY(6b, 30b)
	_ASM_EXTABLE_CPY(7b, 30b)
	_ASM_EXTABLE_CPY(8b, 30b)
	_ASM_EXTABLE_CPY(9b, 30b)
	_ASM_EXTABLE_CPY(10b, 30b)
	_ASM_EXTABLE_CPY(11b, 30b)
	_ASM_EXTABLE_CPY(12b, 30b)
	_ASM_EXTABLE_CPY(13b, 30b)
	_ASM_EXTABLE_CPY(14b, 30b)
	_ASM_EXTABLE_CPY(15b, 30b)
	_ASM_EXTABLE_CPY(16b, 30b)
	_ASM_EXTABLE_CPY(18b, 40b)
	_ASM_EXTABLE_CPY(19b, 40b)
	_ASM_EXTABLE_CPY(21b, 50b)
	_ASM_EXTABLE_CPY(22b, 50b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
142
/*
 * copy_user_generic_string - memory copy with exception handling, built on
 * the string copy instructions.
 *
 * Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this please consider this.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count (only the low 32 bits, edx, are examined)
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 *
 * Clobbers: rcx, rdx, rsi, rdi, flags.
 */
SYM_FUNC_START(copy_user_generic_string)
	ASM_STAC			/* from <asm/smap.h>; paired with ASM_CLAC on exit */
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	/* ecx = number of 8-byte words, edx = count % 8 */
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
	/* copy the 0..7 trailing bytes (or the whole buffer if count < 8) */
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax			/* success: 0 bytes uncopied */
	ASM_CLAC
	ret

	.section .fixup,"ax"
	/* fault in rep movsq: ecx = trailing bytes + 8 * words outstanding */
11:	leal (%rdx,%rcx,8),%ecx
	/* fault in rep movsb enters here with ecx = bytes outstanding */
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(1b, 11b)
	_ASM_EXTABLE_CPY(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)
188
/*
 * copy_user_enhanced_fast_string - memory copy with exception handling
 * using a single "rep movsb".
 *
 * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count (only the low 32 bits, edx, are examined)
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 *
 * Clobbers: rcx, rdx, rsi, rdi, flags; the short path additionally uses
 * r8 and al.
 *
 * Copies shorter than 64 bytes do not use "rep movsb" at all: they jump to
 * .L_copy_short_string inside copy_user_generic_unrolled and return from
 * there (that path ends with its own ASM_CLAC and ret).
 */
SYM_FUNC_START(copy_user_enhanced_fast_string)
	ASM_STAC			/* from <asm/smap.h>; paired with ASM_CLAC on exit */
	cmpl $64,%edx
	jb .L_copy_short_string	/* less than 64 bytes, avoid the costly 'rep' */
	movl %edx,%ecx
1:	rep
	movsb
	xorl %eax,%eax			/* success: 0 bytes uncopied */
	ASM_CLAC
	ret

	.section .fixup,"ax"
	/* fault in rep movsb: ecx = bytes outstanding */
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
220
/*
 * .Lcopy_user_handle_tail - common exit for the fixup paths in this file.
 *
 * Try to copy the last bytes one at a time, so that everything up to the
 * exact faulting byte gets transferred, and report how many bytes were
 * left uncopied. Nothing is zeroed here.
 * Since protection fault in copy_from/to_user is not a normal situation,
 * it is not necessary to optimize tail handling.
 * Don't try to copy the tail if machine check happened: touching the
 * source again would only run into the same failing memory a second time.
 *
 * Input:
 * eax trap number of the fault that led here. NOTE(review): this relies on
 *     the _ASM_EXTABLE_CPY exception handler storing the trap number in
 *     the saved ax register (ex_handler_copy in arch/x86/mm/extable.c
 *     upstream) - confirm for this tree. None of the fixup stubs in this
 *     file modify eax before jumping here.
 * rdi destination
 * rsi source
 * rdx count (bytes not yet known to be copied; only edx is used)
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
	movl %edx,%ecx
	cmp $X86_TRAP_MC,%eax		/* check if X86_TRAP_MC */
	je 3f
1:	rep movsb
	/* reached by fall-through (ecx == 0) or via the fixup for 1b */
2:	mov %ecx,%eax
	ASM_CLAC
	ret

	/*
	 * Machine check: skip the retry and report the whole remaining
	 * count as uncopied.
	 */
3:	movl %edx,%eax
	ASM_CLAC
	ret

	_ASM_EXTABLE_CPY(1b, 2b)
SYM_CODE_END(.Lcopy_user_handle_tail)
244
/*
 * __copy_user_nocache - Uncached memory copy with exception handling
 * This will force destination out of cache for more performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count (only the low 32 bits, edx, are examined)
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 *
 * Clobbers: rcx, rdx, rsi, rdi, r8-r11, flags.
 *
 * Aligned 8-byte and 4-byte pieces are written with movnti; the bytes
 * needed to align the destination and any trailing bytes go through
 * ordinary byte moves. Both the success path and the fixup path execute
 * sfence before leaving so the non-temporal stores are ordered before
 * whatever follows.
 */
SYM_FUNC_START(__copy_user_nocache)
	ASM_STAC			/* from <asm/smap.h>; paired with ASM_CLAC on exit */

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 64-byte (2 x 4x8-byte) copy count in ecx and remainder in edx */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/*
	 * Perform 64-byte nocache loop-copy: two groups of four 8-byte
	 * loads followed by four movnti stores. rsi/rdi advance only after
	 * the whole 64 bytes went through.
	 */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no byte left, we're done (edx is below 8 from here on) */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax			/* success: 0 bytes uncopied */
	ASM_CLAC
	sfence
	ret

	.section .fixup,"ax"
	/* fault in the 64-byte loop: edx = edx + 64 * chunks outstanding */
.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
	/* fault in the 8-byte loop: edx = edx + 8 * words outstanding */
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
	/* fault in the 4-byte copy: edx = edx + 4 * ecx (ecx is 1 here) */
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
	/* fault in the byte loop: edx = bytes outstanding */
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence				/* order the movnti stores already issued */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
	_ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
	_ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
	_ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
	_ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
	_ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
394EXPORT_SYMBOL(__copy_user_nocache)
395