xref: /openbmc/linux/arch/x86/lib/copy_user_64.S (revision e983940270f10fe8551baf0098be76ea478294a3)
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

/* Standard copy_to_user with segment limit checking */
ENTRY(_copy_to_user)
	mov PER_CPU_VAR(current_task), %rax
	movq %rdi,%rcx
	addq %rdx,%rcx
	jc bad_to_user
	cmpq TASK_addr_limit(%rax),%rcx
	ja bad_to_user
	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
		      "jmp copy_user_generic_string",		\
		      X86_FEATURE_REP_GOOD,			\
		      "jmp copy_user_enhanced_fast_string",	\
		      X86_FEATURE_ERMS
ENDPROC(_copy_to_user)
EXPORT_SYMBOL(_copy_to_user)
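/*
 * Roughly, in C (an illustrative sketch only; it assumes TASK_addr_limit
 * is the asm-offset of current->thread.addr_limit):
 *
 *	unsigned long end = dst + count;
 *	if (end < dst)					(addq sets CF on wrap-around)
 *		goto bad_to_user;
 *	if (end > current->thread.addr_limit.seg)	(cmpq TASK_addr_limit(%rax),%rcx)
 *		goto bad_to_user;
 *	tail-jump to one of the copy variants below
 */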

/* Standard copy_from_user with segment limit checking */
ENTRY(_copy_from_user)
	mov PER_CPU_VAR(current_task), %rax
	movq %rsi,%rcx
	addq %rdx,%rcx
	jc bad_from_user
	cmpq TASK_addr_limit(%rax),%rcx
	ja bad_from_user
	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
		      "jmp copy_user_generic_string",		\
		      X86_FEATURE_REP_GOOD,			\
		      "jmp copy_user_enhanced_fast_string",	\
		      X86_FEATURE_ERMS
ENDPROC(_copy_from_user)
EXPORT_SYMBOL(_copy_from_user)
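/*
 * The ALTERNATIVE_2 above is patched at boot time; conceptually it
 * behaves like the following C dispatch (a sketch assuming
 * boot_cpu_has(); the real selection rewrites the jmp in place, so
 * there is no runtime branch):
 *
 *	if (boot_cpu_has(X86_FEATURE_ERMS))
 *		return copy_user_enhanced_fast_string(dst, src, count);
 *	else if (boot_cpu_has(X86_FEATURE_REP_GOOD))
 *		return copy_user_generic_string(dst, src, count);
 *	else
 *		return copy_user_generic_unrolled(dst, src, count);
 */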


	.section .fixup,"ax"
	/* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
	movl %edx,%ecx
	xorl %eax,%eax
	rep
	stosb
bad_to_user:
	movl %edx,%eax
	ret
ENDPROC(bad_from_user)
	.previous
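/*
 * In rough C terms (sketch only): a failed source-range check must not
 * leave stale kernel memory visible in the destination buffer, so
 * bad_from_user clears it before reporting that nothing was copied:
 *
 *	memset(dst, 0, count);		(rep stosb with %eax = 0)
 *	return count;			(all bytes remain uncopied)
 *
 * bad_to_user skips the clearing step: copy_to_user() has no
 * zero-on-failure requirement for the user destination.
 */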

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient microcode
 * for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
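/*
 * The copy below splits the count into 64-byte blocks, 8-byte words and
 * trailing bytes; roughly, in C (sketch only):
 *
 *	blocks = count >> 6;  rem = count & 63;		(labels 1-16)
 *	qwords = rem >> 3;    rem = rem & 7;		(labels 18-19)
 *	bytes  = rem;					(labels 21-22)
 *
 * Each numbered instruction can fault on the user address; the
 * exception table entries at the end map it to a fixup that works out
 * how many bytes were still outstanding.
 */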
ENTRY(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz 17f
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
17:	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp copy_user_handle_tail /* ecx is zerorest also */
	.previous

	_ASM_EXTABLE(1b,30b)
	_ASM_EXTABLE(2b,30b)
	_ASM_EXTABLE(3b,30b)
	_ASM_EXTABLE(4b,30b)
	_ASM_EXTABLE(5b,30b)
	_ASM_EXTABLE(6b,30b)
	_ASM_EXTABLE(7b,30b)
	_ASM_EXTABLE(8b,30b)
	_ASM_EXTABLE(9b,30b)
	_ASM_EXTABLE(10b,30b)
	_ASM_EXTABLE(11b,30b)
	_ASM_EXTABLE(12b,30b)
	_ASM_EXTABLE(13b,30b)
	_ASM_EXTABLE(14b,30b)
	_ASM_EXTABLE(15b,30b)
	_ASM_EXTABLE(16b,30b)
	_ASM_EXTABLE(18b,40b)
	_ASM_EXTABLE(19b,40b)
	_ASM_EXTABLE(21b,50b)
	_ASM_EXTABLE(22b,50b)
ENDPROC(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
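/*
 * Fixup arithmetic above, sketched in C (illustrative only): when a
 * fault hits the 64-byte loop, %ecx counts the 64-byte blocks still to
 * go and %edx the sub-block remainder, so
 *
 *	remaining = (blocks << 6) + rem;	(label 30)
 *
 * In the 8-byte loop, remaining = rem + 8 * qwords_left (label 40), and
 * in the byte loop it is simply the loop counter (label 50).
 * copy_user_handle_tail then retries the remaining bytes one at a time
 * and returns how many could not be copied, which becomes the return
 * value in %eax.
 */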

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page-sized chunks
 * even if user space passed a longer buffer.
 * Copying more would also be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix this,
 * please take those errata into account.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
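/*
 * Conceptually (a C sketch only, not the generated code):
 *
 *	u64 *d = dst; const u64 *s = src;
 *	for (n = count >> 3; n; n--)		(rep movsq, label 1)
 *		*d++ = *s++;
 *	u8 *db = (u8 *)d; const u8 *sb = (const u8 *)s;
 *	for (n = count & 7; n; n--)		(rep movsb, label 3)
 *		*db++ = *sb++;
 *
 * On a fault, %rcx holds the iterations still to do, so the fixups
 * below rebuild the remaining byte count (tail + 8 * qwords left for
 * label 1, %ecx itself for label 3) before calling
 * copy_user_handle_tail.
 */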
ENTRY(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,11b)
	_ASM_EXTABLE(3b,12b)
ENDPROC(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use enhanced REP MOVSB/STOSB when it is
 * available.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_enhanced_fast_string)
	ASM_STAC
	movl %edx,%ecx
1:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,12b)
ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
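/*
 * With ERMS a single rep movsb handles every size, so no tail-splitting
 * arithmetic is needed. In rough C terms (sketch only), the fixup is
 * just:
 *
 *	return copy_user_handle_tail(dst, src, bytes_left);
 *
 * where bytes_left is %rcx at the fault and dst/src are the pointers
 * rep movsb has already advanced.
 */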

/*
 * copy_user_nocache - Uncached memory copy with exception handling
 * This uses non-temporal stores so the destination is kept out of the
 * cache, which improves performance when the copied data will not be
 * re-read soon.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
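/*
 * Decision logic of the body below, as a C-style sketch (illustrative
 * only):
 *
 *	if (size >= 8)		(dst 8-byte aligned via ALIGN_DESTINATION)
 *		copy 64-byte blocks, then 8-byte words, with movnti;
 *	if (>= 4 bytes remain && dst is 4-byte aligned)
 *		copy one 4-byte word with movnti;
 *	copy whatever is left with ordinary (cached) byte moves;
 *	sfence;			(order the non-temporal stores)
 */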
ENTRY(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no bytes left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	ret

	.section .fixup,"ax"
.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(20b,.L_fixup_8b_copy)
	_ASM_EXTABLE(21b,.L_fixup_8b_copy)
	_ASM_EXTABLE(30b,.L_fixup_4b_copy)
	_ASM_EXTABLE(31b,.L_fixup_4b_copy)
	_ASM_EXTABLE(40b,.L_fixup_1b_copy)
	_ASM_EXTABLE(41b,.L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
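/*
 * The fixup labels mirror the copy phases; in rough C terms (sketch
 * only), the bytes still outstanding at the fault are
 *
 *	(blocks << 6) + rem		(.L_fixup_4x8b_copy)
 *	rem + 8 * qwords_left		(.L_fixup_8b_copy)
 *	rem + 4 * dwords_left		(.L_fixup_4b_copy)
 *	byte loop counter		(.L_fixup_1b_copy)
 *
 * and each path issues sfence before copy_user_handle_tail so that the
 * non-temporal stores already performed are ordered before any further
 * access to the destination.
 */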