xref: /openbmc/linux/arch/x86/lib/copy_user_64.S (revision df2634f43f5106947f3735a0b61a6527a4b278cd)
1/*
2 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
3 * Copyright 2002 Andi Kleen, SuSE Labs.
4 * Subject to the GNU Public License v2.
5 *
6 * Functions to copy from and to user space.
7 */
8
9#include <linux/linkage.h>
10#include <asm/dwarf2.h>
11
12#define FIX_ALIGNMENT 1
13
14#include <asm/current.h>
15#include <asm/asm-offsets.h>
16#include <asm/thread_info.h>
17#include <asm/cpufeature.h>
18
/*
 * ALTERNATIVE_JUMP feature, orig, alt
 *
 * Emits a hand-encoded 5-byte near jump (opcode 0xe9 + 32-bit relative
 * displacement) to \orig.  A second jump, to \alt, is placed in
 * .altinstr_replacement, and a record linking the two jumps to \feature
 * is appended to .altinstructions.
 *
 * The displacement of the replacement jump is taken relative to label 1
 * (the end of the *original* jump), not to its own address, so it is only
 * meaningful once its bytes sit where the original jump is.
 * NOTE(review): presumably the alternatives patching code, which is not
 * part of this file, copies it there when \feature is set -- confirm.
 *
 * Numeric labels are used because the macro is expanded more than once.
 */
	.macro ALTERNATIVE_JUMP feature,orig,alt
0:	.byte	0xe9			/* near jump, 32-bit displacement */
	.long	\orig-1f		/* default target: orig */
1:
	.section .altinstr_replacement,"ax"
2:	.byte	0xe9			/* near jump, 32-bit displacement */
	.long	\alt-1b			/* alternative target: alt */
	.previous

	.section .altinstructions,"a"
	.align	8
	.quad	0b			/* address of the original jump */
	.quad	2b			/* address of the replacement jump */
	.word	\feature		/* replacement applies when feature is set */
	/*
	 * Both jumps are 5 bytes long (1 opcode byte + 4 displacement bytes).
	 * NOTE(review): presumably original length, then replacement length.
	 */
	.byte	5
	.byte	5
	.previous
	.endm
37
/*
 * ALIGN_DESTINATION
 *
 * When FIX_ALIGNMENT is defined, copies single bytes from (%rsi) to (%rdi)
 * until %rdi is 8-byte aligned; otherwise expands to nothing.
 *
 * In:   rdi = destination, rsi = source, edx = byte count.
 *       Both users in this file invoke the macro only after checking
 *       edx >= 8, so removing at most 7 bytes from edx cannot underflow.
 * Out:  rdi/rsi advanced past the copied bytes, edx reduced by the same
 *       amount.
 * Uses: ecx (bytes left to copy here), al, flags.
 *
 * A fault on either the load or the store is routed through __ex_table to
 * the fixup below, which adds the not-yet-copied alignment bytes back into
 * edx and continues in copy_user_handle_tail (defined outside this file).
 *
 * Numeric labels are used because the macro is expanded more than once.
 */
	.macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
	movl	%edi, %ecx
	andl	$7, %ecx		/* ecx = destination offset within 8 bytes */
	jz	102f			/* offset 0: destination already aligned */
	subl	$8, %ecx
	negl	%ecx			/* ecx = 8 - offset = bytes needed to align */
	subl	%ecx, %edx		/* those bytes are handled by the loop below */
100:	movb	(%rsi), %al
101:	movb	%al, (%rdi)
	incq	%rsi
	incq	%rdi
	decl	%ecx
	jnz	100b
102:
	.section .fixup,"ax"
	/*
	 * ecx still counts the byte that faulted, so edx + ecx is the number
	 * of bytes not copied yet.  Per the original author's note, ecx also
	 * serves as the "zerorest" value for copy_user_handle_tail.
	 */
103:	addl	%ecx, %edx
	jmp	copy_user_handle_tail
	.previous

	.section __ex_table,"a"
	.align	8
	.quad	100b, 103b		/* fault while loading the source byte */
	.quad	101b, 103b		/* fault while storing the destination byte */
	.previous
#endif
	.endm
66
/*
 * _copy_to_user - standard copy_to_user with address limit checking.
 *
 * Input:
 * rdi destination (the address that is range-checked)
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.  If the range check fails,
 * nothing is copied and eax is the low 32 bits of the count.
 *
 * Uses rax (thread info pointer) and rcx (end address) as scratch.
 */
ENTRY(_copy_to_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq	%rdi, %rcx
	addq	%rdx, %rcx		/* rcx = first address past the buffer */
	jc	bad_to_user		/* destination + count wrapped around */
	cmpq	TI_addr_limit(%rax), %rcx
	/*
	 * The copy touches [rdi, rcx), i.e. its last byte is at rcx - 1, so
	 * a buffer ending exactly at the address limit is valid.  Reject
	 * only an end strictly above the limit ("ja"); the previous "jae"
	 * wrongly refused a buffer ending exactly at the limit.
	 */
	ja	bad_to_user
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC
ENDPROC(_copy_to_user)
79
/*
 * _copy_from_user - standard copy_from_user with address limit checking.
 *
 * Input:
 * rdi destination
 * rsi source (the address that is range-checked)
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.  If the range check fails,
 * bad_from_user zero-fills the destination and eax is the low 32 bits
 * of the count.
 *
 * Uses rax (thread info pointer) and rcx (end address) as scratch.
 */
ENTRY(_copy_from_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq	%rsi, %rcx
	addq	%rdx, %rcx		/* rcx = first address past the buffer */
	jc	bad_from_user		/* source + count wrapped around */
	cmpq	TI_addr_limit(%rax), %rcx
	/*
	 * The copy touches [rsi, rcx), i.e. its last byte is at rcx - 1, so
	 * a buffer ending exactly at the address limit is valid.  Reject
	 * only an end strictly above the limit ("ja"); the previous "jae"
	 * wrongly refused a buffer ending exactly at the limit.
	 */
	ja	bad_from_user
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC
ENDPROC(_copy_from_user)
92
/*
 * Failure exits for the range checks in _copy_from_user / _copy_to_user.
 * Both live in the .fixup section.
 *
 * bad_from_user: nothing could be read, so the destination must not be
 *                left uninitialised: store edx zero bytes at (%rdi), then
 *                fall through to bad_to_user.
 * bad_to_user:   return the low 32 bits of the count in eax, i.e. report
 *                every byte as uncopied.
 *
 * In:  rdi = destination (bad_from_user only), edx = count.
 * Out: eax = edx.  bad_from_user also advances rdi and leaves ecx = 0.
 */
	.section .fixup,"ax"
ENTRY(bad_from_user)
	/*
	 * NOTE(review): ENTRY() presumably defines this label already; the
	 * explicit definition is kept exactly as in the original.
	 */
bad_from_user:
	CFI_STARTPROC
	movl	%edx, %ecx		/* ecx = number of bytes to clear */
	xorl	%eax, %eax		/* al = 0, the fill byte */
	rep stosb
bad_to_user:
	movl	%edx, %eax
	ret
	CFI_ENDPROC
ENDPROC(bad_from_user)
	.previous
108
/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient microcode
 * for rep movsq; it copies with explicit 64-bit moves instead.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count (only the low 32 bits, edx, are used)
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 *
 * Modifies rsi, rdi, rdx, rcx and r8-r11.
 *
 * The copy runs in up to four stages: byte-wise alignment of the
 * destination (ALIGN_DESTINATION), 64-byte blocks, 8-byte words and
 * finally single bytes.  Every load and store that may fault has an
 * __ex_table entry; the fixup code recomputes in edx how many bytes are
 * still outstanding and continues in copy_user_handle_tail, which is
 * defined outside this file.
 */
ENTRY(copy_user_generic_unrolled)
	CFI_STARTPROC
	cmpl	$8, %edx
	jb	.Lunr_bytes		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION

	/* 64-byte blocks: ecx = number of blocks, edx = count % 64 */
	movl	%edx, %ecx
	andl	$63, %edx
	shrl	$6, %ecx
	jz	.Lunr_quads
.Lunr_blk1:
	movq	(%rsi), %r8
.Lunr_blk2:
	movq	1*8(%rsi), %r9
.Lunr_blk3:
	movq	2*8(%rsi), %r10
.Lunr_blk4:
	movq	3*8(%rsi), %r11
.Lunr_blk5:
	movq	%r8, (%rdi)
.Lunr_blk6:
	movq	%r9, 1*8(%rdi)
.Lunr_blk7:
	movq	%r10, 2*8(%rdi)
.Lunr_blk8:
	movq	%r11, 3*8(%rdi)
.Lunr_blk9:
	movq	4*8(%rsi), %r8
.Lunr_blk10:
	movq	5*8(%rsi), %r9
.Lunr_blk11:
	movq	6*8(%rsi), %r10
.Lunr_blk12:
	movq	7*8(%rsi), %r11
.Lunr_blk13:
	movq	%r8, 4*8(%rdi)
.Lunr_blk14:
	movq	%r9, 5*8(%rdi)
.Lunr_blk15:
	movq	%r10, 6*8(%rdi)
.Lunr_blk16:
	movq	%r11, 7*8(%rdi)
	/* pointers advance only after the whole block has been moved */
	leaq	64(%rsi), %rsi
	leaq	64(%rdi), %rdi
	decl	%ecx
	jnz	.Lunr_blk1

	/* 8-byte words: ecx = number of words, edx = count % 8 */
.Lunr_quads:
	movl	%edx, %ecx
	andl	$7, %edx
	shrl	$3, %ecx
	jz	.Lunr_bytes
.Lunr_quad_load:
	movq	(%rsi), %r8
.Lunr_quad_store:
	movq	%r8, (%rdi)
	leaq	8(%rsi), %rsi
	leaq	8(%rdi), %rdi
	decl	%ecx
	jnz	.Lunr_quad_load

	/* trailing bytes: edx = bytes left (0..7) */
.Lunr_bytes:
	andl	%edx, %edx
	jz	.Lunr_done
	movl	%edx, %ecx
.Lunr_byte_load:
	movb	(%rsi), %al
.Lunr_byte_store:
	movb	%al, (%rdi)
	incq	%rsi
	incq	%rdi
	decl	%ecx
	jnz	.Lunr_byte_load
.Lunr_done:
	xor	%eax, %eax		/* everything copied */
	ret

	.section .fixup,"ax"
	/*
	 * Fault in the 64-byte loop: ecx counts the blocks not completed,
	 * including the current one (rsi/rdi still point at its start).
	 * Outstanding bytes = ecx * 64 + edx.
	 */
.Lunr_fix_blk:
	shll	$6, %ecx
	addl	%ecx, %edx
	jmp	.Lunr_fix_tail
	/* Fault in the 8-byte loop: outstanding bytes = ecx * 8 + edx. */
.Lunr_fix_quad:
	lea	(%rdx,%rcx,8), %rdx
	jmp	.Lunr_fix_tail
	/* Fault in the byte loop: outstanding bytes = ecx. */
.Lunr_fix_byte:
	movl	%ecx, %edx
.Lunr_fix_tail:
	jmp	copy_user_handle_tail	/* ecx is zerorest also */
	.previous

	.section __ex_table,"a"
	.align	8
	.quad	.Lunr_blk1, .Lunr_fix_blk
	.quad	.Lunr_blk2, .Lunr_fix_blk
	.quad	.Lunr_blk3, .Lunr_fix_blk
	.quad	.Lunr_blk4, .Lunr_fix_blk
	.quad	.Lunr_blk5, .Lunr_fix_blk
	.quad	.Lunr_blk6, .Lunr_fix_blk
	.quad	.Lunr_blk7, .Lunr_fix_blk
	.quad	.Lunr_blk8, .Lunr_fix_blk
	.quad	.Lunr_blk9, .Lunr_fix_blk
	.quad	.Lunr_blk10, .Lunr_fix_blk
	.quad	.Lunr_blk11, .Lunr_fix_blk
	.quad	.Lunr_blk12, .Lunr_fix_blk
	.quad	.Lunr_blk13, .Lunr_fix_blk
	.quad	.Lunr_blk14, .Lunr_fix_blk
	.quad	.Lunr_blk15, .Lunr_fix_blk
	.quad	.Lunr_blk16, .Lunr_fix_blk
	.quad	.Lunr_quad_load, .Lunr_fix_quad
	.quad	.Lunr_quad_store, .Lunr_fix_quad
	.quad	.Lunr_byte_load, .Lunr_fix_byte
	.quad	.Lunr_byte_store, .Lunr_fix_byte
	.previous
	CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)
208
/*
 * copy_user_generic_string - memory copy with exception handling, built
 * on the string copy instructions.  Some CPUs run faster with these, and
 * the code is a lot simpler, so use them when possible.
 *
 * Only 4GB of copy is supported (the count is taken from edx).  This
 * shouldn't be a problem because the kernel normally only writes from/to
 * page sized chunks even if user space passed a longer buffer.  More
 * would also be dangerous because both Intel and AMD have errata with
 * rep movsq > 4GB.  If someone feels the need to lift the limit, please
 * take this into account.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 *
 * Modifies rsi, rdi, rdx and rcx.  A fault in either rep instruction is
 * routed through __ex_table to the fixup code, which puts the number of
 * outstanding bytes into edx and continues in copy_user_handle_tail
 * (defined outside this file).
 */
ENTRY(copy_user_generic_string)
	CFI_STARTPROC
	andl	%edx, %edx
	jz	.Lstr_done		/* nothing to copy */
	cmpl	$8, %edx
	jb	.Lstr_tail		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl	%edx, %ecx
	shrl	$3, %ecx		/* ecx = number of 8-byte words */
	andl	$7, %edx		/* edx = trailing bytes (0..7) */
.Lstr_quads:
	rep movsq
.Lstr_tail:
	movl	%edx, %ecx
.Lstr_bytes:
	rep movsb
.Lstr_done:
	xorl	%eax, %eax		/* everything copied */
	ret

	.section .fixup,"ax"
	/* Fault in rep movsq: outstanding bytes = rcx * 8 + rdx. */
.Lstr_fix_quads:
	lea	(%rdx,%rcx,8), %rcx
	/* Fault in rep movsb enters here: outstanding bytes = ecx. */
.Lstr_fix_bytes:
	movl	%ecx, %edx		/* ecx is zerorest also */
	jmp	copy_user_handle_tail
	.previous

	.section __ex_table,"a"
	.align	8
	.quad	.Lstr_quads, .Lstr_fix_quads
	.quad	.Lstr_bytes, .Lstr_fix_bytes
	.previous
	CFI_ENDPROC
ENDPROC(copy_user_generic_string)
258