xref: /openbmc/linux/arch/x86/lib/copy_user_64.S (revision 3c93ca00)
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>

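/*
 * ALTERNATIVE_JUMP emits a 5-byte "jmp \orig" and records it in
 * .altinstructions together with a replacement "jmp \alt" kept in
 * .altinstr_replacement.  At boot, apply_alternatives() patches the
 * original jump with the replacement when the CPU has \feature.
 * A minimal sketch of that pass, assuming the record layout emitted
 * below (two 8-byte addresses, then feature/instrlen/replacementlen
 * bytes); this is an illustration, not the exact kernel code:
 *
 *	struct alt_instr { u8 *instr; u8 *replacement;
 *			   u8 cpuid, instrlen, replacementlen; };
 *
 *	for (a = __alt_instructions; a < __alt_instructions_end; a++)
 *		if (boot_cpu_has(a->cpuid))
 *			memcpy(a->instr, a->replacement, a->replacementlen);
 *
 * Note that the replacement's jump offset (\alt-1b) is computed relative
 * to the original site, so a plain byte copy of the replacement still
 * lands on the right target.
 */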
	.macro ALTERNATIVE_JUMP feature,orig,alt
0:
	.byte 0xe9	/* 32bit jump */
	.long \orig-1f	/* by default jump to orig */
1:
	.section .altinstr_replacement,"ax"
2:	.byte 0xe9			/* near jump with 32bit immediate */
	.long \alt-1b /* offset */   /* or alternatively to alt */
	.previous
	.section .altinstructions,"a"
	.align 8
	.quad  0b
	.quad  2b
	.byte  \feature			/* when feature is set */
	.byte  5
	.byte  5
	.previous
	.endm

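/*
 * ALIGN_DESTINATION byte-copies until %rdi is 8-byte aligned and deducts
 * those bytes from the count in %edx.  A C-equivalent sketch (dst/src/count
 * standing for rdi/rsi/rdx; faults are caught through the exception table
 * and routed to copy_user_handle_tail):
 *
 *	unsigned misalign = (unsigned long)dst & 7;
 *	if (misalign) {
 *		unsigned fix = 8 - misalign;
 *		count -= fix;
 *		while (fix--)
 *			*dst++ = *src++;
 *	}
 */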
	.macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	subl $8,%ecx
	negl %ecx
	subl %ecx,%edx
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:
	.section .fixup,"ax"
103:	addl %ecx,%edx			/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	.section __ex_table,"a"
	.align 8
	.quad 100b,103b
	.quad 101b,103b
	.previous
#endif
	.endm

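/*
 * The two entries below perform the usual user-range check before
 * dispatching.  Roughly, in C (a sketch of what the assembly checks,
 * using the thread_info field that TI_addr_limit indexes):
 *
 *	end = addr + count;
 *	if (end < addr)			// carry: wrapped past the top
 *		goto bad;
 *	if (end >= current_thread_info()->addr_limit.seg)
 *		goto bad;
 *
 * On failure they branch to bad_to_user/bad_from_user below.
 */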
/* Standard copy_to_user with segment limit checking */
ENTRY(_copy_to_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq %rdi,%rcx
	addq %rdx,%rcx
	jc bad_to_user
	cmpq TI_addr_limit(%rax),%rcx
	jae bad_to_user
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC
ENDPROC(_copy_to_user)

/* Standard copy_from_user with segment limit checking */
ENTRY(_copy_from_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq %rsi,%rcx
	addq %rdx,%rcx
	jc bad_from_user
	cmpq TI_addr_limit(%rax),%rcx
	jae bad_from_user
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC
ENDPROC(_copy_from_user)

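/*
 * copy_user_generic - same dispatch as above but with no range check;
 * callers are expected to have validated the user pointer already.
 */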
ENTRY(copy_user_generic)
	CFI_STARTPROC
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC
ENDPROC(copy_user_generic)

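/*
 * Fault targets for the range checks above.  copy_from_user() must not
 * leave kernel memory uninitialized on failure, so bad_from_user clears
 * the whole destination before both paths return the full count in %eax
 * as "bytes not copied".
 */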
	.section .fixup,"ax"
	/* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
	CFI_STARTPROC
	movl %edx,%ecx
	xorl %eax,%eax
	rep
	stosb
bad_to_user:
	movl %edx,%eax
	ret
	CFI_ENDPROC
ENDPROC(bad_from_user)
	.previous

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like the P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
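/*
 * Rough C outline of the loop structure below (a sketch only; the real
 * code double-buffers through %r8-%r11 and recovers from faults via the
 * exception table entries at the end):
 *
 *	if (count >= 8) {
 *		... align dst to 8 bytes, adjusting count ...
 *		for (; count >= 64; count -= 64, src += 64, dst += 64)
 *			copy 64 bytes;		// 16 movq's, labels 1-16
 *		for (; count >= 8; count -= 8, src += 8, dst += 8)
 *			*(u64 *)dst = *(u64 *)src;	// labels 18/19
 *	}
 *	for (; count; count--, src++, dst++)
 *		*dst = *src;				// labels 21/22
 *	return 0;
 */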
ENTRY(copy_user_generic_unrolled)
	CFI_STARTPROC
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz 17f
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
17:	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ret

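/*
 * Fault fixups: rebuild an estimate of the bytes left to copy in %rdx
 * (64 per outstanding block at 30, 8 per outstanding quadword at 40,
 * the loose byte count at 50) and hand off to copy_user_handle_tail(),
 * which retries the tail byte by byte and reports what really remains.
 */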
	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	lea (%rdx,%rcx,8),%rdx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp copy_user_handle_tail /* ecx is zerorest also */
	.previous

	.section __ex_table,"a"
	.align 8
	.quad 1b,30b
	.quad 2b,30b
	.quad 3b,30b
	.quad 4b,30b
	.quad 5b,30b
	.quad 6b,30b
	.quad 7b,30b
	.quad 8b,30b
	.quad 9b,30b
	.quad 10b,30b
	.quad 11b,30b
	.quad 12b,30b
	.quad 13b,30b
	.quad 14b,30b
	.quad 15b,30b
	.quad 16b,30b
	.quad 18b,40b
	.quad 19b,40b
	.quad 21b,50b
	.quad 22b,50b
	.previous
	CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page-sized chunks
 * even if user space passed a longer buffer.
 * Copying more would also be dangerous because both Intel and AMD
 * have errata with rep movsq > 4GB; anyone who wants to lift this
 * limit should keep those errata in mind.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
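/*
 * C-equivalent sketch of the string variant (faults are handled through
 * the fixups at the end, as in the unrolled version):
 *
 *	if (count) {
 *		if (count >= 8)
 *			... align dst, adjusting count ...
 *		rep movsq for count / 8 quadwords;	// label 1
 *		rep movsb for count % 8 bytes;		// labels 2/3
 *	}
 *	return 0;
 */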
ENTRY(copy_user_generic_string)
	CFI_STARTPROC
	andl %edx,%edx
	jz 4f
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
4:	xorl %eax,%eax
	ret

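/*
 * Fixups: a fault in the movsq loop leaves the outstanding quadwords in
 * %rcx, so the byte count is rebuilt as rcx*8 + rdx (label 11); a fault
 * in the movsb loop already has the byte count in %ecx (label 12).
 * Either way copy_user_handle_tail() finishes the job byte by byte.
 */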
	.section .fixup,"ax"
11:	lea (%rdx,%rcx,8),%rcx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	.section __ex_table,"a"
	.align 8
	.quad 1b,11b
	.quad 3b,12b
	.previous
	CFI_ENDPROC
ENDPROC(copy_user_generic_string)
