xref: /openbmc/linux/arch/x86/lib/copy_user_64.S (revision 4307bec9)
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>

/*
 * By placing feature2 after feature1 in the .altinstructions section,
 * we logically implement:
 * if the CPU has feature2, the jmp to alt2 is used,
 * else if the CPU has feature1, the jmp to alt1 is used,
 * else the jmp to orig is used.
 */
	.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
0:
	.byte 0xe9	/* 32bit jump */
	.long \orig-1f	/* by default jump to orig */
1:
	.section .altinstr_replacement,"ax"
2:	.byte 0xe9			/* near jump with 32bit immediate */
	.long \alt1-1b /* offset */   /* or alternatively to alt1 */
3:	.byte 0xe9			/* near jump with 32bit immediate */
	.long \alt2-1b /* offset */   /* or alternatively to alt2 */
	.previous

	.section .altinstructions,"a"
	altinstruction_entry 0b,2b,\feature1,5,5
	altinstruction_entry 0b,3b,\feature2,5,5
	.previous
	.endm
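
/*
 * Note on ALTERNATIVE_JUMP: both replacements are 5-byte near jumps whose
 * displacements are encoded relative to label 1, i.e. the end of the
 * original 5-byte slot at label 0.  When the alternatives code patches a
 * replacement over the original jump, the displacement therefore still
 * resolves to the intended target (alt1 or alt2) without any relocation.
 */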

	.macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	subl $8,%ecx
	negl %ecx
	subl %ecx,%edx
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:
	.section .fixup,"ax"
103:	addl %ecx,%edx			/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	.section __ex_table,"a"
	.align 8
	.quad 100b,103b
	.quad 101b,103b
	.previous
#endif
	.endm
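
/*
 * ALIGN_DESTINATION copies %ecx = 8 - (%rdi & 7) bytes one at a time so
 * that the destination becomes 8-byte aligned, subtracting them from the
 * remaining count in %edx up front.  If one of those byte accesses
 * faults, the fixup at 103 adds the not-yet-copied alignment bytes back
 * into %edx (%ecx doubles as the zerorest argument) and tails into
 * copy_user_handle_tail, which finishes the copy byte by byte and
 * returns the number of bytes left uncopied.
 */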

/* Standard copy_to_user with segment limit checking */
ENTRY(_copy_to_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq %rdi,%rcx
	addq %rdx,%rcx
	jc bad_to_user
	cmpq TI_addr_limit(%rax),%rcx
	jae bad_to_user
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
		copy_user_generic_unrolled,copy_user_generic_string,	\
		copy_user_enhanced_fast_string
	CFI_ENDPROC
ENDPROC(_copy_to_user)

/* Standard copy_from_user with segment limit checking */
ENTRY(_copy_from_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq %rsi,%rcx
	addq %rdx,%rcx
	jc bad_from_user
	cmpq TI_addr_limit(%rax),%rcx
	jae bad_from_user
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
		copy_user_generic_unrolled,copy_user_generic_string,	\
		copy_user_enhanced_fast_string
	CFI_ENDPROC
ENDPROC(_copy_from_user)
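
/*
 * Both entry points above perform the same range check: the user pointer
 * plus the count is compared against the thread's addr_limit, and the
 * carry from the addq catches a wrapping pointer + count.  If the check
 * fails, they branch to the bad_*_user stubs below; otherwise
 * ALTERNATIVE_JUMP dispatches to the best copy routine for this CPU
 * (unrolled moves, rep movsq, or rep movsb on ERMS parts).
 */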

	.section .fixup,"ax"
	/* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
	CFI_STARTPROC
	movl %edx,%ecx
	xorl %eax,%eax
	rep
	stosb
bad_to_user:
	movl %edx,%eax
	ret
	CFI_ENDPROC
ENDPROC(bad_from_user)
	.previous
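
/*
 * bad_from_user zeroes all %edx destination bytes with rep stosb and then
 * falls through to bad_to_user, which simply returns the full count in
 * %eax, i.e. nothing was copied.
 */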

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient microcode
 * for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_unrolled)
	CFI_STARTPROC
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz 17f
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
17:	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ret

	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	lea (%rdx,%rcx,8),%rdx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp copy_user_handle_tail /* ecx is zerorest also */
	.previous

	.section __ex_table,"a"
	.align 8
	.quad 1b,30b
	.quad 2b,30b
	.quad 3b,30b
	.quad 4b,30b
	.quad 5b,30b
	.quad 6b,30b
	.quad 7b,30b
	.quad 8b,30b
	.quad 9b,30b
	.quad 10b,30b
	.quad 11b,30b
	.quad 12b,30b
	.quad 13b,30b
	.quad 14b,30b
	.quad 15b,30b
	.quad 16b,30b
	.quad 18b,40b
	.quad 19b,40b
	.quad 21b,50b
	.quad 22b,50b
	.previous
	CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)
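
/*
 * Structure of copy_user_generic_unrolled: the main loop (labels 1-16)
 * moves 64 bytes per iteration through %r8-%r11, the loop at 18/19
 * handles the remaining whole quadwords, and the loop at 21/22 copies
 * the final 0-7 bytes.  On a fault, the exception table sends control to
 * 30/40/50 in the .fixup section, which reconstruct the count of bytes
 * still to be copied in %edx (adding back 64 bytes per unfinished
 * 64-byte iteration, 8 per unfinished quadword, or taking the leftover
 * byte count from %ecx) and branch to copy_user_handle_tail.
 */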

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this, please keep those errata in mind.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_string)
	CFI_STARTPROC
	andl %edx,%edx
	jz 4f
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
4:	xorl %eax,%eax
	ret

	.section .fixup,"ax"
11:	lea (%rdx,%rcx,8),%rcx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	.section __ex_table,"a"
	.align 8
	.quad 1b,11b
	.quad 3b,12b
	.previous
	CFI_ENDPROC
ENDPROC(copy_user_generic_string)
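
/*
 * copy_user_generic_string copies count/8 quadwords with rep movsq and
 * the remaining count%8 bytes with rep movsb.  If the rep movsq faults,
 * the fixup at 11 converts the quadwords left in %rcx back into a byte
 * count (plus the tail in %rdx) before handing off to
 * copy_user_handle_tail via 12.
 */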

/*
 * Some CPUs support the enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use enhanced REP MOVSB/STOSB when it is enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_enhanced_fast_string)
	CFI_STARTPROC
	andl %edx,%edx
	jz 2f
	movl %edx,%ecx
1:	rep
	movsb
2:	xorl %eax,%eax
	ret

	.section .fixup,"ax"
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	.section __ex_table,"a"
	.align 8
	.quad 1b,12b
	.previous
	CFI_ENDPROC
ENDPROC(copy_user_enhanced_fast_string)
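
/*
 * With ERMS the whole copy is a single rep movsb of %edx bytes; no
 * alignment fixup or unrolling is needed.  On a fault, rep movsb leaves
 * the number of bytes not yet copied in %rcx, which the fixup at 12
 * forwards to copy_user_handle_tail as the remaining count.
 */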