/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>

/*
 * By placing feature2 after feature1 in the altinstructions section,
 * we logically implement:
 * if the CPU has feature2, the jmp to alt2 is used;
 * else if the CPU has feature1, the jmp to alt1 is used;
 * else the jmp to orig is used.
 */
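/*
 * Illustrative pseudo-code for the dispatch this macro implements.
 * This is a sketch only: the real selection is done at boot by patching
 * the jmp target through the altinstructions machinery, not by run-time
 * branches, and cpu_has() below is just shorthand for a feature test:
 *
 *	if (cpu_has(feature2))		// e.g. X86_FEATURE_ERMS
 *		goto alt2;
 *	else if (cpu_has(feature1))	// e.g. X86_FEATURE_REP_GOOD
 *		goto alt1;
 *	else
 *		goto orig;
 */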
	.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
0:
	.byte 0xe9	/* 32bit jump */
	.long \orig-1f	/* by default jump to orig */
1:
	.section .altinstr_replacement,"ax"
2:	.byte 0xe9			/* near jump with 32bit immediate */
	.long \alt1-1b /* offset */   /* or alternatively to alt1 */
3:	.byte 0xe9			/* near jump with 32bit immediate */
	.long \alt2-1b /* offset */   /* or alternatively to alt2 */
	.previous

	.section .altinstructions,"a"
	altinstruction_entry 0b,2b,\feature1,5,5
	altinstruction_entry 0b,3b,\feature2,5,5
	.previous
	.endm

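/*
 * ALIGN_DESTINATION: byte-copy up to 7 bytes so that the destination
 * pointer in %rdi becomes 8-byte aligned.  %rsi and %rdi are advanced
 * and the remaining byte count in %edx is reduced accordingly; %ecx
 * and %al are clobbered.  If one of the byte accesses faults, the
 * fixup at 103 adds the bytes not yet copied back into %edx and jumps
 * to copy_user_handle_tail.
 */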
	.macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	subl $8,%ecx
	negl %ecx
	subl %ecx,%edx
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:
	.section .fixup,"ax"
103:	addl %ecx,%edx			/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(100b,103b)
	_ASM_EXTABLE(101b,103b)
#endif
	.endm

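/*
 * The two wrappers below validate the user range before dispatching:
 * the end address is computed in %rcx, the jc catches a wrap-around
 * past the top of the address space, and the result is compared
 * against the task's TI_addr_limit.  On success they tail-jump via
 * ALTERNATIVE_JUMP to the best copy routine for the CPU; on failure
 * they branch to the bad_to_user/bad_from_user stubs further down.
 */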
/* Standard copy_to_user with segment limit checking */
ENTRY(_copy_to_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq %rdi,%rcx
	addq %rdx,%rcx
	jc bad_to_user
	cmpq TI_addr_limit(%rax),%rcx
	ja bad_to_user
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
		copy_user_generic_unrolled,copy_user_generic_string,	\
		copy_user_enhanced_fast_string
	CFI_ENDPROC
ENDPROC(_copy_to_user)

/* Standard copy_from_user with segment limit checking */
ENTRY(_copy_from_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq %rsi,%rcx
	addq %rdx,%rcx
	jc bad_from_user
	cmpq TI_addr_limit(%rax),%rcx
	ja bad_from_user
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
		copy_user_generic_unrolled,copy_user_generic_string,	\
		copy_user_enhanced_fast_string
	CFI_ENDPROC
ENDPROC(_copy_from_user)

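/*
 * Error stubs for the range checks above.  bad_from_user zeroes the
 * whole destination (callers of copy_from_user rely on the uncopied
 * part being cleared) and returns the full count in %eax;
 * bad_to_user simply returns the count, i.e. no bytes were copied.
 */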
	.section .fixup,"ax"
	/* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
	CFI_STARTPROC
	movl %edx,%ecx
	xorl %eax,%eax
	rep
	stosb
bad_to_user:
	movl %edx,%eax
	ret
	CFI_ENDPROC
ENDPROC(bad_from_user)
	.previous

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like the P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_unrolled)
	CFI_STARTPROC
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz 17f
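	/*
	 * Main loop: eight quadword loads followed by eight stores,
	 * i.e. 64 bytes per iteration.  %ecx holds the number of
	 * 64-byte blocks, %edx the bytes left over after them.
	 */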
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
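	/* Copy the remaining quadwords (at most 7), 8 bytes at a time. */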
17:	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
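	/* Copy the final tail (at most 7 bytes), one byte at a time. */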
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ret

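	/*
	 * Fault fixups: recompute how many bytes are still uncopied and
	 * hand off to copy_user_handle_tail (a C helper defined elsewhere
	 * in arch/x86/lib), which finishes and accounts the copy byte by
	 * byte and leaves the number of bytes left in %eax.
	 * 30: fault in the 64-byte loop (%ecx blocks remain),
	 * 40: fault in the quadword loop (%ecx quadwords remain),
	 * 50: fault in the byte loop (%ecx bytes remain).
	 */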
	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	lea (%rdx,%rcx,8),%rdx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp copy_user_handle_tail /* ecx is zerorest also */
	.previous

	_ASM_EXTABLE(1b,30b)
	_ASM_EXTABLE(2b,30b)
	_ASM_EXTABLE(3b,30b)
	_ASM_EXTABLE(4b,30b)
	_ASM_EXTABLE(5b,30b)
	_ASM_EXTABLE(6b,30b)
	_ASM_EXTABLE(7b,30b)
	_ASM_EXTABLE(8b,30b)
	_ASM_EXTABLE(9b,30b)
	_ASM_EXTABLE(10b,30b)
	_ASM_EXTABLE(11b,30b)
	_ASM_EXTABLE(12b,30b)
	_ASM_EXTABLE(13b,30b)
	_ASM_EXTABLE(14b,30b)
	_ASM_EXTABLE(15b,30b)
	_ASM_EXTABLE(16b,30b)
	_ASM_EXTABLE(18b,40b)
	_ASM_EXTABLE(19b,40b)
	_ASM_EXTABLE(21b,50b)
	_ASM_EXTABLE(22b,50b)
	CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * More would also be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. Anyone who wants to lift this limit
 * should keep those errata in mind.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_string)
	CFI_STARTPROC
	andl %edx,%edx
	jz 4f
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
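	/* split the count: %ecx = whole quadwords, %edx = tail bytes */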
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
4:	xorl %eax,%eax
	ret

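	/*
	 * Fault fixups: 11 handles a fault in the rep movsq (remaining
	 * bytes = tail in %rdx plus 8 * the quadwords left in %rcx),
	 * 12 handles a fault in the rep movsb (%ecx bytes remain).
	 * Both end up in copy_user_handle_tail with the count in %edx.
	 */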
	.section .fixup,"ax"
11:	lea (%rdx,%rcx,8),%rcx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,11b)
	_ASM_EXTABLE(3b,12b)
	CFI_ENDPROC
ENDPROC(copy_user_generic_string)

/*
 * Some newer CPUs provide enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use enhanced REP MOVSB/STOSB when the feature
 * is enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_enhanced_fast_string)
	CFI_STARTPROC
	andl %edx,%edx
	jz 2f
	movl %edx,%ecx
1:	rep
	movsb
2:	xorl %eax,%eax
	ret

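	/*
	 * Fault fixup: after a fault in the rep movsb, %ecx holds the
	 * number of bytes not yet copied; move it to %edx and let
	 * copy_user_handle_tail finish the job and set the return value.
	 */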
	.section .fixup,"ax"
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,12b)
	CFI_ENDPROC
ENDPROC(copy_user_enhanced_fast_string)