xref: /openbmc/linux/arch/x86/lib/clear_page_64.S (revision 1ac731c529cd4d6adbce134754b51ff7d822b145)
1457c8996SThomas Gleixner/* SPDX-License-Identifier: GPL-2.0-only */
2185f3d38SThomas Gleixner#include <linux/linkage.h>
30db7058eSBorislav Petkov#include <asm/asm.h>
4784d5699SAl Viro#include <asm/export.h>
5185f3d38SThomas Gleixner
/*
 * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
 * recommended to use them when possible, and we do use them by default.
 * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
 * Otherwise, use original.
 */
126620ef28SBorislav Petkov
/*
 * Zero a page.
 * %rdi	- address of the page to zero
 */
/*
 * clear_page_rep - zero 4096 bytes starting at %rdi using REP STOSQ.
 *
 * In:       %rdi = destination address
 * Clobbers: %rax (set to zero), %rcx (counted down to zero by the REP
 *           prefix), %rdi (stepped by the string instruction), flags
 */
SYM_FUNC_START(clear_page_rep)
	movl $4096/8,%ecx	/* 512 quadwords = 4096 bytes */
	xorl %eax,%eax		/* 32-bit write also zeroes the upper half of %rax */
	rep stosq		/* store %rax at (%rdi), %rcx times */
	RET
SYM_FUNC_END(clear_page_rep)
EXPORT_SYMBOL_GPL(clear_page_rep)
24185f3d38SThomas Gleixner
/*
 * clear_page_orig - zero 4096 bytes starting at %rdi with plain quadword
 * stores, 64 bytes (eight stores) per loop iteration.
 *
 * In:       %rdi = destination address
 * Clobbers: %rax (set to zero), %ecx (loop counter, ends at zero),
 *           %rdi (advanced by 64 per iteration), flags
 */
SYM_FUNC_START(clear_page_orig)
	xorl   %eax,%eax	/* %rax = 0: the value stored everywhere */
	movl   $4096/64,%ecx	/* 64 iterations of 64 bytes each */
#define PUT(x) movq %rax,x*8(%rdi)
	.p2align 4
.Lloop:
	/*
	 * The counter is decremented first; the stores and the LEA below
	 * do not modify flags, so the JNZ at the bottom still tests the
	 * result of this DECL.
	 */
	decl	%ecx
	movq %rax,(%rdi)
	PUT(1)
	PUT(2)
	PUT(3)
	PUT(4)
	PUT(5)
	PUT(6)
	PUT(7)
	leaq	64(%rdi),%rdi	/* advance without touching flags */
	jnz	.Lloop
	nop
	RET
SYM_FUNC_END(clear_page_orig)
EXPORT_SYMBOL_GPL(clear_page_orig)
46185f3d38SThomas Gleixner
/*
 * clear_page_erms - zero 4096 bytes starting at %rdi using REP STOSB.
 *
 * In:       %rdi = destination address
 * Clobbers: %rax (set to zero), %rcx (counted down to zero by the REP
 *           prefix), %rdi (stepped by the string instruction), flags
 */
SYM_FUNC_START(clear_page_erms)
	movl $4096,%ecx		/* byte count */
	xorl %eax,%eax		/* %al = 0 is the byte that gets stored */
	rep stosb		/* store %al at (%rdi), %rcx times */
	RET
SYM_FUNC_END(clear_page_erms)
EXPORT_SYMBOL_GPL(clear_page_erms)
540db7058eSBorislav Petkov
/*
 * Default clear user-space.
 *
 * Input:
 * rdi: destination
 * rcx: count
 * rax: zero
 *
 * Output:
 * rcx: uncleared bytes or 0 if successful.
 */
/*
 * Strategy: pick the widest store loop the remaining count allows.
 *   count >= 64      -> .Lunrolled         (eight quadword stores per pass)
 *   8 <= count < 64  -> .Lword             (one quadword store per pass)
 *   0 <  count < 8   -> .Lclear_user_tail  (one byte store per pass)
 * %rcx is only decremented after the stores it accounts for, so on a
 * fault it still covers every byte from %rdi onwards.
 */
SYM_FUNC_START(rep_stos_alternative)
	cmpq $64,%rcx
	jae .Lunrolled

	/* %rcx < 64 from here on, so 32-bit compares are sufficient */
	cmp $8,%ecx
	jae .Lword

	testl %ecx,%ecx
	je .Lexit

	/* Byte-at-a-time loop; also the landing point for faulting word stores */
.Lclear_user_tail:
0:	movb %al,(%rdi)
	inc %rdi
	dec %rcx
	jnz .Lclear_user_tail
.Lexit:
	RET

	/*
	 * Fault on the byte store: go straight to the return with %rcx
	 * holding the number of bytes that were not cleared.
	 */
	_ASM_EXTABLE_UA( 0b, .Lexit)

	/* Quadword-at-a-time loop, entered only with 8 <= %rcx < 64 */
.Lword:
1:	movq %rax,(%rdi)
	addq $8,%rdi
	sub $8,%ecx
	je .Lexit
	cmp $8,%ecx
	jae .Lword
	jmp .Lclear_user_tail

	/* 64 bytes per pass; pointer and count are updated after all 8 stores */
	.p2align 4
.Lunrolled:
10:	movq %rax,(%rdi)
11:	movq %rax,8(%rdi)
12:	movq %rax,16(%rdi)
13:	movq %rax,24(%rdi)
14:	movq %rax,32(%rdi)
15:	movq %rax,40(%rdi)
16:	movq %rax,48(%rdi)
17:	movq %rax,56(%rdi)
	addq $64,%rdi
	subq $64,%rcx
	cmpq $64,%rcx
	jae .Lunrolled
	/* fewer than 64 bytes left: finish with the narrower loops */
	cmpl $8,%ecx
	jae .Lword
	testl %ecx,%ecx
	jne .Lclear_user_tail
	RET

	/*
	 * If we take an exception on any of the
	 * word stores, we know that %rcx isn't zero,
	 * so we can just go to the tail clearing to
	 * get the exact count.
	 *
	 * The unrolled case might end up clearing
	 * some bytes twice. Don't care.
	 *
	 * We could use the value in %rdi to avoid
	 * a second fault on the exact count case,
	 * but do we really care? No.
	 *
	 * Finally, we could try to align %rdi at the
	 * top of the unrolling. But unaligned stores
	 * just aren't that common or expensive.
	 */
	_ASM_EXTABLE_UA( 1b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(10b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(11b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(12b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(13b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(14b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(15b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(16b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(17b, .Lclear_user_tail)
SYM_FUNC_END(rep_stos_alternative)
EXPORT_SYMBOL(rep_stos_alternative)
142