/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/export.h>

/*
 * Syntax: AT&T (GNU as), passed through the C preprocessor.
 *
 * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
 * recommended to use this when possible and we do use them by default.
 * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
 * Otherwise, use original.
 */

/*
 * Zero a page using REP STOSQ.
 *
 * Input:
 * %rdi	- page
 *
 * Clobbers: %rax (zeroed), %rcx (counts down to 0), %rdi (advanced
 * past the page), flags.
 *
 * NOTE(review): REP STOS stores forward only when the direction flag
 * is clear; this is assumed to hold on entry - confirm against callers.
 */
SYM_FUNC_START(clear_page_rep)
	movl $4096/8,%ecx		/* number of 8-byte stores in 4096 bytes */
	xorl %eax,%eax			/* 32-bit write also clears the upper half of %rax */
	rep stosq
	RET
SYM_FUNC_END(clear_page_rep)
EXPORT_SYMBOL_GPL(clear_page_rep)

/*
 * Zero a page using plain 8-byte stores, 64 bytes per loop iteration.
 *
 * Input:
 * %rdi	- page
 *
 * Clobbers: %rax (zeroed), %ecx (counts down to 0), %rdi (advanced
 * past the page), flags.
 */
SYM_FUNC_START(clear_page_orig)
	xorl   %eax,%eax
	movl   $4096/64,%ecx		/* number of 64-byte iterations */
	.p2align 4
.Lloop:
	/*
	 * The loop counter is decremented first; the movq and leaq
	 * instructions below leave the flags alone, so the ZF produced
	 * here is still valid for the jnz at the bottom of the loop.
	 */
	decl	%ecx
/* PUT(x): store zero to the x-th quadword of the current 64-byte chunk. */
#define PUT(x) movq %rax,x*8(%rdi)
	movq %rax,(%rdi)
	PUT(1)
	PUT(2)
	PUT(3)
	PUT(4)
	PUT(5)
	PUT(6)
	PUT(7)
	leaq	64(%rdi),%rdi		/* advance without touching flags */
	jnz	.Lloop
	nop
	RET
SYM_FUNC_END(clear_page_orig)
EXPORT_SYMBOL_GPL(clear_page_orig)
/*
 * Zero a page using REP STOSB.
 *
 * Input:
 * %rdi	- page
 *
 * Clobbers: %rax (zeroed), %rcx (counts down to 0), %rdi (advanced
 * past the page), flags.
 *
 * NOTE(review): REP STOS stores forward only when the direction flag
 * is clear; this is assumed to hold on entry - confirm against callers.
 */
SYM_FUNC_START(clear_page_erms)
	movl $4096,%ecx			/* byte count */
	xorl %eax,%eax			/* %al = 0 is the byte that gets stored */
	rep stosb
	RET
SYM_FUNC_END(clear_page_erms)
EXPORT_SYMBOL_GPL(clear_page_erms)

/*
 * Default clear user-space.
 * Input:
 * rdi destination
 * rcx count
 * rax is zero
 *
 * Output:
 * rcx: uncleared bytes or 0 if successful.
 *
 * Also modifies %rdi (advanced past the bytes that were stored) and
 * the flags.
 *
 * Structure: counts of 64 or more go through an unrolled 64-byte loop,
 * the remaining 8..63 bytes through an 8-byte loop, and the last 0..7
 * bytes through a byte loop. Every store has an exception-table entry
 * (see the _ASM_EXTABLE_UA lines) naming where to continue on a fault.
 */
SYM_FUNC_START(rep_stos_alternative)
	cmpq $64,%rcx
	jae .Lunrolled

	/* %rcx < 64 from here on, so 32-bit compares on %ecx are enough. */
	cmp $8,%ecx
	jae .Lword

	testl %ecx,%ecx
	je .Lexit

	/*
	 * Byte-at-a-time loop. %rcx is decremented only after the store
	 * at 0: has completed, so if that store faults %rcx still counts
	 * every byte that was not cleared.
	 */
.Lclear_user_tail:
0:	movb %al,(%rdi)
	inc %rdi
	dec %rcx
	jnz .Lclear_user_tail
.Lexit:
	RET

	/* A fault on the byte store returns with %rcx = uncleared bytes. */
	_ASM_EXTABLE_UA( 0b, .Lexit)

	/*
	 * 8-bytes-at-a-time loop, entered only with 8 <= %rcx < 64.
	 * The 32-bit sub zero-extends into %rcx, which is harmless
	 * because the count already fits in 32 bits here.
	 */
.Lword:
1:	movq %rax,(%rdi)
	addq $8,%rdi
	sub $8,%ecx
	je .Lexit
	cmp $8,%ecx
	jae .Lword
	jmp .Lclear_user_tail

	/*
	 * Unrolled loop: eight 8-byte stores per iteration. %rdi and
	 * %rcx are updated only after all eight stores, so a fault on
	 * any of them leaves both describing the start of the current
	 * 64-byte chunk.
	 */
	.p2align 4
.Lunrolled:
10:	movq %rax,(%rdi)
11:	movq %rax,8(%rdi)
12:	movq %rax,16(%rdi)
13:	movq %rax,24(%rdi)
14:	movq %rax,32(%rdi)
15:	movq %rax,40(%rdi)
16:	movq %rax,48(%rdi)
17:	movq %rax,56(%rdi)
	addq $64,%rdi
	subq $64,%rcx
	cmpq $64,%rcx
	jae .Lunrolled
	/* Fewer than 64 bytes left: finish with the smaller loops. */
	cmpl $8,%ecx
	jae .Lword
	testl %ecx,%ecx
	jne .Lclear_user_tail
	RET

	/*
	 * If we take an exception on any of the
	 * word stores, we know that %rcx isn't zero,
	 * so we can just go to the tail clearing to
	 * get the exact count.
	 *
	 * The unrolled case might end up clearing
	 * some bytes twice. Don't care.
	 *
	 * We could use the value in %rdi to avoid
	 * a second fault on the exact count case,
	 * but do we really care? No.
	 *
	 * Finally, we could try to align %rdi at the
	 * top of the unrolling. But unaligned stores
	 * just aren't that common or expensive.
	 */
	_ASM_EXTABLE_UA( 1b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(10b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(11b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(12b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(13b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(14b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(15b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(16b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(17b, .Lclear_user_tail)
SYM_FUNC_END(rep_stos_alternative)
EXPORT_SYMBOL(rep_stos_alternative)