/* SPDX-License-Identifier: GPL-2.0 */
/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */

#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>

/*
 * Some CPUs run faster using the string copy instructions (sane microcode).
 * It is also a lot simpler. Use this when possible. But don't use the string
 * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
 * prefetch distance based on SMP/UP.
 */
	ALIGN
ENTRY(copy_page)
	ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
	movl	$4096/8, %ecx
	rep	movsq
	ret
ENDPROC(copy_page)
EXPORT_SYMBOL(copy_page)

ENTRY(copy_page_regs)
	/* Spill the callee-saved registers used as copy temporaries. */
	subq	$2*8, %rsp
	movq	%rbx, (%rsp)
	movq	%r12, 1*8(%rsp)

	/* First pass: copy all but the last 5 cache lines, prefetching ahead. */
	movl	$(4096/64)-5, %ecx
	.p2align 4
.Loop64:
	dec	%rcx
	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	prefetcht0 5*64(%rsi)

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8, 0x8*3(%rdi)
	movq	%r9, 0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	leaq	64(%rsi), %rsi
	leaq	64(%rdi), %rdi

	jnz	.Loop64

	/* Second pass: the last 5 cache lines, where prefetch would reach past the page. */
	movl	$5, %ecx
	.p2align 4
.Loop2:
	decl	%ecx

	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8, 0x8*3(%rdi)
	movq	%r9, 0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	leaq	64(%rdi), %rdi
	leaq	64(%rsi), %rsi
	jnz	.Loop2
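
	/* Restore the callee-saved registers spilled at entry. */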
	movq	(%rsp), %rbx
	movq	1*8(%rsp), %r12
	addq	$2*8, %rsp
	ret
ENDPROC(copy_page_regs)