/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */

#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>

/*
 * Some CPUs run faster using the string copy instructions (sane microcode).
 * It is also a lot simpler. Use this when possible. But, don't use streaming
 * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
 * prefetch distance based on SMP/UP.
 */

/*
 * void copy_page(void *to, void *from)
 *
 * Copy one 4096-byte page from the source (%rsi) to the destination
 * (%rdi).
 *
 * On CPUs advertising X86_FEATURE_REP_GOOD the ALTERNATIVE below is
 * patched out at boot and execution falls through to the simple
 * 'rep movsq' path; on all other CPUs the jump to the unrolled
 * copy_page_regs variant remains in place.
 */
	ALIGN
ENTRY(copy_page)
	ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
	movl	$4096/8, %ecx		/* 512 quadwords per 4096-byte page */
	rep	movsq
	ret
ENDPROC(copy_page)

/*
 * Unrolled register-based fallback for CPUs without fast string copy.
 *
 * Copies the page in 64-byte chunks: eight quadword loads followed by
 * eight quadword stores per iteration, staging the data in registers.
 * Uses rax, rdx, r8-r11 (caller-clobbered) plus rbx and r12, which are
 * callee-saved and therefore spilled to the stack around the loops.
 */
ENTRY(copy_page_regs)
	subq	$2*8, %rsp		/* room to spill the two callee-saved regs */
	movq	%rbx, (%rsp)
	movq	%r12, 1*8(%rsp)

	/*
	 * Main loop: all but the last 5 of the 64 chunks.  The prefetch
	 * below runs 5 chunks (5*64 bytes) ahead of the reads, so
	 * stopping 5 iterations early keeps it from reaching past the
	 * end of the source page; the remaining chunks are copied by
	 * .Loop2 without prefetching.
	 */
	movl	$(4096/64)-5, %ecx
	.p2align 4
.Loop64:
	dec	%rcx
	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	prefetcht0 5*64(%rsi)		/* pull the chunk 5 iterations ahead into cache */

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8, 0x8*3(%rdi)
	movq	%r9, 0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	leaq	64 (%rsi), %rsi		/* advance both pointers one chunk */
	leaq	64 (%rdi), %rdi

	jnz	.Loop64

	/* Tail loop: the final 5 chunks, identical but with no prefetch. */
	movl	$5, %ecx
	.p2align 4
.Loop2:
	decl	%ecx

	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8, 0x8*3(%rdi)
	movq	%r9, 0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	leaq	64(%rdi), %rdi
	leaq	64(%rsi), %rsi
	jnz	.Loop2

	/* Restore the callee-saved registers and release the spill area. */
	movq	(%rsp), %rbx
	movq	1*8(%rsp), %r12
	addq	$2*8, %rsp
	ret
ENDPROC(copy_page_regs)