/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */

#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>

/*
 * Some CPUs run faster using the string copy instructions (sane microcode);
 * the string variant is also a lot simpler, so use it when possible. But
 * don't use the streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
 * The prefetch distance in the register fallback could be varied based on
 * SMP/UP.
 */
	ALIGN
ENTRY(copy_page)
	/* %rdi = destination page, %rsi = source page, 4096 bytes each */
	ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
	movl	$4096/8, %ecx
	rep	movsq
	ret
ENDPROC(copy_page)
EXPORT_SYMBOL(copy_page)

/* Register-based fallback for CPUs without X86_FEATURE_REP_GOOD. */
ENTRY(copy_page_regs)
	/* %rbx and %r12 are callee-saved, so preserve them across the copy */
	subq	$2*8, %rsp
	movq	%rbx, (%rsp)
	movq	%r12, 1*8(%rsp)

	/* 59 unrolled 64-byte iterations here; the last 5 run in .Loop2 below */
	movl	$(4096/64)-5, %ecx
	.p2align 4
.Loop64:
	dec	%rcx
	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	/* stay 5 cache lines (320 bytes) ahead of the loads */
	prefetcht0 5*64(%rsi)

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8,  0x8*3(%rdi)
	movq	%r9,  0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	leaq	64(%rsi), %rsi
	leaq	64(%rdi), %rdi

	jnz	.Loop64

	/* last 5 cache lines: same copy, but no prefetch past the page end */
	movl	$5, %ecx
	.p2align 4
.Loop2:
	decl	%ecx

	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8,  0x8*3(%rdi)
	movq	%r9,  0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	leaq	64(%rdi), %rdi
	leaq	64(%rsi), %rsi
	jnz	.Loop2

	movq	(%rsp), %rbx
	movq	1*8(%rsp), %r12
	addq	$2*8, %rsp
	ret
ENDPROC(copy_page_regs)
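
/*
 * Editorial aid for readers coming from C: a rough, hypothetical sketch of
 * what copy_page_regs does. The sketch's function name, the u64 typedef and
 * the __builtin_prefetch() call are illustration-only assumptions and not
 * part of this file; on the C side the kernel simply declares
 * void copy_page(void *to, void *from). The first loop runs
 * (4096/64)-5 = 59 times with a prefetch about 5 cache lines (320 bytes)
 * ahead; the second runs 5 times without prefetching, so nothing past the
 * end of the source page is touched.
 *
 *	typedef unsigned long long u64;
 *
 *	static void copy_page_regs_sketch(void *to, void *from)
 *	{
 *		u64 *d = to;
 *		const u64 *s = from;
 *		int i, j;
 *
 *		for (i = 0; i < 4096 / 64 - 5; i++) {
 *			__builtin_prefetch(s + 5 * 8);
 *			for (j = 0; j < 8; j++)
 *				d[j] = s[j];
 *			d += 8;
 *			s += 8;
 *		}
 *		for (i = 0; i < 5; i++) {
 *			for (j = 0; j < 8; j++)
 *				d[j] = s[j];
 *			d += 8;
 *			s += 8;
 *		}
 *	}
 */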