/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */

/*
 * void copy_page(void *to, void *from)
 *
 * Copy one 4096-byte page.  SysV AMD64 ABI: %rdi = destination,
 * %rsi = source.  Two implementations live in this file:
 *
 *  - copy_page_rep: a plain "rep movsq" of 512 quadwords, for CPUs that
 *    advertise X86_FEATURE_REP_GOOD (fast string operations).
 *  - copy_page:     a 64-byte-per-iteration unrolled loop with software
 *    prefetching; the default.
 *
 * The alternatives stanza at the bottom of the file patches the first
 * two bytes of copy_page into a short jump to copy_page_rep at boot
 * when the CPU supports fast string copies.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/alternative-asm.h>

	ALIGN
copy_page_rep:
	CFI_STARTPROC
	movl	$4096/8, %ecx		/* 512 quadwords = one 4K page */
	rep	movsq			/* copy %rcx qwords (%rsi) -> (%rdi) */
	ret
	CFI_ENDPROC
ENDPROC(copy_page_rep)

/*
 * Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
 * Could vary the prefetch distance based on SMP/UP.
*/

ENTRY(copy_page)
	CFI_STARTPROC
	/* %rbx and %r12 are callee-saved in the SysV ABI; spill them. */
	subq	$2*8, %rsp
	CFI_ADJUST_CFA_OFFSET 2*8
	movq	%rbx, (%rsp)
	CFI_REL_OFFSET rbx, 0
	movq	%r12, 1*8(%rsp)
	CFI_REL_OFFSET r12, 1*8

	/*
	 * Main loop: (4096/64)-5 = 59 iterations, one 64-byte cache line
	 * each, prefetching 5 lines ahead.  The last 5 lines of the page
	 * are copied by .Loop2 below, so the prefetch here never reaches
	 * past the end of the source page.
	 */
	movl	$(4096/64)-5, %ecx
	.p2align 4
.Loop64:
	/*
	 * Decrement up front: mov, prefetch and lea do not modify flags,
	 * so ZF survives to the jnz at the bottom of the loop body.
	 */
	dec	%rcx
	/* Load a full cache line (8 qwords) before storing any of it. */
	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	prefetcht0 5*64(%rsi)		/* prefetch 5 lines (320 B) ahead */

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8, 0x8*3(%rdi)
	movq	%r9, 0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	leaq	64 (%rsi), %rsi		/* advance one cache line; lea keeps ZF */
	leaq	64 (%rdi), %rdi

	jnz	.Loop64

	/* Tail: the final 5 cache lines, copied without prefetching. */
	movl	$5, %ecx
	.p2align 4
.Loop2:
	decl	%ecx

	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8, 0x8*3(%rdi)
	movq	%r9, 0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	leaq	64(%rdi), %rdi
	leaq	64(%rsi), %rsi
	jnz	.Loop2

	/* Restore callee-saved registers and unwind the stack frame. */
	movq	(%rsp), %rbx
	CFI_RESTORE rbx
	movq	1*8(%rsp), %r12
	CFI_RESTORE r12
	addq	$2*8, %rsp
	CFI_ADJUST_CFA_OFFSET -2*8
	ret
.Lcopy_page_end:
	CFI_ENDPROC
ENDPROC(copy_page)

	/* Some CPUs run faster using the string copy instructions.
	   It is also a lot simpler. Use this when possible */

#include <asm/cpufeature.h>

	.section .altinstr_replacement,"ax"
1:	.byte 0xeb			/* opcode: jmp rel8 */
	/*
	 * rel8 displacement is measured from the end of the 2-byte jmp
	 * (2f - 1b == 2), which will sit at copy_page once patched in.
	 */
	.byte (copy_page_rep - copy_page) - (2f - 1b)	/* offset */
2:
	.previous
	.section .altinstructions,"a"
	/*
	 * If the CPU has X86_FEATURE_REP_GOOD, replace the start of
	 * copy_page with the short jump above so all calls divert to
	 * copy_page_rep.  Entry fields: original insn address,
	 * replacement address, feature bit, original length
	 * (.Lcopy_page_end-copy_page), replacement length (2b-1b).
	 */
	altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD,	\
		.Lcopy_page_end-copy_page, 2b-1b
	.previous