/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * rax original destination
 *
 * Notes:
 * - The block count is derived from the full 64-bit value in rdx, so
 *   copies of 4GB and more are not truncated.
 * - Only caller-saved registers are used (rcx, rdx, rsi, rdi, r8-r11),
 *   so nothing is saved on the stack.
 * - Neither variant touches the direction flag; memcpy_c copies in
 *   whatever direction DF selects on entry.
 */

/*
 * memcpy_c - variant built on the string copy instructions.
 *
 * Same register interface as memcpy. It is not an exported entry point:
 * it is reached through the two-byte jmp described at the end of this
 * file.
 */
	ALIGN
memcpy_c:
	CFI_STARTPROC
	movq %rdi,%rax		/* return value: original destination */

	movq %rdx,%rcx
	shrq $3,%rcx		/* rcx = number of whole quadwords */
	andl $7,%edx		/* edx = leftover bytes (0..7) */
	rep movsq
	movl %edx,%ecx		/* 32-bit write zero-extends into rcx */
	rep movsb
	ret
	CFI_ENDPROC
ENDPROC(memcpy_c)

/*
 * memcpy/__memcpy - variant using an unrolled load/store loop.
 *
 * Three phases: 64-byte blocks, then remaining quadwords, then
 * remaining bytes. rdx keeps the original count throughout so that
 * each phase can extract its own share from it.
 */
ENTRY(__memcpy)
ENTRY(memcpy)
	CFI_STARTPROC
	movq %rdi,%rax		/* return value: original destination */

	movq %rdx,%rcx
	shrq $6,%rcx		/* rcx = number of 64-byte blocks */
	jz .Lhandle_tail

	.p2align 4
.Lloop_64:
	/*
	 * Decrement first: none of the mov/lea instructions below modify
	 * the flags, so the jnz at the bottom still tests this result.
	 */
	decq %rcx

	movq (%rsi),%r11
	movq 8(%rsi),%r8

	movq %r11,(%rdi)
	movq %r8,1*8(%rdi)

	movq 2*8(%rsi),%r9
	movq 3*8(%rsi),%r10

	movq %r9,2*8(%rdi)
	movq %r10,3*8(%rdi)

	movq 4*8(%rsi),%r11
	movq 5*8(%rsi),%r8

	movq %r11,4*8(%rdi)
	movq %r8,5*8(%rdi)

	movq 6*8(%rsi),%r9
	movq 7*8(%rsi),%r10

	movq %r9,6*8(%rdi)
	movq %r10,7*8(%rdi)

	leaq 64(%rsi),%rsi	/* lea: advance without touching flags */
	leaq 64(%rdi),%rdi
	jnz .Lloop_64

.Lhandle_tail:
	/* Only the low six bits of the count matter from here on. */
	movl %edx,%ecx
	andl $63,%ecx
	shrl $3,%ecx		/* ecx = remaining quadwords (0..7) */
	jz .Lhandle_7
	.p2align 4
.Lloop_8:
	decl %ecx		/* flags survive the mov/lea below */
	movq (%rsi),%r8
	movq %r8,(%rdi)
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz .Lloop_8

.Lhandle_7:
	movl %edx,%ecx
	andl $7,%ecx		/* ecx = remaining bytes (0..7) */
	jz .Lende
	.p2align 4
.Lloop_1:
	movb (%rsi),%r8b
	movb %r8b,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz .Lloop_1

.Lende:
	ret
	CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)

	/*
	 * Some CPUs run faster using the string copy instructions.
	 * It is also a lot simpler. Use this when possible.
	 *
	 * The replacement is a two-byte short jump. Its displacement is
	 * computed as if the jump were located at memcpy: the distance
	 * from memcpy to memcpy_c, minus the length of the jump itself
	 * (2f - 1b), because the displacement counts from the end of the
	 * instruction.
	 */

	.section .altinstr_replacement,"ax"
1:	.byte 0xeb				/* jmp <disp8> */
	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
2:
	.previous

	/*
	 * Patch record. NOTE(review): the field order is presumably
	 * original address, replacement address, CPU feature, original
	 * length, replacement length - confirm against the alternatives
	 * record definition, which is not part of this file.
	 */
	.section .altinstructions,"a"
	.align 8
	.quad memcpy
	.quad 1b
	.byte X86_FEATURE_REP_GOOD
	/*
	 * Replace only the beginning: memcpy is used to apply the
	 * alternatives, so it would be silly to let it overwrite itself
	 * with nops - a reboot would be the only outcome.
	 */
	.byte 2b - 1b
	.byte 2b - 1b
	.previous