/*
 * Normally compiler builtins are used, but sometimes the compiler calls out
 * of line code. Based on asm-i386/string.h.
 *
 * This assembly file is re-written from memmove_64.c file.
 *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
 */
#define _STRING_C
#include <linux/linkage.h>
#include <asm/dwarf2.h>

#undef memmove

/*
 * Implement memmove(). This can handle overlap between src and dst.
 *
 * Input:
 * rdi: dest
 * rsi: src
 * rdx: count
 *
 * Output:
 * rax: dest
 *
 * Strategy:
 *  - count < 32: jump straight to the tail code at 1:, which copies
 *    0..31 bytes using overlapping head/tail loads (no loop needed).
 *  - src >= dst: forward copy is safe even when the regions overlap.
 *    Large (>= 680 bytes) mutually-aligned copies use "rep movsq";
 *    everything else uses a 32-bytes-per-iteration register loop.
 *  - src < dst: the regions may overlap such that a forward copy would
 *    clobber unread source bytes, so copy backward (from the tail).
 *
 * Scratch: rcx, r8-r11 and flags are clobbered; no callee-saved
 * registers or stack space are used.
 */
ENTRY(memmove)
	CFI_STARTPROC
	/* Set up return value now; rdi keeps advancing below. */
	/* Handle more 32bytes in loop */
	mov %rdi, %rax
	cmp $0x20, %rdx
	jb	1f			/* < 32 bytes: tail code only */

	/*
	 * Decide forward/backward copy mode.
	 * Backward (2:) only when src < dst, i.e. when a forward copy
	 * could overwrite source bytes before they are read.
	 */
	cmp %rdi, %rsi
	jb	2f

	/*
	 * movsq instruction have many startup latency
	 * so we handle small size by general register.
	 * NOTE(review): 680 looks like an empirically tuned crossover
	 * point where rep-movsq startup cost amortizes — confirm against
	 * the original memmove_64.c tuning.
	 */
	cmp  $680, %rdx
	jb	3f
	/*
	 * movsq instruction is only good for aligned case.
	 * Equal low bytes of src and dst mean both pointers share the
	 * same alignment, so the qword stores of rep movsq line up.
	 */

	cmpb %dil, %sil
	je 4f
3:
	/*
	 * Pre-decrement so the in-loop sub's borrow (CF) tells us when
	 * fewer than 32 bytes remain; the true remainder is restored by
	 * the addq after the loop.
	 */
	sub $0x20, %rdx
	/*
	 * We gobble 32 bytes forward in each loop.
	 */
5:
	sub $0x20, %rdx
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq 2*8(%rsi), %r9
	movq 3*8(%rsi), %r8
	leaq 4*8(%rsi), %rsi

	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, 2*8(%rdi)
	movq %r8, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	/* jae tests CF from the sub at 5: — movq/leaq preserve flags. */
	jae 5b
	/* Undo the extra pre-decrement: rdx = bytes still to copy (0..31). */
	addq $0x20, %rdx
	jmp 1f
	/*
	 * Handle data forward by movsq.
	 * Copy count/8 qwords with rep movsq; the last (possibly
	 * partial-qword-aligned) 8 bytes are saved in r11 first and
	 * stored afterwards, since rep movsq only moves whole qwords.
	 */
	.p2align 4
4:
	movq %rdx, %rcx
	movq -8(%rsi, %rdx), %r11	/* last qword of src, pre-rep (rsi moves) */
	lea -8(%rdi, %rdx), %r10	/* address of last qword of dst */
	shrq $3, %rcx
	rep movsq			/* clobbers rsi, rdi, rcx */
	movq %r11, (%r10)		/* patch the tail qword */
	jmp 13f
	/*
	 * Handle data backward by movsq.
	 * Mirror of 4: — rep movsq runs from the tail with DF set; the
	 * first qword of the region is saved in r11 and stored last.
	 */
	.p2align 4
7:
	movq %rdx, %rcx
	movq (%rsi), %r11		/* first qword of src, saved up front */
	movq %rdi, %r10			/* original dst, for the final store */
	leaq -8(%rsi, %rdx), %rsi	/* point at last qword of src */
	leaq -8(%rdi, %rdx), %rdi	/* point at last qword of dst */
	shrq $3, %rcx
	std				/* DF=1: movsq decrements rsi/rdi */
	rep movsq
	cld				/* DF must be clear per the ABI on return */
	movq %r11, (%r10)		/* patch the head qword */
	jmp 13f

	/*
	 * Start to prepare for backward copy.
	 */
	.p2align 4
2:
	cmp $680, %rdx
	jb 6f			/* small: register loop below */
	cmp %dil, %sil
	je 7b			/* large and mutually aligned: backward movsq */
6:
	/*
	 * Calculate copy position to tail.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	/* Same pre-decrement trick as 3: — see comment there. */
	subq $0x20, %rdx
	/*
	 * We gobble 32 bytes backward in each loop.
	 */
8:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r11
	movq -2*8(%rsi), %r10
	movq -3*8(%rsi), %r9
	movq -4*8(%rsi), %r8
	leaq -4*8(%rsi), %rsi

	movq %r11, -1*8(%rdi)
	movq %r10, -2*8(%rdi)
	movq %r9, -3*8(%rdi)
	movq %r8, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	/* jae tests CF from the subq at 8: — movq/leaq preserve flags. */
	jae 8b
	/*
	 * Calculate copy position to head.
	 * Restore the remainder count and step the pointers back to the
	 * start of the uncopied head so the shared tail code at 1: can
	 * finish forward.
	 */
	addq $0x20, %rdx
	subq %rdx, %rsi
	subq %rdx, %rdi
1:
	/*
	 * Tail: 0..31 bytes remain. Each case below loads both ends of
	 * the span before storing, so the stores can safely overlap in
	 * the middle — correct for any src/dst overlap.
	 */
	cmpq $16, %rdx
	jb 9f
	/*
	 * Move data from 16 bytes to 31 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq -2*8(%rsi, %rdx), %r9
	movq -1*8(%rsi, %rdx), %r8
	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, -2*8(%rdi, %rdx)
	movq %r8, -1*8(%rdi, %rdx)
	jmp 13f
	.p2align 4
9:
	cmpq $8, %rdx
	jb 10f
	/*
	 * Move data from 8 bytes to 15 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq -1*8(%rsi, %rdx), %r10
	movq %r11, 0*8(%rdi)
	movq %r10, -1*8(%rdi, %rdx)
	jmp 13f
10:
	cmpq $4, %rdx
	jb 11f
	/*
	 * Move data from 4 bytes to 7 bytes.
	 */
	movl (%rsi), %r11d
	movl -4(%rsi, %rdx), %r10d
	movl %r11d, (%rdi)
	movl %r10d, -4(%rdi, %rdx)
	jmp 13f
11:
	cmp $2, %rdx
	jb 12f
	/*
	 * Move data from 2 bytes to 3 bytes.
	 */
	movw (%rsi), %r11w
	movw -2(%rsi, %rdx), %r10w
	movw %r11w, (%rdi)
	movw %r10w, -2(%rdi, %rdx)
	jmp 13f
12:
	cmp $1, %rdx
	jb 13f
	/*
	 * Move data for 1 byte.
	 */
	movb (%rsi), %r11b
	movb %r11b, (%rdi)
13:
	retq			/* rax still holds original dest */
	CFI_ENDPROC
ENDPROC(memmove)