/*
 * Normally compiler builtins are used, but sometimes the compiler calls out
 * of line code. Based on asm-i386/string.h.
 *
 * This assembly file is re-written from memmove_64.c file.
 *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
 */
#define _STRING_C
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>

#undef memmove

/*
 * memmove() - copy a block of bytes; source and destination may overlap.
 *
 * Input:
 *	rdi: dest
 *	rsi: src
 *	rdx: count
 *
 * Output:
 *	rax: dest
 *
 * Registers written besides rax: rcx, rdx, rsi, rdi, r8-r11 and the flags.
 * The direction flag is set and cleared again on the backward movsq path.
 *
 * Strategy:
 *	- count < 32: handled by the tail code, which loads a head piece and
 *	  a tail piece of the buffer before storing either of them.
 *	- otherwise copy forward, unless src < dest < src + count, in which
 *	  case copy backward.
 *	- in either direction, "rep movsq" is used when count >= 680 and the
 *	  low address bytes of src and dest are equal; every other case goes
 *	  through an unrolled loop moving 32 bytes per iteration, and the
 *	  remaining 0..31 bytes are finished by the tail code.
 */
ENTRY(memmove)
	CFI_STARTPROC

	/* The return value is the unmodified dest pointer. */
	movq	%rdi, %rax

	/* Fewer than 32 bytes: skip the bulk paths entirely. */
	cmpq	$0x20, %rdx
	jb	.Lmemmove_tail

	/*
	 * Choose the copy direction: forward when src >= dest, or when
	 * src + count <= dest; backward otherwise.
	 *
	 * NOTE(review): jge/jg are signed comparisons applied to pointers;
	 * confirm this is intended for the address ranges callers pass.
	 */
	cmpq	%rdi, %rsi
	jge	.Lmemmove_begin_forward
	movq	%rsi, %r8
	addq	%rdx, %r8		/* r8 = src + count */
	cmpq	%rdi, %r8
	jg	.Lmemmove_backward

.Lmemmove_begin_forward:
	/*
	 * rep movsq has a high startup cost, so anything below 680 bytes
	 * is copied through general purpose registers instead.
	 */
	cmpq	$680, %rdx
	jb	.Lmemmove_fwd_loop_setup

	/*
	 * rep movsq only pays off for the aligned case; it is taken here
	 * when the low bytes of the two addresses are equal.
	 */
	cmpb	%dil, %sil
	je	.Lmemmove_fwd_movsq

.Lmemmove_fwd_loop_setup:
	/*
	 * Pre-bias the count by -32. The subtraction at the top of the loop
	 * then borrows exactly when the chunk being copied is the last full
	 * 32-byte chunk.
	 */
	subq	$0x20, %rdx

	/*
	 * Forward loop, 32 bytes per iteration. The mov/lea instructions in
	 * the body leave the flags alone, so the jae at the bottom still
	 * tests the carry produced by the subq at the top.
	 */
.Lmemmove_fwd_loop:
	subq	$0x20, %rdx
	movq	0*8(%rsi), %r11
	movq	1*8(%rsi), %r10
	movq	2*8(%rsi), %r9
	movq	3*8(%rsi), %r8
	leaq	4*8(%rsi), %rsi

	movq	%r11, 0*8(%rdi)
	movq	%r10, 1*8(%rdi)
	movq	%r9, 2*8(%rdi)
	movq	%r8, 3*8(%rdi)
	leaq	4*8(%rdi), %rdi
	jae	.Lmemmove_fwd_loop

	/* Undo the bias: rdx = bytes still to copy (0..31). */
	addq	$0x20, %rdx
	jmp	.Lmemmove_tail

	/*
	 * Forward copy with rep movsq. The last 8 source bytes are read
	 * before the string move and written afterwards, which covers the
	 * count % 8 bytes that the quadword copy does not reach.
	 */
	.p2align 4
.Lmemmove_fwd_movsq:
	movq	%rdx, %rcx
	movq	-8(%rsi, %rdx), %r11	/* r11 = last 8 bytes of src */
	leaq	-8(%rdi, %rdx), %r10	/* r10 = &dest[count - 8] */
	shrq	$3, %rcx		/* rcx = count / 8 */
	rep movsq
	movq	%r11, (%r10)
	jmp	.Lmemmove_done
.Lmemmove_end_forward:

	/*
	 * Backward copy with rep movsq. This mirrors the forward variant:
	 * the first 8 source bytes are saved up front and stored last, and
	 * the string move runs from the final quadword downwards with the
	 * direction flag set. The flag is cleared again before leaving.
	 */
	.p2align 4
.Lmemmove_bwd_movsq:
	movq	%rdx, %rcx
	movq	(%rsi), %r11		/* r11 = first 8 bytes of src */
	movq	%rdi, %r10		/* r10 = dest */
	leaq	-8(%rsi, %rdx), %rsi	/* rsi = &src[count - 8] */
	leaq	-8(%rdi, %rdx), %rdi	/* rdi = &dest[count - 8] */
	shrq	$3, %rcx		/* rcx = count / 8 */
	std
	rep movsq
	cld
	movq	%r11, (%r10)
	jmp	.Lmemmove_done

	/*
	 * Backward copy entry: same size and alignment tests as the forward
	 * direction.
	 */
	.p2align 4
.Lmemmove_backward:
	cmpq	$680, %rdx
	jb	.Lmemmove_bwd_loop_setup
	cmpb	%dil, %sil
	je	.Lmemmove_bwd_movsq

.Lmemmove_bwd_loop_setup:
	/*
	 * Point both cursors one past the end of their buffers and apply
	 * the same -32 bias to the count as the forward loop uses.
	 */
	addq	%rdx, %rsi
	addq	%rdx, %rdi
	subq	$0x20, %rdx

	/*
	 * Backward loop, 32 bytes per iteration, highest addresses first.
	 * As in the forward loop, jae tests the carry from the subq at the
	 * top because mov/lea do not touch the flags.
	 */
.Lmemmove_bwd_loop:
	subq	$0x20, %rdx
	movq	-1*8(%rsi), %r11
	movq	-2*8(%rsi), %r10
	movq	-3*8(%rsi), %r9
	movq	-4*8(%rsi), %r8
	leaq	-4*8(%rsi), %rsi

	movq	%r11, -1*8(%rdi)
	movq	%r10, -2*8(%rdi)
	movq	%r9, -3*8(%rdi)
	movq	%r8, -4*8(%rdi)
	leaq	-4*8(%rdi), %rdi
	jae	.Lmemmove_bwd_loop

	/*
	 * Undo the bias (rdx = bytes still to copy, 0..31) and step both
	 * cursors back to the start of that uncopied head, so the tail code
	 * below can address it from its first byte.
	 */
	addq	$0x20, %rdx
	subq	%rdx, %rsi
	subq	%rdx, %rdi

	/*
	 * Tail: rdx is below 32 here and rsi/rdi point at the first byte
	 * that is still to be copied. Each size class loads a piece
	 * anchored at the start and a piece anchored at the end, and only
	 * then stores them; the two pieces may overlap each other.
	 */
.Lmemmove_tail:
	cmpq	$16, %rdx
	jb	.Lmemmove_lt16

	/* 16..31 bytes: first 16 bytes plus last 16 bytes. */
	movq	0*8(%rsi), %r11
	movq	1*8(%rsi), %r10
	movq	-2*8(%rsi, %rdx), %r9
	movq	-1*8(%rsi, %rdx), %r8
	movq	%r11, 0*8(%rdi)
	movq	%r10, 1*8(%rdi)
	movq	%r9, -2*8(%rdi, %rdx)
	movq	%r8, -1*8(%rdi, %rdx)
	jmp	.Lmemmove_done

	.p2align 4
.Lmemmove_lt16:
	cmpq	$8, %rdx
	jb	.Lmemmove_lt8

	/* 8..15 bytes: first 8 bytes plus last 8 bytes. */
	movq	0*8(%rsi), %r11
	movq	-1*8(%rsi, %rdx), %r10
	movq	%r11, 0*8(%rdi)
	movq	%r10, -1*8(%rdi, %rdx)
	jmp	.Lmemmove_done

.Lmemmove_lt8:
	cmpq	$4, %rdx
	jb	.Lmemmove_lt4

	/* 4..7 bytes: first 4 bytes plus last 4 bytes. */
	movl	(%rsi), %r11d
	movl	-4(%rsi, %rdx), %r10d
	movl	%r11d, (%rdi)
	movl	%r10d, -4(%rdi, %rdx)
	jmp	.Lmemmove_done

.Lmemmove_lt4:
	cmpq	$2, %rdx
	jb	.Lmemmove_lt2

	/* 2..3 bytes: first 2 bytes plus last 2 bytes. */
	movw	(%rsi), %r11w
	movw	-2(%rsi, %rdx), %r10w
	movw	%r11w, (%rdi)
	movw	%r10w, -2(%rdi, %rdx)
	jmp	.Lmemmove_done

.Lmemmove_lt2:
	cmpq	$1, %rdx
	jb	.Lmemmove_done

	/* Exactly 1 byte. */
	movb	(%rsi), %r11b
	movb	%r11b, (%rdi)

.Lmemmove_done:
	retq
	CFI_ENDPROC

	/*
	 * Replacement sequence registered for X86_FEATURE_ERMS: the whole
	 * forward copy is done by one "rep movsb", followed by its own
	 * return.
	 */
	.section .altinstr_replacement,"ax"
.Lmemmove_begin_forward_efs:
	/* Forward moving data. */
	movq	%rdx, %rcx
	rep movsb
	retq
.Lmemmove_end_forward_efs:
	.previous

	/*
	 * Alternatives record: original code start, replacement start,
	 * feature bit, original length and replacement length.
	 *
	 * NOTE(review): presumably the alternatives patching code
	 * overwrites the range .Lmemmove_begin_forward ..
	 * .Lmemmove_end_forward with the replacement on CPUs that have the
	 * feature - confirm against asm/alternative-asm.h.
	 */
	.section .altinstructions,"a"
	altinstruction_entry .Lmemmove_begin_forward, \
		.Lmemmove_begin_forward_efs,X86_FEATURE_ERMS, \
		.Lmemmove_end_forward-.Lmemmove_begin_forward, \
		.Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
	.previous
ENDPROC(memmove)