/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Normally compiler builtins are used, but sometimes the compiler calls out
 * of line code. Based on asm-i386/string.h.
 *
 * This assembly file was rewritten from the memmove_64.c file.
 * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
 */
#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/export.h>

#undef memmove
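
/*
 * Illustrative usage sketch (C, standard memmove() semantics; not part of
 * this file's build): the routine must copy as if through a temporary
 * buffer, even when the regions overlap:
 *
 *	char buf[8] = "abcdef";
 *	memmove(buf + 2, buf, 4);	// buf now holds "ababcd"
 *
 * The code below gets this right by copying forward when src >= dest or
 * when the regions do not overlap, and copying backward otherwise, so
 * overlapping bytes are always read before they are overwritten.
 */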

/*
 * Implement memmove(). This can handle overlap between src and dst.
 *
 * Input:
 * rdi: dest
 * rsi: src
 * rdx: count
 *
 * Output:
 * rax: dest
 */
SYM_FUNC_START(__memmove)

	mov %rdi, %rax

	/* Decide forward/backward copy mode */
	cmp %rdi, %rsi
	jge .Lmemmove_begin_forward
	mov %rsi, %r8
	add %rdx, %r8
	cmp %rdi, %r8
	jg 2f

	/* FSRM implies ERMS => no length checks, do the copy directly */
.Lmemmove_begin_forward:
	ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM
	ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS

	/*
	 * The movsq instruction has a high startup latency, so we handle
	 * small sizes with general-purpose registers.
	 */
	cmp $680, %rdx
	jb 3f
	/*
	 * The movsq instruction is only a win for the aligned case, so
	 * check that src and dest share the same low-byte alignment.
	 */

	cmpb %dil, %sil
	je 4f
3:
	sub $0x20, %rdx
	/*
	 * We gobble 32 bytes forward in each loop iteration.
	 */
5:
	sub $0x20, %rdx
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq 2*8(%rsi), %r9
	movq 3*8(%rsi), %r8
	leaq 4*8(%rsi), %rsi

	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, 2*8(%rdi)
	movq %r8, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae 5b
	addq $0x20, %rdx
	jmp 1f
	/*
	 * Handle the forward copy with movsq.
	 */
	.p2align 4
4:
	movq %rdx, %rcx
	movq -8(%rsi, %rdx), %r11
	lea -8(%rdi, %rdx), %r10
	shrq $3, %rcx
	rep movsq
	movq %r11, (%r10)
	jmp 13f
.Lmemmove_end_forward:

	/*
	 * Handle the backward copy with movsq.
	 */
	.p2align 4
7:
	movq %rdx, %rcx
	movq (%rsi), %r11
	movq %rdi, %r10
	leaq -8(%rsi, %rdx), %rsi
	leaq -8(%rdi, %rdx), %rdi
	shrq $3, %rcx
	std
	rep movsq
	cld
	movq %r11, (%r10)
	jmp 13f

	/*
	 * Prepare for the backward copy.
	 */
	.p2align 4
2:
	cmp $0x20, %rdx
	jb 1f
	cmp $680, %rdx
	jb 6f
	cmp %dil, %sil
	je 7b
6:
	/*
	 * Calculate the copy position at the tail.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * We gobble 32 bytes backward in each loop iteration.
	 */
8:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r11
	movq -2*8(%rsi), %r10
	movq -3*8(%rsi), %r9
	movq -4*8(%rsi), %r8
	leaq -4*8(%rsi), %rsi

	movq %r11, -1*8(%rdi)
	movq %r10, -2*8(%rdi)
	movq %r9, -3*8(%rdi)
	movq %r8, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae 8b
	/*
	 * Calculate the copy position back at the head.
	 */
	addq $0x20, %rdx
	subq %rdx, %rsi
	subq %rdx, %rdi
1:
	cmpq $16, %rdx
	jb 9f
	/*
	 * Move 16 to 31 bytes of data.
	 */
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq -2*8(%rsi, %rdx), %r9
	movq -1*8(%rsi, %rdx), %r8
	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, -2*8(%rdi, %rdx)
	movq %r8, -1*8(%rdi, %rdx)
	jmp 13f
	.p2align 4
9:
	cmpq $8, %rdx
	jb 10f
	/*
	 * Move 8 to 15 bytes of data.
	 */
	movq 0*8(%rsi), %r11
	movq -1*8(%rsi, %rdx), %r10
	movq %r11, 0*8(%rdi)
	movq %r10, -1*8(%rdi, %rdx)
	jmp 13f
10:
	cmpq $4, %rdx
	jb 11f
	/*
	 * Move 4 to 7 bytes of data.
	 */
	movl (%rsi), %r11d
	movl -4(%rsi, %rdx), %r10d
	movl %r11d, (%rdi)
	movl %r10d, -4(%rdi, %rdx)
	jmp 13f
11:
	cmp $2, %rdx
	jb 12f
	/*
	 * Move 2 to 3 bytes of data.
	 */
	movw (%rsi), %r11w
	movw -2(%rsi, %rdx), %r10w
	movw %r11w, (%rdi)
	movw %r10w, -2(%rdi, %rdx)
	jmp 13f
12:
	cmp $1, %rdx
	jb 13f
	/*
	 * Move the final single byte.
	 */
	movb (%rsi), %r11b
	movb %r11b, (%rdi)
13:
	RET
SYM_FUNC_END(__memmove)
EXPORT_SYMBOL(__memmove)

SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
EXPORT_SYMBOL(memmove)
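
/*
 * For reference, a minimal C sketch of the direction dispatch implemented
 * above. This is an illustration only, not part of the build:
 * memmove_sketch() is a hypothetical name, the byte-at-a-time loops stand
 * in for the rep-string and 32-byte block paths (and the 680-byte movsq
 * threshold), and the raw pointer comparisons mirror what the assembly
 * does rather than strictly portable C:
 *
 *	void *memmove_sketch(void *dest, const void *src, size_t n)
 *	{
 *		unsigned char *d = dest;
 *		const unsigned char *s = src;
 *
 *		if (s >= d || s + n <= d) {
 *			// No destructive overlap: copy forward.
 *			while (n--)
 *				*d++ = *s++;
 *		} else {
 *			// dest overlaps the tail of src: copy backward.
 *			d += n;
 *			s += n;
 *			while (n--)
 *				*--d = *--s;
 *		}
 *		return dest;
 *	}
 */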