/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>

/*
 * We build a jump to memcpy_orig by default which gets NOPped out on
 * the majority of x86 CPUs which set REP_GOOD. In addition, on CPUs
 * which have the enhanced REP MOVSB/STOSB feature (ERMS), those NOPs
 * are changed to a jmp to memcpy_erms, which does the copy with a
 * single REP; MOVSB.
 */

.weak memcpy

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 *  rdi destination
 *  rsi source
 *  rdx count
 *
 * Output:
 *  rax original destination
 */
ENTRY(__memcpy)
ENTRY(memcpy)
	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
		      "jmp memcpy_erms", X86_FEATURE_ERMS

	movq %rdi, %rax
	movq %rdx, %rcx
	shrq $3, %rcx
	andl $7, %edx
	rep movsq
	movl %edx, %ecx
	rep movsb
	ret
ENDPROC(memcpy)
ENDPROC(__memcpy)

/*
 * memcpy_erms() - enhanced fast string memcpy. This is faster and
 * simpler than memcpy. Use memcpy_erms when possible.
 */
ENTRY(memcpy_erms)
	movq %rdi, %rax
	movq %rdx, %rcx
	rep movsb
	ret
ENDPROC(memcpy_erms)

ENTRY(memcpy_orig)
	movq %rdi, %rax

	cmpq $0x20, %rdx
	jb .Lhandle_tail

	/*
	 * We check whether a memory false dependence could occur,
	 * then jump to the corresponding copy mode.
	 */
	cmp %dil, %sil
	jl .Lcopy_backward
	subq $0x20, %rdx
.Lcopy_forward_loop:
	subq $0x20, %rdx

	/*
	 * Move in blocks of 4x8 bytes:
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq 2*8(%rsi), %r10
	movq 3*8(%rsi), %r11
	leaq 4*8(%rsi), %rsi

	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, 2*8(%rdi)
	movq %r11, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae .Lcopy_forward_loop
	addl $0x20, %edx
	jmp .Lhandle_tail

.Lcopy_backward:
	/*
	 * Calculate copy position to tail.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * At most 3 ALU operations are issued in one cycle,
	 * so append NOPs in the same 16-byte chunk.
	 */
	.p2align 4
.Lcopy_backward_loop:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r8
	movq -2*8(%rsi), %r9
	movq -3*8(%rsi), %r10
	movq -4*8(%rsi), %r11
	leaq -4*8(%rsi), %rsi
	movq %r8, -1*8(%rdi)
	movq %r9, -2*8(%rdi)
	movq %r10, -3*8(%rdi)
	movq %r11, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae .Lcopy_backward_loop

	/*
	 * Calculate copy position to head.
	 */
	addl $0x20, %edx
	subq %rdx, %rsi
	subq %rdx, %rdi
.Lhandle_tail:
	cmpl $16, %edx
	jb .Lless_16bytes

	/*
	 * Move data from 16 bytes to 31 bytes.
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq -2*8(%rsi, %rdx), %r10
	movq -1*8(%rsi, %rdx), %r11
	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, -2*8(%rdi, %rdx)
	movq %r11, -1*8(%rdi, %rdx)
	retq
	.p2align 4
.Lless_16bytes:
	cmpl $8, %edx
	jb .Lless_8bytes
	/*
	 * Move data from 8 bytes to 15 bytes.
	 */
	movq 0*8(%rsi), %r8
	movq -1*8(%rsi, %rdx), %r9
	movq %r8, 0*8(%rdi)
	movq %r9, -1*8(%rdi, %rdx)
	retq
	.p2align 4
.Lless_8bytes:
	cmpl $4, %edx
	jb .Lless_3bytes

	/*
	 * Move data from 4 bytes to 7 bytes.
	 */
	movl (%rsi), %ecx
	movl -4(%rsi, %rdx), %r8d
	movl %ecx, (%rdi)
	movl %r8d, -4(%rdi, %rdx)
	retq
	.p2align 4
.Lless_3bytes:
	subl $1, %edx
	jb .Lend
	/*
	 * Move data from 1 byte to 3 bytes.
	 */
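	/*
	 * At this point %edx holds count - 1 and the flags are still
	 * those of the subl above (movzbl does not modify the flags),
	 * so the jz below catches the single-byte case.  For a 2 or 3
	 * byte copy, byte 1 and the last byte (%rsi, %rdx) may be the
	 * same byte; all loads are done before any store, so the copy
	 * is still correct in that case.
	 */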
	movzbl (%rsi), %ecx
	jz .Lstore_1byte
	movzbq 1(%rsi), %r8
	movzbq (%rsi, %rdx), %r9
	movb %r8b, 1(%rdi)
	movb %r9b, (%rdi, %rdx)
.Lstore_1byte:
	movb %cl, (%rdi)

.Lend:
	retq
ENDPROC(memcpy_orig)

#ifndef CONFIG_UML
/*
 * memcpy_mcsafe_unrolled - memory copy with machine check exception handling
 * Note that we only catch machine checks when reading the source addresses.
 * Writes to target are posted and don't generate machine checks.
 */
ENTRY(memcpy_mcsafe_unrolled)
	cmpl $8, %edx
	/* Less than 8 bytes? Go to byte copy loop */
	jb .L_no_whole_words

	/* Check for bad alignment of source */
	testl $7, %esi
	/* Already aligned */
	jz .L_8byte_aligned

	/* Copy one byte at a time until source is 8-byte aligned */
	movl %esi, %ecx
	andl $7, %ecx
	subl $8, %ecx
	negl %ecx
	subl %ecx, %edx
.L_copy_leading_bytes:
	movb (%rsi), %al
	movb %al, (%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_copy_leading_bytes

.L_8byte_aligned:
	/* Figure out how many whole cache lines (64 bytes) to copy */
	movl %edx, %ecx
	andl $63, %edx
	shrl $6, %ecx
	jz .L_no_whole_cache_lines

	/* Loop copying whole cache lines */
.L_cache_w0: movq (%rsi), %r8
.L_cache_w1: movq 1*8(%rsi), %r9
.L_cache_w2: movq 2*8(%rsi), %r10
.L_cache_w3: movq 3*8(%rsi), %r11
	movq %r8, (%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, 2*8(%rdi)
	movq %r11, 3*8(%rdi)
.L_cache_w4: movq 4*8(%rsi), %r8
.L_cache_w5: movq 5*8(%rsi), %r9
.L_cache_w6: movq 6*8(%rsi), %r10
.L_cache_w7: movq 7*8(%rsi), %r11
	movq %r8, 4*8(%rdi)
	movq %r9, 5*8(%rdi)
	movq %r10, 6*8(%rdi)
	movq %r11, 7*8(%rdi)
	leaq 64(%rsi), %rsi
	leaq 64(%rdi), %rdi
	decl %ecx
	jnz .L_cache_w0

	/* Are there any trailing 8-byte words? */
.L_no_whole_cache_lines:
	movl %edx, %ecx
	andl $7, %edx
	shrl $3, %ecx
	jz .L_no_whole_words

	/* Copy trailing words */
.L_copy_trailing_words:
	movq (%rsi), %r8
	mov %r8, (%rdi)
	leaq 8(%rsi), %rsi
	leaq 8(%rdi), %rdi
	decl %ecx
	jnz .L_copy_trailing_words

	/* Any trailing bytes? */
.L_no_whole_words:
	andl %edx, %edx
	jz .L_done_memcpy_trap

	/* Copy trailing bytes */
	movl %edx, %ecx
.L_copy_trailing_bytes:
	movb (%rsi), %al
	movb %al, (%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_copy_trailing_bytes

	/* Copy successful. Return zero */
.L_done_memcpy_trap:
	xorq %rax, %rax
	ret
ENDPROC(memcpy_mcsafe_unrolled)

	.section .fixup, "ax"
	/* Return -EFAULT for any failure */
.L_memcpy_mcsafe_fail:
	mov $-EFAULT, %rax
	ret

	.previous

	_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail)
#endif
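
/*
 * Usage sketch (illustrative only, assuming the C-level memcpy_mcsafe()
 * wrapper that dispatches to memcpy_mcsafe_unrolled()): callers must
 * check the return value, 0 on success or -EFAULT if a machine check
 * was taken while reading the source.
 *
 *	if (memcpy_mcsafe(dst, src, len))
 *		return -EIO;	// source data was lost to a machine check
 */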