/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>

.pushsection .noinstr.text, "ax"

/*
 * We build a jump to memcpy_orig by default which gets NOPped out on
 * the majority of x86 CPUs which set REP_GOOD. In addition, on CPUs
 * which have the enhanced REP MOVSB/STOSB feature (ERMS), those NOPs
 * are changed to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
 */

.weak memcpy

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 *  rdi destination
 *  rsi source
 *  rdx count
 *
 * Output:
 *  rax original destination
 */
SYM_FUNC_START_ALIAS(__memcpy)
SYM_FUNC_START_LOCAL(memcpy)
	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
		      "jmp memcpy_erms", X86_FEATURE_ERMS

	movq %rdi, %rax
	movq %rdx, %rcx
	shrq $3, %rcx
	andl $7, %edx
	rep movsq
	movl %edx, %ecx
	rep movsb
	ret
SYM_FUNC_END(memcpy)
SYM_FUNC_END_ALIAS(__memcpy)
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)

/*
 * memcpy_erms() - enhanced fast string memcpy. This is faster and
 * simpler than memcpy. Use memcpy_erms when possible.
 */
SYM_FUNC_START_LOCAL(memcpy_erms)
	movq %rdi, %rax
	movq %rdx, %rcx
	rep movsb
	ret
SYM_FUNC_END(memcpy_erms)

SYM_FUNC_START_LOCAL(memcpy_orig)
	movq %rdi, %rax

	cmpq $0x20, %rdx
	jb .Lhandle_tail

	/*
	 * Check whether a memory false dependence could occur,
	 * then jump to the corresponding copy mode.
	 */
	cmp %dil, %sil
	jl .Lcopy_backward
	subq $0x20, %rdx
.Lcopy_forward_loop:
	subq $0x20, %rdx

	/*
	 * Move in blocks of 4x8 bytes:
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq 2*8(%rsi), %r10
	movq 3*8(%rsi), %r11
	leaq 4*8(%rsi), %rsi

	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, 2*8(%rdi)
	movq %r11, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae .Lcopy_forward_loop
	addl $0x20, %edx
	jmp .Lhandle_tail

.Lcopy_backward:
	/*
	 * Calculate copy position to tail.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * At most 3 ALU operations in one cycle,
	 * so append NOPs in the same 16-byte chunk.
	 */
	.p2align 4
.Lcopy_backward_loop:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r8
	movq -2*8(%rsi), %r9
	movq -3*8(%rsi), %r10
	movq -4*8(%rsi), %r11
	leaq -4*8(%rsi), %rsi
	movq %r8, -1*8(%rdi)
	movq %r9, -2*8(%rdi)
	movq %r10, -3*8(%rdi)
	movq %r11, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae .Lcopy_backward_loop

	/*
	 * Calculate copy position to head.
	 */
	addl $0x20, %edx
	subq %rdx, %rsi
	subq %rdx, %rdi
.Lhandle_tail:
	cmpl $16, %edx
	jb .Lless_16bytes

	/*
	 * Copy 16 to 31 bytes.
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq -2*8(%rsi, %rdx), %r10
	movq -1*8(%rsi, %rdx), %r11
	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, -2*8(%rdi, %rdx)
	movq %r11, -1*8(%rdi, %rdx)
	retq
	.p2align 4
.Lless_16bytes:
	cmpl $8, %edx
	jb .Lless_8bytes
	/*
	 * Copy 8 to 15 bytes.
	 */
	movq 0*8(%rsi), %r8
	movq -1*8(%rsi, %rdx), %r9
	movq %r8, 0*8(%rdi)
	movq %r9, -1*8(%rdi, %rdx)
	retq
	.p2align 4
.Lless_8bytes:
	cmpl $4, %edx
	jb .Lless_3bytes

	/*
	 * Copy 4 to 7 bytes.
	 */
	movl (%rsi), %ecx
	movl -4(%rsi, %rdx), %r8d
	movl %ecx, (%rdi)
	movl %r8d, -4(%rdi, %rdx)
	retq
	.p2align 4
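	/*
	 * 0 to 3 bytes left. The jb and jz below both test the flags
	 * set by the "subl $1, %edx": CF means the length was 0 and
	 * ZF means it was exactly 1; the intervening movzbl does not
	 * modify the flags.
	 */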
.Lless_3bytes:
	subl $1, %edx
	jb .Lend
	/*
	 * Copy 1 to 3 bytes.
	 */
	movzbl (%rsi), %ecx
	jz .Lstore_1byte
	movzbq 1(%rsi), %r8
	movzbq (%rsi, %rdx), %r9
	movb %r8b, 1(%rdi)
	movb %r9b, (%rdi, %rdx)

.Lstore_1byte:
	movb %cl, (%rdi)

.Lend:
	retq
SYM_FUNC_END(memcpy_orig)

.popsection

#ifndef CONFIG_UML

MCSAFE_TEST_CTL

/*
 * __memcpy_mcsafe - memory copy with machine check exception handling
 * Note that we only catch machine checks when reading the source addresses.
 * Writes to the target are posted and don't generate machine checks.
 */
SYM_FUNC_START(__memcpy_mcsafe)
	cmpl $8, %edx
	/* Less than 8 bytes? Go to byte copy loop */
	jb .L_no_whole_words

	/* Check for bad alignment of source */
	testl $7, %esi
	/* Already aligned */
	jz .L_8byte_aligned

	/* Copy one byte at a time until source is 8-byte aligned */
	movl %esi, %ecx
	andl $7, %ecx
	subl $8, %ecx
	negl %ecx
	subl %ecx, %edx
.L_read_leading_bytes:
	movb (%rsi), %al
	MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
	MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
.L_write_leading_bytes:
	movb %al, (%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_read_leading_bytes

.L_8byte_aligned:
	movl %edx, %ecx
	andl $7, %edx
	shrl $3, %ecx
	jz .L_no_whole_words

.L_read_words:
	movq (%rsi), %r8
	MCSAFE_TEST_SRC %rsi 8 .E_read_words
	MCSAFE_TEST_DST %rdi 8 .E_write_words
.L_write_words:
	movq %r8, (%rdi)
	addq $8, %rsi
	addq $8, %rdi
	decl %ecx
	jnz .L_read_words

	/* Any trailing bytes? */
.L_no_whole_words:
	andl %edx, %edx
	jz .L_done_memcpy_trap

	/* Copy trailing bytes */
	movl %edx, %ecx
.L_read_trailing_bytes:
	movb (%rsi), %al
	MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
	MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
.L_write_trailing_bytes:
	movb %al, (%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_read_trailing_bytes

	/* Copy successful. Return zero */
.L_done_memcpy_trap:
	xorl %eax, %eax
.L_done:
	ret
SYM_FUNC_END(__memcpy_mcsafe)
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)

	.section .fixup, "ax"
	/*
	 * Return the number of bytes not copied for any failure. Note that
	 * there is no "tail" handling since the source buffer is 8-byte
	 * aligned and poison is cacheline aligned.
	 */
.E_read_words:
	shll $3, %ecx
.E_leading_bytes:
	addl %edx, %ecx
.E_trailing_bytes:
	mov %ecx, %eax
	jmp .L_done

	/*
	 * For write fault handling, given that the destination is
	 * unaligned, we handle faults on multi-byte writes with a
	 * byte-by-byte copy up to the write-protected page.
	 */
.E_write_words:
	shll $3, %ecx
	addl %edx, %ecx
	movl %ecx, %edx
	jmp mcsafe_handle_tail

	.previous

	_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
	_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
	_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
	_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
	_ASM_EXTABLE(.L_write_words, .E_write_words)
	_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
#endif