/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>

/*
 * We build a jump to memcpy_orig by default which gets NOPped out on
 * the majority of x86 CPUs which set REP_GOOD. In addition, on CPUs
 * which have the enhanced REP MOVSB/STOSB feature (ERMS), those NOPs
 * are changed to a jmp to memcpy_erms, which does the REP; MOVSB mem copy.
 */

.weak memcpy

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 *  rdi destination
 *  rsi source
 *  rdx count
 *
 * Output:
 * rax original destination
 */
ENTRY(__memcpy)
ENTRY(memcpy)
	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
		      "jmp memcpy_erms", X86_FEATURE_ERMS

	movq %rdi, %rax
	movq %rdx, %rcx
	shrq $3, %rcx
	andl $7, %edx
	rep movsq
	movl %edx, %ecx
	rep movsb
	ret
ENDPROC(memcpy)
ENDPROC(__memcpy)
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)

/*
 * memcpy_erms() - enhanced fast string memcpy. This is faster and
 * simpler than memcpy. Use memcpy_erms when possible.
 */
ENTRY(memcpy_erms)
	movq %rdi, %rax
	movq %rdx, %rcx
	rep movsb
	ret
ENDPROC(memcpy_erms)

ENTRY(memcpy_orig)
	movq %rdi, %rax

	cmpq $0x20, %rdx
	jb .Lhandle_tail

	/*
	 * Check whether a memory false dependence could occur,
	 * then jump to the corresponding copy mode.
	 */
	cmp %dil, %sil
	jl .Lcopy_backward
	subq $0x20, %rdx
.Lcopy_forward_loop:
	subq $0x20, %rdx

	/*
	 * Move in blocks of 4x8 bytes:
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq 2*8(%rsi), %r10
	movq 3*8(%rsi), %r11
	leaq 4*8(%rsi), %rsi

	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, 2*8(%rdi)
	movq %r11, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae .Lcopy_forward_loop
	addl $0x20, %edx
	jmp .Lhandle_tail

.Lcopy_backward:
	/*
	 * Calculate copy position to tail.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * At most 3 ALU operations in one cycle,
	 * so append NOPs in the same 16-byte chunk.
	 */
	.p2align 4
.Lcopy_backward_loop:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r8
	movq -2*8(%rsi), %r9
	movq -3*8(%rsi), %r10
	movq -4*8(%rsi), %r11
	leaq -4*8(%rsi), %rsi
	movq %r8, -1*8(%rdi)
	movq %r9, -2*8(%rdi)
	movq %r10, -3*8(%rdi)
	movq %r11, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae .Lcopy_backward_loop

	/*
	 * Calculate copy position to head.
	 */
	addl $0x20, %edx
	subq %rdx, %rsi
	subq %rdx, %rdi
.Lhandle_tail:
	cmpl $16, %edx
	jb .Lless_16bytes

	/*
	 * Move data from 16 bytes to 31 bytes.
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq -2*8(%rsi, %rdx), %r10
	movq -1*8(%rsi, %rdx), %r11
	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, -2*8(%rdi, %rdx)
	movq %r11, -1*8(%rdi, %rdx)
	retq
	.p2align 4
.Lless_16bytes:
	cmpl $8, %edx
	jb .Lless_8bytes
	/*
	 * Move data from 8 bytes to 15 bytes.
	 */
	movq 0*8(%rsi), %r8
	movq -1*8(%rsi, %rdx), %r9
	movq %r8, 0*8(%rdi)
	movq %r9, -1*8(%rdi, %rdx)
	retq
	.p2align 4
.Lless_8bytes:
	cmpl $4, %edx
	jb .Lless_3bytes

	/*
	 * Move data from 4 bytes to 7 bytes.
	 */
	movl (%rsi), %ecx
	movl -4(%rsi, %rdx), %r8d
	movl %ecx, (%rdi)
	movl %r8d, -4(%rdi, %rdx)
	retq
	.p2align 4
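	/*
	 * 0 to 3 bytes remain.  The subl below sets CF when the count
	 * was zero and ZF when it was one; movzbl does not modify the
	 * flags, so the later jz still tests the result of the subl.
	 */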
.Lless_3bytes:
	subl $1, %edx
	jb .Lend
	/*
	 * Move data from 1 byte to 3 bytes.
	 */
	movzbl (%rsi), %ecx
	jz .Lstore_1byte
	movzbq 1(%rsi), %r8
	movzbq (%rsi, %rdx), %r9
	movb %r8b, 1(%rdi)
	movb %r9b, (%rdi, %rdx)
.Lstore_1byte:
	movb %cl, (%rdi)

.Lend:
	retq
ENDPROC(memcpy_orig)

#ifndef CONFIG_UML

MCSAFE_TEST_CTL

/*
 * __memcpy_mcsafe - memory copy with machine check exception handling
 * Note that we only catch machine checks when reading the source addresses.
 * Writes to target are posted and don't generate machine checks.
 */
ENTRY(__memcpy_mcsafe)
	cmpl $8, %edx
	/* Less than 8 bytes? Go to byte copy loop */
	jb .L_no_whole_words

	/* Check for bad alignment of source */
	testl $7, %esi
	/* Already aligned */
	jz .L_8byte_aligned

	/* Copy one byte at a time until source is 8-byte aligned */
	movl %esi, %ecx
	andl $7, %ecx
	subl $8, %ecx
	negl %ecx
	subl %ecx, %edx
.L_read_leading_bytes:
	movb (%rsi), %al
	MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
	MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
.L_write_leading_bytes:
	movb %al, (%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_read_leading_bytes

.L_8byte_aligned:
	movl %edx, %ecx
	andl $7, %edx
	shrl $3, %ecx
	jz .L_no_whole_words

.L_read_words:
	movq (%rsi), %r8
	MCSAFE_TEST_SRC %rsi 8 .E_read_words
	MCSAFE_TEST_DST %rdi 8 .E_write_words
.L_write_words:
	movq %r8, (%rdi)
	addq $8, %rsi
	addq $8, %rdi
	decl %ecx
	jnz .L_read_words

	/* Any trailing bytes? */
.L_no_whole_words:
	andl %edx, %edx
	jz .L_done_memcpy_trap

	/* Copy trailing bytes */
	movl %edx, %ecx
.L_read_trailing_bytes:
	movb (%rsi), %al
	MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
	MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
.L_write_trailing_bytes:
	movb %al, (%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_read_trailing_bytes

	/* Copy successful. Return zero */
.L_done_memcpy_trap:
	xorl %eax, %eax
	ret
ENDPROC(__memcpy_mcsafe)
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)

	.section .fixup, "ax"
	/*
	 * Return the number of bytes not copied for any failure. Note that
	 * there is no "tail" handling since the source buffer is 8-byte
	 * aligned and poison is cacheline aligned.
	 */
.E_read_words:
	shll $3, %ecx
.E_leading_bytes:
	addl %edx, %ecx
.E_trailing_bytes:
	mov %ecx, %eax
	ret

	/*
	 * For write fault handling, given that the destination is unaligned,
	 * we handle faults on multi-byte writes with a byte-by-byte
	 * copy up to the write-protected page.
	 */
.E_write_words:
	shll $3, %ecx
	addl %edx, %ecx
	movl %ecx, %edx
	jmp mcsafe_handle_tail

	.previous

	_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
	_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
	_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
	_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
	_ASM_EXTABLE(.L_write_words, .E_write_words)
	_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
#endif