/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
#include <asm/trapnr.h>

/*
 * Align the destination (%rdi) to an 8-byte boundary by copying the
 * leading 1..7 bytes individually.
 *
 * In:  rdi = dest, rsi = src, edx = total byte count
 * Out: rdi/rsi advanced past the alignment bytes, edx reduced by the
 *      number of bytes consumed; ecx is zero on the success path.
 *
 * On a fault in the byte loop, ecx holds the bytes of the alignment run
 * not yet copied: add them back into edx and fall through to the common
 * tail handler (which relies on ecx having been counted down to zero on
 * success — "ecx is zerorest").
 */
.macro ALIGN_DESTINATION
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	subl $8,%ecx			/* ecx = (rdi & 7) - 8 ... */
	negl %ecx			/* ... negated: bytes needed to align */
	subl %ecx,%edx			/* charge alignment bytes to the count */
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:
	.section .fixup,"ax"
103:	addl %ecx,%edx			/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(100b, 103b)
	_ASM_EXTABLE_CPY(101b, 103b)
	.endm

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient micro
 * code for rep movsq
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
56 */ 57SYM_FUNC_START(copy_user_generic_unrolled) 58 ASM_STAC 59 cmpl $8,%edx 60 jb 20f /* less then 8 bytes, go to byte copy loop */ 61 ALIGN_DESTINATION 62 movl %edx,%ecx 63 andl $63,%edx 64 shrl $6,%ecx 65 jz .L_copy_short_string 661: movq (%rsi),%r8 672: movq 1*8(%rsi),%r9 683: movq 2*8(%rsi),%r10 694: movq 3*8(%rsi),%r11 705: movq %r8,(%rdi) 716: movq %r9,1*8(%rdi) 727: movq %r10,2*8(%rdi) 738: movq %r11,3*8(%rdi) 749: movq 4*8(%rsi),%r8 7510: movq 5*8(%rsi),%r9 7611: movq 6*8(%rsi),%r10 7712: movq 7*8(%rsi),%r11 7813: movq %r8,4*8(%rdi) 7914: movq %r9,5*8(%rdi) 8015: movq %r10,6*8(%rdi) 8116: movq %r11,7*8(%rdi) 82 leaq 64(%rsi),%rsi 83 leaq 64(%rdi),%rdi 84 decl %ecx 85 jnz 1b 86.L_copy_short_string: 87 movl %edx,%ecx 88 andl $7,%edx 89 shrl $3,%ecx 90 jz 20f 9118: movq (%rsi),%r8 9219: movq %r8,(%rdi) 93 leaq 8(%rsi),%rsi 94 leaq 8(%rdi),%rdi 95 decl %ecx 96 jnz 18b 9720: andl %edx,%edx 98 jz 23f 99 movl %edx,%ecx 10021: movb (%rsi),%al 10122: movb %al,(%rdi) 102 incq %rsi 103 incq %rdi 104 decl %ecx 105 jnz 21b 10623: xor %eax,%eax 107 ASM_CLAC 108 ret 109 110 .section .fixup,"ax" 11130: shll $6,%ecx 112 addl %ecx,%edx 113 jmp 60f 11440: leal (%rdx,%rcx,8),%edx 115 jmp 60f 11650: movl %ecx,%edx 11760: jmp .Lcopy_user_handle_tail /* ecx is zerorest also */ 118 .previous 119 120 _ASM_EXTABLE_CPY(1b, 30b) 121 _ASM_EXTABLE_CPY(2b, 30b) 122 _ASM_EXTABLE_CPY(3b, 30b) 123 _ASM_EXTABLE_CPY(4b, 30b) 124 _ASM_EXTABLE_CPY(5b, 30b) 125 _ASM_EXTABLE_CPY(6b, 30b) 126 _ASM_EXTABLE_CPY(7b, 30b) 127 _ASM_EXTABLE_CPY(8b, 30b) 128 _ASM_EXTABLE_CPY(9b, 30b) 129 _ASM_EXTABLE_CPY(10b, 30b) 130 _ASM_EXTABLE_CPY(11b, 30b) 131 _ASM_EXTABLE_CPY(12b, 30b) 132 _ASM_EXTABLE_CPY(13b, 30b) 133 _ASM_EXTABLE_CPY(14b, 30b) 134 _ASM_EXTABLE_CPY(15b, 30b) 135 _ASM_EXTABLE_CPY(16b, 30b) 136 _ASM_EXTABLE_CPY(18b, 40b) 137 _ASM_EXTABLE_CPY(19b, 40b) 138 _ASM_EXTABLE_CPY(21b, 50b) 139 _ASM_EXTABLE_CPY(22b, 50b) 140SYM_FUNC_END(copy_user_generic_unrolled) 
EXPORT_SYMBOL(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this please consider this.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	/* qword count in ecx, trailing byte count in edx */
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	RET

	.section .fixup,"ax"
	/* Fault in movsq: 8*ecx qword bytes + edx tail bytes remain */
11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(1b, 11b)
	_ASM_EXTABLE_CPY(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
200 */ 201SYM_FUNC_START(copy_user_enhanced_fast_string) 202 ASM_STAC 203 /* CPUs without FSRM should avoid rep movsb for short copies */ 204 ALTERNATIVE "cmpl $64, %edx; jb .L_copy_short_string", "", X86_FEATURE_FSRM 205 movl %edx,%ecx 2061: rep 207 movsb 208 xorl %eax,%eax 209 ASM_CLAC 210 ret 211 212 .section .fixup,"ax" 21312: movl %ecx,%edx /* ecx is zerorest also */ 214 jmp .Lcopy_user_handle_tail 215 .previous 216 217 _ASM_EXTABLE_CPY(1b, 12b) 218SYM_FUNC_END(copy_user_enhanced_fast_string) 219EXPORT_SYMBOL(copy_user_enhanced_fast_string) 220 221/* 222 * Try to copy last bytes and clear the rest if needed. 223 * Since protection fault in copy_from/to_user is not a normal situation, 224 * it is not necessary to optimize tail handling. 225 * Don't try to copy the tail if machine check happened 226 * 227 * Input: 228 * eax trap number written by ex_handler_copy() 229 * rdi destination 230 * rsi source 231 * rdx count 232 * 233 * Output: 234 * eax uncopied bytes or 0 if successful. 235 */ 236SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail) 237 cmp $X86_TRAP_MC,%eax 238 je 3f 239 240 movl %edx,%ecx 2411: rep movsb 2422: mov %ecx,%eax 243 ASM_CLAC 244 ret 245 2463: 247 movl %edx,%eax 248 ASM_CLAC 249 RET 250 251 _ASM_EXTABLE_CPY(1b, 2b) 252SYM_CODE_END(.Lcopy_user_handle_tail) 253 254/* 255 * copy_user_nocache - Uncached memory copy with exception handling 256 * This will force destination out of cache for more performance. 257 * 258 * Note: Cached memory copy is used when destination or size is not 259 * naturally aligned. That is: 260 * - Require 8-byte alignment when size is 8 bytes or larger. 261 * - Require 4-byte alignment when size is 4 bytes. 
262 */ 263SYM_FUNC_START(__copy_user_nocache) 264 ASM_STAC 265 266 /* If size is less than 8 bytes, go to 4-byte copy */ 267 cmpl $8,%edx 268 jb .L_4b_nocache_copy_entry 269 270 /* If destination is not 8-byte aligned, "cache" copy to align it */ 271 ALIGN_DESTINATION 272 273 /* Set 4x8-byte copy count and remainder */ 274 movl %edx,%ecx 275 andl $63,%edx 276 shrl $6,%ecx 277 jz .L_8b_nocache_copy_entry /* jump if count is 0 */ 278 279 /* Perform 4x8-byte nocache loop-copy */ 280.L_4x8b_nocache_copy_loop: 2811: movq (%rsi),%r8 2822: movq 1*8(%rsi),%r9 2833: movq 2*8(%rsi),%r10 2844: movq 3*8(%rsi),%r11 2855: movnti %r8,(%rdi) 2866: movnti %r9,1*8(%rdi) 2877: movnti %r10,2*8(%rdi) 2888: movnti %r11,3*8(%rdi) 2899: movq 4*8(%rsi),%r8 29010: movq 5*8(%rsi),%r9 29111: movq 6*8(%rsi),%r10 29212: movq 7*8(%rsi),%r11 29313: movnti %r8,4*8(%rdi) 29414: movnti %r9,5*8(%rdi) 29515: movnti %r10,6*8(%rdi) 29616: movnti %r11,7*8(%rdi) 297 leaq 64(%rsi),%rsi 298 leaq 64(%rdi),%rdi 299 decl %ecx 300 jnz .L_4x8b_nocache_copy_loop 301 302 /* Set 8-byte copy count and remainder */ 303.L_8b_nocache_copy_entry: 304 movl %edx,%ecx 305 andl $7,%edx 306 shrl $3,%ecx 307 jz .L_4b_nocache_copy_entry /* jump if count is 0 */ 308 309 /* Perform 8-byte nocache loop-copy */ 310.L_8b_nocache_copy_loop: 31120: movq (%rsi),%r8 31221: movnti %r8,(%rdi) 313 leaq 8(%rsi),%rsi 314 leaq 8(%rdi),%rdi 315 decl %ecx 316 jnz .L_8b_nocache_copy_loop 317 318 /* If no byte left, we're done */ 319.L_4b_nocache_copy_entry: 320 andl %edx,%edx 321 jz .L_finish_copy 322 323 /* If destination is not 4-byte aligned, go to byte copy: */ 324 movl %edi,%ecx 325 andl $3,%ecx 326 jnz .L_1b_cache_copy_entry 327 328 /* Set 4-byte copy count (1 or 0) and remainder */ 329 movl %edx,%ecx 330 andl $3,%edx 331 shrl $2,%ecx 332 jz .L_1b_cache_copy_entry /* jump if count is 0 */ 333 334 /* Perform 4-byte nocache copy: */ 33530: movl (%rsi),%r8d 33631: movnti %r8d,(%rdi) 337 leaq 4(%rsi),%rsi 338 leaq 4(%rdi),%rdi 339 340 /* If 
no bytes left, we're done: */ 341 andl %edx,%edx 342 jz .L_finish_copy 343 344 /* Perform byte "cache" loop-copy for the remainder */ 345.L_1b_cache_copy_entry: 346 movl %edx,%ecx 347.L_1b_cache_copy_loop: 34840: movb (%rsi),%al 34941: movb %al,(%rdi) 350 incq %rsi 351 incq %rdi 352 decl %ecx 353 jnz .L_1b_cache_copy_loop 354 355 /* Finished copying; fence the prior stores */ 356.L_finish_copy: 357 xorl %eax,%eax 358 ASM_CLAC 359 sfence 360 ret 361 362 .section .fixup,"ax" 363.L_fixup_4x8b_copy: 364 shll $6,%ecx 365 addl %ecx,%edx 366 jmp .L_fixup_handle_tail 367.L_fixup_8b_copy: 368 lea (%rdx,%rcx,8),%rdx 369 jmp .L_fixup_handle_tail 370.L_fixup_4b_copy: 371 lea (%rdx,%rcx,4),%rdx 372 jmp .L_fixup_handle_tail 373.L_fixup_1b_copy: 374 movl %ecx,%edx 375.L_fixup_handle_tail: 376 sfence 377 jmp .Lcopy_user_handle_tail 378 .previous 379 380 _ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy) 381 _ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy) 382 _ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy) 383 _ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy) 384 _ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy) 385 _ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy) 386 _ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy) 387 _ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy) 388 _ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy) 389 _ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy) 390 _ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy) 391 _ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy) 392 _ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy) 393 _ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy) 394 _ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy) 395 _ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy) 396 _ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy) 397 _ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy) 398 _ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy) 399 _ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy) 400 _ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy) 401 _ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy) 402SYM_FUNC_END(__copy_user_nocache) 403EXPORT_SYMBOL(__copy_user_nocache) 404