/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
#include <asm/trapnr.h>

/*
 * ALIGN_DESTINATION - byte-copy until the destination is 8-byte aligned.
 *
 * Input:
 * rdi destination
 * rsi source
 * edx count (every user in this file has already checked count >= 8,
 *     so subtracting the 1..7 alignment bytes cannot underflow)
 *
 * On fall-through rdi is 8-byte aligned, rsi/rdi have been advanced past
 * the copied bytes and edx has been reduced by the same amount.
 * Clobbers ecx, al and the flags.
 *
 * If a byte load (100) or store (101) faults, fixup 103 adds the alignment
 * bytes that are still outstanding (ecx) back into edx and jumps to
 * .Lcopy_user_handle_tail with edx = total bytes not yet copied.
 */
.macro ALIGN_DESTINATION
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	subl $8,%ecx
	negl %ecx			/* ecx = 8 - (rdi & 7) bytes to align */
	subl %ecx,%edx			/* edx = bytes left after alignment */
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:
	.section .fixup,"ax"
103:	addl %ecx,%edx			/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(100b, 103b)
	_ASM_EXTABLE_CPY(101b, 103b)
	.endm

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient micro
 * code for rep movsq
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
*/
SYM_FUNC_START(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION

	/* ecx = number of 64-byte chunks, edx = count % 64 */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_copy_short_string

	/*
	 * 64 bytes per iteration.  rsi/rdi are only advanced after the whole
	 * chunk has been stored, and ecx is only decremented after that, so
	 * a fault anywhere in 1..16 leaves ecx counting the current chunk.
	 */
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b

	/*
	 * Copy the remaining (< 64) bytes: whole quadwords first, then bytes.
	 * copy_user_enhanced_fast_string also jumps here for short copies.
	 */
.L_copy_short_string:
	/* ecx = number of quadwords, edx = count % 8 */
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b

	/* Byte loop for the last edx (< 8) bytes */
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	ret

	/*
	 * Fixups: compute the number of bytes not yet copied in edx and hand
	 * over to .Lcopy_user_handle_tail.
	 *   30: fault in the 64-byte loop  -> edx += ecx * 64
	 *   40: fault in the quadword loop -> edx += ecx * 8
	 *   50: fault in the byte loop     -> edx  = ecx
	 */
	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
	.previous

	_ASM_EXTABLE_CPY(1b, 30b)
	_ASM_EXTABLE_CPY(2b, 30b)
	_ASM_EXTABLE_CPY(3b, 30b)
	_ASM_EXTABLE_CPY(4b, 30b)
	_ASM_EXTABLE_CPY(5b, 30b)
	_ASM_EXTABLE_CPY(6b, 30b)
	_ASM_EXTABLE_CPY(7b, 30b)
	_ASM_EXTABLE_CPY(8b, 30b)
	_ASM_EXTABLE_CPY(9b, 30b)
	_ASM_EXTABLE_CPY(10b, 30b)
	_ASM_EXTABLE_CPY(11b, 30b)
	_ASM_EXTABLE_CPY(12b, 30b)
	_ASM_EXTABLE_CPY(13b, 30b)
	_ASM_EXTABLE_CPY(14b, 30b)
	_ASM_EXTABLE_CPY(15b, 30b)
	_ASM_EXTABLE_CPY(16b, 30b)
	_ASM_EXTABLE_CPY(18b, 40b)
	_ASM_EXTABLE_CPY(19b, 40b)
	_ASM_EXTABLE_CPY(21b, 50b)
	_ASM_EXTABLE_CPY(22b, 50b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this please consider this.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	/* ecx = number of quadwords, edx = count % 8 */
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	/*
	 * Fixups: leave the number of uncopied bytes in edx.
	 *   11: fault in rep movsq -> ecx quadwords plus edx trailing bytes
	 *       remain; falls through to 12.
	 *   12: fault in rep movsb -> ecx bytes remain.
	 */
	.section .fixup,"ax"
11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(1b, 11b)
	_ASM_EXTABLE_CPY(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
*/
SYM_FUNC_START(copy_user_enhanced_fast_string)
	ASM_STAC
	cmpl $64,%edx
	/*
	 * Short copies reuse the quadword/byte tail of
	 * copy_user_generic_unrolled, including its fixups.
	 */
	jb .L_copy_short_string	/* less than 64 bytes, avoid the costly 'rep' */
	movl %edx,%ecx
1:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	/* Fault in rep movsb: ecx bytes remain uncopied */
	.section .fixup,"ax"
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

/*
 * Try to copy last bytes and clear the rest if needed.
 * Since protection fault in copy_from/to_user is not a normal situation,
 * it is not necessary to optimize tail handling.
 * Don't try to copy the tail if machine check happened
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 * eax compared against X86_TRAP_MC on entry (presumably the trap number
 *     left by the _ASM_EXTABLE_CPY exception handler - TODO confirm)
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
	movl %edx,%ecx
	cmp $X86_TRAP_MC,%eax		/* check if X86_TRAP_MC */
	je 3f
	/* Retry byte-wise; a fault here resumes at 2 with ecx = bytes left */
1:	rep movsb
2:	mov %ecx,%eax
	ASM_CLAC
	ret

	/*
	 * Return zero to pretend that this copy succeeded. This
	 * is counter-intuitive, but needed to prevent the code
	 * in lib/iov_iter.c from retrying and running back into
	 * the poison cache line again. The machine check handler
	 * will ensure that a SIGBUS is sent to the task.
	 */
3:	xorl %eax,%eax
	ASM_CLAC
	ret

	_ASM_EXTABLE_CPY(1b, 2b)
SYM_CODE_END(.Lcopy_user_handle_tail)

/*
 * copy_user_nocache - Uncached memory copy with exception handling
 * This will force destination out of cache for more performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
*/
SYM_FUNC_START(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/*
	 * Set 4x8-byte copy count and remainder:
	 * ecx = number of 64-byte chunks, edx = count % 64
	 */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/*
	 * Perform 4x8-byte nocache loop-copy (two groups of four quadwords,
	 * i.e. 64 bytes per iteration; rsi/rdi/ecx are updated only after
	 * the whole chunk has been stored)
	 */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no byte left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	ret

	/*
	 * Fixups: compute the number of uncopied bytes in edx, fence the
	 * prior stores, then hand over to .Lcopy_user_handle_tail.
	 *   4x8b: edx += ecx * 64
	 *   8b:   edx += ecx * 8
	 *   4b:   edx += ecx * 4
	 *   1b:   edx  = ecx
	 */
	.section .fixup,"ax"
.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
	_ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
	_ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
	_ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
	_ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
	_ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)