/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

.macro ALIGN_DESTINATION
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	subl $8,%ecx
	negl %ecx
	subl %ecx,%edx
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:
	.section .fixup,"ax"
103:	addl %ecx,%edx			/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(100b, 103b)
	_ASM_EXTABLE_UA(101b, 103b)
	.endm

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like the P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_copy_short_string
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
.L_copy_short_string:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
	.previous
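
/*
 * Recovery math for the .fixup code above: when one of the tagged
 * instructions faults, %ecx still holds the loop count that was live at
 * that point, so the fixup rebuilds the remaining byte count in %edx
 * before falling back to the byte-at-a-time tail handler:
 *
 *	30: fault in the 64-byte unrolled loop, remaining = %ecx*64 + %edx
 *	40: fault in the 8-byte loop,           remaining = %ecx*8 + %edx
 *	50: fault in the byte loop,             remaining = %ecx
 *
 * The same idea applies to label 103 in ALIGN_DESTINATION, which adds the
 * not-yet-copied alignment bytes in %ecx back onto %edx.
 */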

	_ASM_EXTABLE_UA(1b, 30b)
	_ASM_EXTABLE_UA(2b, 30b)
	_ASM_EXTABLE_UA(3b, 30b)
	_ASM_EXTABLE_UA(4b, 30b)
	_ASM_EXTABLE_UA(5b, 30b)
	_ASM_EXTABLE_UA(6b, 30b)
	_ASM_EXTABLE_UA(7b, 30b)
	_ASM_EXTABLE_UA(8b, 30b)
	_ASM_EXTABLE_UA(9b, 30b)
	_ASM_EXTABLE_UA(10b, 30b)
	_ASM_EXTABLE_UA(11b, 30b)
	_ASM_EXTABLE_UA(12b, 30b)
	_ASM_EXTABLE_UA(13b, 30b)
	_ASM_EXTABLE_UA(14b, 30b)
	_ASM_EXTABLE_UA(15b, 30b)
	_ASM_EXTABLE_UA(16b, 30b)
	_ASM_EXTABLE_UA(18b, 40b)
	_ASM_EXTABLE_UA(19b, 40b)
	_ASM_EXTABLE_UA(21b, 50b)
	_ASM_EXTABLE_UA(22b, 50b)
ENDPROC(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only copies of up to 4GB are supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page-sized chunks
 * even if user space passed a longer buffer.
 * Anything larger would also be dangerous, because both Intel and AMD
 * have errata with rep movsq > 4GB; keep that in mind if you ever feel
 * the need to lift this limit.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(1b, 11b)
	_ASM_EXTABLE_UA(3b, 12b)
ENDPROC(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs provide enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use enhanced REP MOVSB/STOSB when it is available.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_enhanced_fast_string)
	ASM_STAC
	cmpl $64,%edx
	jb .L_copy_short_string	/* less than 64 bytes, avoid the costly 'rep' */
	movl %edx,%ecx
1:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(1b, 12b)
ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

/*
 * Try to copy the last bytes.
 * Since a protection fault in copy_from/to_user is not a normal situation,
 * it is not necessary to optimize tail handling.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ALIGN;
.Lcopy_user_handle_tail:
	movl %edx,%ecx
1:	rep movsb
2:	mov %ecx,%eax
	ASM_CLAC
	ret

	_ASM_EXTABLE_UA(1b, 2b)
END(.Lcopy_user_handle_tail)
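
/*
 * For reference: callers do not pick one of the three variants above by
 * hand.  The C helper copy_user_generic() in
 * arch/x86/include/asm/uaccess_64.h patches in the right one at boot via
 * the alternatives mechanism, keyed on X86_FEATURE_REP_GOOD and
 * X86_FEATURE_ERMS.  Roughly (a sketch of that header, details may differ
 * between kernel versions):
 *
 *	static __always_inline __must_check unsigned long
 *	copy_user_generic(void *to, const void *from, unsigned len)
 *	{
 *		unsigned ret;
 *
 *		alternative_call_2(copy_user_generic_unrolled,
 *				   copy_user_generic_string, X86_FEATURE_REP_GOOD,
 *				   copy_user_enhanced_fast_string, X86_FEATURE_ERMS,
 *				   ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from), "=d" (len)),
 *				   "1" (to), "2" (from), "3" (len)
 *				   : "memory", "rcx", "r8", "r9", "r10", "r11");
 *		return ret;
 *	}
 */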

/*
 * __copy_user_nocache - Uncached memory copy with exception handling
 * This will force the destination out of the cache for more performance.
 *
 * Note: a cached memory copy is used when the destination or size is not
 * naturally aligned. That is:
 *  - 8-byte alignment is required when the size is 8 bytes or larger.
 *  - 4-byte alignment is required when the size is 4 bytes.
 */
ENTRY(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no bytes are left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes are left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior non-temporal stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	ret
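
/*
 * Exception fixups for __copy_user_nocache.  As in the cached variants,
 * each fixup rebuilds the number of not-yet-copied bytes in %edx from the
 * loop counter that was live when the fault hit (64, 8 or 4 bytes per
 * remaining iteration, plus the remainder), then issues an sfence so the
 * movnti stores already done are ordered before the byte-at-a-time retry
 * in .Lcopy_user_handle_tail.
 */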
	.section .fixup,"ax"
.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(2b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(3b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(4b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(5b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(6b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(7b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(8b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(9b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(10b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(11b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(12b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(13b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(14b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(15b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(16b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(20b, .L_fixup_8b_copy)
	_ASM_EXTABLE_UA(21b, .L_fixup_8b_copy)
	_ASM_EXTABLE_UA(30b, .L_fixup_4b_copy)
	_ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
	_ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
	_ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
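
/*
 * Unlike the three variants at the top of this file, __copy_user_nocache
 * is not selected through the copy_user_generic() alternatives; callers
 * such as __copy_from_user_inatomic_nocache() and the pmem
 * __copy_user_flushcache() path call it directly when they want to avoid
 * filling the cache with the destination data.  (Caller names are taken
 * from the surrounding kernel sources of this era and may differ between
 * versions.)
 */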