/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
#include <asm/trapnr.h>

/*
 * ALIGN_DESTINATION - byte-copy the head of a buffer until the
 * destination pointer is 8-byte aligned.
 *
 * Input:
 * rdi destination
 * rsi source
 * edx count (every user in this file checks count >= 8 first, so the
 *     head of at most 7 bytes can never exceed it)
 *
 * Output:
 * rdi/rsi advanced past the bytes copied, edx reduced by that amount.
 *
 * Clobbers al, ecx and the flags.
 *
 * Numeric labels are used because the macro is expanded several times
 * in this file.
 */
.macro ALIGN_DESTINATION
	movl %edi,%ecx
	andl $7,%ecx			/* ecx = destination & 7 */
	jz 102f				/* nothing to do: already aligned */
	subl $8,%ecx
	negl %ecx			/* ecx = 8 - (destination & 7) */
	subl %ecx,%edx			/* take the head off the count up front */
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz 100b
102:
	.section .fixup,"ax"
	/* ecx = head bytes not yet copied; give them back to the count */
103:	addl %ecx,%edx
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(100b, 103b)
	_ASM_EXTABLE_CPY(101b, 103b)
	.endm

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient micro
 * code for rep movsq
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
56 */ 57SYM_FUNC_START(copy_user_generic_unrolled) 58 ASM_STAC 59 cmpl $8,%edx 60 jb 20f /* less then 8 bytes, go to byte copy loop */ 61 ALIGN_DESTINATION 62 movl %edx,%ecx 63 andl $63,%edx 64 shrl $6,%ecx 65 jz .L_copy_short_string 661: movq (%rsi),%r8 672: movq 1*8(%rsi),%r9 683: movq 2*8(%rsi),%r10 694: movq 3*8(%rsi),%r11 705: movq %r8,(%rdi) 716: movq %r9,1*8(%rdi) 727: movq %r10,2*8(%rdi) 738: movq %r11,3*8(%rdi) 749: movq 4*8(%rsi),%r8 7510: movq 5*8(%rsi),%r9 7611: movq 6*8(%rsi),%r10 7712: movq 7*8(%rsi),%r11 7813: movq %r8,4*8(%rdi) 7914: movq %r9,5*8(%rdi) 8015: movq %r10,6*8(%rdi) 8116: movq %r11,7*8(%rdi) 82 leaq 64(%rsi),%rsi 83 leaq 64(%rdi),%rdi 84 decl %ecx 85 jnz 1b 86.L_copy_short_string: 87 movl %edx,%ecx 88 andl $7,%edx 89 shrl $3,%ecx 90 jz 20f 9118: movq (%rsi),%r8 9219: movq %r8,(%rdi) 93 leaq 8(%rsi),%rsi 94 leaq 8(%rdi),%rdi 95 decl %ecx 96 jnz 18b 9720: andl %edx,%edx 98 jz 23f 99 movl %edx,%ecx 10021: movb (%rsi),%al 10122: movb %al,(%rdi) 102 incq %rsi 103 incq %rdi 104 decl %ecx 105 jnz 21b 10623: xor %eax,%eax 107 ASM_CLAC 108 ret 109 110 .section .fixup,"ax" 11130: shll $6,%ecx 112 addl %ecx,%edx 113 jmp 60f 11440: leal (%rdx,%rcx,8),%edx 115 jmp 60f 11650: movl %ecx,%edx 11760: jmp .Lcopy_user_handle_tail /* ecx is zerorest also */ 118 .previous 119 120 _ASM_EXTABLE_CPY(1b, 30b) 121 _ASM_EXTABLE_CPY(2b, 30b) 122 _ASM_EXTABLE_CPY(3b, 30b) 123 _ASM_EXTABLE_CPY(4b, 30b) 124 _ASM_EXTABLE_CPY(5b, 30b) 125 _ASM_EXTABLE_CPY(6b, 30b) 126 _ASM_EXTABLE_CPY(7b, 30b) 127 _ASM_EXTABLE_CPY(8b, 30b) 128 _ASM_EXTABLE_CPY(9b, 30b) 129 _ASM_EXTABLE_CPY(10b, 30b) 130 _ASM_EXTABLE_CPY(11b, 30b) 131 _ASM_EXTABLE_CPY(12b, 30b) 132 _ASM_EXTABLE_CPY(13b, 30b) 133 _ASM_EXTABLE_CPY(14b, 30b) 134 _ASM_EXTABLE_CPY(15b, 30b) 135 _ASM_EXTABLE_CPY(16b, 30b) 136 _ASM_EXTABLE_CPY(18b, 40b) 137 _ASM_EXTABLE_CPY(19b, 40b) 138 _ASM_EXTABLE_CPY(21b, 50b) 139 _ASM_EXTABLE_CPY(22b, 50b) 140SYM_FUNC_END(copy_user_generic_unrolled) 
EXPORT_SYMBOL(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this please consider this.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f				/* less than 8 bytes, go to byte copy */
	ALIGN_DESTINATION

	/* ecx = number of quadwords, edx = bytes left after them */
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
1:	rep movsq
2:	movl %edx,%ecx
3:	rep movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
	/* Fault in rep movsq: ecx holds the quadwords it had left */
.Lstring_fixup_qwords:
	leal (%rdx,%rcx,8),%ecx		/* ecx = quadwords * 8 + byte remainder */
	/* Fault in rep movsb enters here: ecx is already a byte count */
.Lstring_fixup_bytes:
	movl %ecx,%edx			/* tail handler takes the count in edx */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(1b, .Lstring_fixup_qwords)
	_ASM_EXTABLE_CPY(3b, .Lstring_fixup_bytes)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
200 */ 201SYM_FUNC_START(copy_user_enhanced_fast_string) 202 ASM_STAC 203 cmpl $64,%edx 204 jb .L_copy_short_string /* less then 64 bytes, avoid the costly 'rep' */ 205 movl %edx,%ecx 2061: rep 207 movsb 208 xorl %eax,%eax 209 ASM_CLAC 210 ret 211 212 .section .fixup,"ax" 21312: movl %ecx,%edx /* ecx is zerorest also */ 214 jmp .Lcopy_user_handle_tail 215 .previous 216 217 _ASM_EXTABLE_CPY(1b, 12b) 218SYM_FUNC_END(copy_user_enhanced_fast_string) 219EXPORT_SYMBOL(copy_user_enhanced_fast_string) 220 221/* 222 * Try to copy last bytes and clear the rest if needed. 223 * Since protection fault in copy_from/to_user is not a normal situation, 224 * it is not necessary to optimize tail handling. 225 * Don't try to copy the tail if machine check happened 226 * 227 * Input: 228 * rdi destination 229 * rsi source 230 * rdx count 231 * 232 * Output: 233 * eax uncopied bytes or 0 if successful. 234 */ 235SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail) 236 movl %edx,%ecx 2371: rep movsb 2382: mov %ecx,%eax 239 ASM_CLAC 240 ret 241 242 _ASM_EXTABLE_CPY(1b, 2b) 243SYM_CODE_END(.Lcopy_user_handle_tail) 244 245/* 246 * copy_user_nocache - Uncached memory copy with exception handling 247 * This will force destination out of cache for more performance. 248 * 249 * Note: Cached memory copy is used when destination or size is not 250 * naturally aligned. That is: 251 * - Require 8-byte alignment when size is 8 bytes or larger. 252 * - Require 4-byte alignment when size is 4 bytes. 
253 */ 254SYM_FUNC_START(__copy_user_nocache) 255 ASM_STAC 256 257 /* If size is less than 8 bytes, go to 4-byte copy */ 258 cmpl $8,%edx 259 jb .L_4b_nocache_copy_entry 260 261 /* If destination is not 8-byte aligned, "cache" copy to align it */ 262 ALIGN_DESTINATION 263 264 /* Set 4x8-byte copy count and remainder */ 265 movl %edx,%ecx 266 andl $63,%edx 267 shrl $6,%ecx 268 jz .L_8b_nocache_copy_entry /* jump if count is 0 */ 269 270 /* Perform 4x8-byte nocache loop-copy */ 271.L_4x8b_nocache_copy_loop: 2721: movq (%rsi),%r8 2732: movq 1*8(%rsi),%r9 2743: movq 2*8(%rsi),%r10 2754: movq 3*8(%rsi),%r11 2765: movnti %r8,(%rdi) 2776: movnti %r9,1*8(%rdi) 2787: movnti %r10,2*8(%rdi) 2798: movnti %r11,3*8(%rdi) 2809: movq 4*8(%rsi),%r8 28110: movq 5*8(%rsi),%r9 28211: movq 6*8(%rsi),%r10 28312: movq 7*8(%rsi),%r11 28413: movnti %r8,4*8(%rdi) 28514: movnti %r9,5*8(%rdi) 28615: movnti %r10,6*8(%rdi) 28716: movnti %r11,7*8(%rdi) 288 leaq 64(%rsi),%rsi 289 leaq 64(%rdi),%rdi 290 decl %ecx 291 jnz .L_4x8b_nocache_copy_loop 292 293 /* Set 8-byte copy count and remainder */ 294.L_8b_nocache_copy_entry: 295 movl %edx,%ecx 296 andl $7,%edx 297 shrl $3,%ecx 298 jz .L_4b_nocache_copy_entry /* jump if count is 0 */ 299 300 /* Perform 8-byte nocache loop-copy */ 301.L_8b_nocache_copy_loop: 30220: movq (%rsi),%r8 30321: movnti %r8,(%rdi) 304 leaq 8(%rsi),%rsi 305 leaq 8(%rdi),%rdi 306 decl %ecx 307 jnz .L_8b_nocache_copy_loop 308 309 /* If no byte left, we're done */ 310.L_4b_nocache_copy_entry: 311 andl %edx,%edx 312 jz .L_finish_copy 313 314 /* If destination is not 4-byte aligned, go to byte copy: */ 315 movl %edi,%ecx 316 andl $3,%ecx 317 jnz .L_1b_cache_copy_entry 318 319 /* Set 4-byte copy count (1 or 0) and remainder */ 320 movl %edx,%ecx 321 andl $3,%edx 322 shrl $2,%ecx 323 jz .L_1b_cache_copy_entry /* jump if count is 0 */ 324 325 /* Perform 4-byte nocache copy: */ 32630: movl (%rsi),%r8d 32731: movnti %r8d,(%rdi) 328 leaq 4(%rsi),%rsi 329 leaq 4(%rdi),%rdi 330 331 /* If 
no bytes left, we're done: */ 332 andl %edx,%edx 333 jz .L_finish_copy 334 335 /* Perform byte "cache" loop-copy for the remainder */ 336.L_1b_cache_copy_entry: 337 movl %edx,%ecx 338.L_1b_cache_copy_loop: 33940: movb (%rsi),%al 34041: movb %al,(%rdi) 341 incq %rsi 342 incq %rdi 343 decl %ecx 344 jnz .L_1b_cache_copy_loop 345 346 /* Finished copying; fence the prior stores */ 347.L_finish_copy: 348 xorl %eax,%eax 349 ASM_CLAC 350 sfence 351 ret 352 353 .section .fixup,"ax" 354.L_fixup_4x8b_copy: 355 shll $6,%ecx 356 addl %ecx,%edx 357 jmp .L_fixup_handle_tail 358.L_fixup_8b_copy: 359 lea (%rdx,%rcx,8),%rdx 360 jmp .L_fixup_handle_tail 361.L_fixup_4b_copy: 362 lea (%rdx,%rcx,4),%rdx 363 jmp .L_fixup_handle_tail 364.L_fixup_1b_copy: 365 movl %ecx,%edx 366.L_fixup_handle_tail: 367 sfence 368 jmp .Lcopy_user_handle_tail 369 .previous 370 371 _ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy) 372 _ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy) 373 _ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy) 374 _ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy) 375 _ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy) 376 _ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy) 377 _ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy) 378 _ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy) 379 _ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy) 380 _ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy) 381 _ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy) 382 _ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy) 383 _ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy) 384 _ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy) 385 _ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy) 386 _ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy) 387 _ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy) 388 _ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy) 389 _ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy) 390 _ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy) 391 _ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy) 392 _ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy) 393SYM_FUNC_END(__copy_user_nocache) 394EXPORT_SYMBOL(__copy_user_nocache) 395