/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
#include <asm/trapnr.h>

.macro ALIGN_DESTINATION
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	subl $8,%ecx
	negl %ecx
	subl %ecx,%edx
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:

	_ASM_EXTABLE_CPY(100b, .Lcopy_user_handle_align)
	_ASM_EXTABLE_CPY(101b, .Lcopy_user_handle_align)
.endm

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like the P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f				/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_copy_short_string
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
.L_copy_short_string:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	RET

30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp .Lcopy_user_handle_tail	/* ecx is zerorest also */

	_ASM_EXTABLE_CPY(1b, 30b)
	_ASM_EXTABLE_CPY(2b, 30b)
	_ASM_EXTABLE_CPY(3b, 30b)
	_ASM_EXTABLE_CPY(4b, 30b)
	_ASM_EXTABLE_CPY(5b, 30b)
	_ASM_EXTABLE_CPY(6b, 30b)
	_ASM_EXTABLE_CPY(7b, 30b)
	_ASM_EXTABLE_CPY(8b, 30b)
	_ASM_EXTABLE_CPY(9b, 30b)
	_ASM_EXTABLE_CPY(10b, 30b)
	_ASM_EXTABLE_CPY(11b, 30b)
	_ASM_EXTABLE_CPY(12b, 30b)
	_ASM_EXTABLE_CPY(13b, 30b)
	_ASM_EXTABLE_CPY(14b, 30b)
	_ASM_EXTABLE_CPY(15b, 30b)
	_ASM_EXTABLE_CPY(16b, 30b)
	_ASM_EXTABLE_CPY(18b, 40b)
	_ASM_EXTABLE_CPY(19b, 40b)
	_ASM_EXTABLE_CPY(21b, 50b)
	_ASM_EXTABLE_CPY(22b, 50b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
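/*
 * All three copy_user_generic_* variants in this file share the same
 * register contract: rdi = destination, rsi = source, rdx = count, and
 * eax returns the number of bytes that were NOT copied (0 on success).
 * Callers normally reach them through copy_user_generic() in
 * arch/x86/include/asm/uaccess_64.h, which selects one implementation
 * via alternatives keyed on X86_FEATURE_REP_GOOD and X86_FEATURE_ERMS.
 * A minimal caller-side sketch, assuming the usual C prototype and the
 * purely illustrative names dst/src/len:
 *
 *	unsigned long not_copied;
 *
 *	not_copied = copy_user_generic_unrolled(dst, src, len);
 *
 * A non-zero return value means a fault was hit and only the first
 * (len - not_copied) bytes were actually copied.
 */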
/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler.  Use them when possible.
 *
 * Only 4GB of copy is supported.  This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * Copying more than 4GB would also be dangerous, because both Intel
 * and AMD have errata for rep movsq with counts above 4GB.  Anyone
 * who wants to lift this limit should keep those errata in mind.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f				/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep movsq
2:	movl %edx,%ecx
3:	rep movsb
	xorl %eax,%eax
	ASM_CLAC
	RET

11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx			/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail

	_ASM_EXTABLE_CPY(1b, 11b)
	_ASM_EXTABLE_CPY(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs support the enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use them for copies when they are available.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_enhanced_fast_string)
	ASM_STAC
	/* CPUs without FSRM should avoid rep movsb for short copies */
	ALTERNATIVE "cmpl $64, %edx; jb .L_copy_short_string", "", X86_FEATURE_FSRM
	movl %edx,%ecx
1:	rep movsb
	xorl %eax,%eax
	ASM_CLAC
	RET

12:	movl %ecx,%edx			/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail

	_ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

/*
 * Try to copy the last bytes and clear the rest if needed.
 * Since a protection fault in copy_from/to_user is not a normal situation,
 * it is not necessary to optimize tail handling.
 * Don't try to copy the tail if a machine check happened.
 *
 * Input:
 * eax trap number written by ex_handler_copy()
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
	cmp $X86_TRAP_MC,%eax
	je 3f

	movl %edx,%ecx
1:	rep movsb
2:	mov %ecx,%eax
	ASM_CLAC
	RET

3:
	movl %edx,%eax
	ASM_CLAC
	RET

	_ASM_EXTABLE_CPY(1b, 2b)

.Lcopy_user_handle_align:
	addl %ecx,%edx			/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail

SYM_CODE_END(.Lcopy_user_handle_tail)
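/*
 * Worked example of the fixup arithmetic that feeds .Lcopy_user_handle_tail:
 * if copy_user_generic_unrolled faults in its 8-byte loop (labels 18/19)
 * with ecx = 3 quadwords still pending and edx = 5 remainder bytes, the
 * "40:" fixup computes edx = 3*8 + 5 = 29, so the tail handler retries
 * (or reports in eax) exactly the 29 bytes that were not yet copied.
 */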
/*
 * __copy_user_nocache - Uncached memory copy with exception handling.
 * This keeps the destination data out of the CPU caches for better
 * performance on large copies.
 *
 * Note: a cached memory copy is used when the destination or size is not
 * naturally aligned, i.e. non-temporal stores:
 * - require 8-byte alignment when size is 8 bytes or larger,
 * - require 4-byte alignment when size is 4 bytes.
 */
SYM_FUNC_START(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no bytes are left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes are left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	RET

.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
	jmp .Lcopy_user_handle_tail

	_ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
	_ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
	_ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
	_ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
	_ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
	_ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
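/*
 * Example of the alignment behaviour documented above __copy_user_nocache:
 * a 16-byte copy to an 8-byte-aligned destination is performed entirely
 * with movnti, while the same 16 bytes copied to a destination at offset 4
 * first get 4 bytes of ordinary (cached) movb stores in ALIGN_DESTINATION
 * and only the remaining 12 aligned bytes use non-temporal stores.
 */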