/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

.macro ALIGN_DESTINATION
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	subl $8,%ecx
	negl %ecx
	subl %ecx,%edx
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:
	.section .fixup,"ax"
103:	addl %ecx,%edx			/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(100b, 103b)
	_ASM_EXTABLE_UA(101b, 103b)
	.endm

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient microcode
 * for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_copy_short_string
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
.L_copy_short_string:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
	.previous

	_ASM_EXTABLE_UA(1b, 30b)
	_ASM_EXTABLE_UA(2b, 30b)
	_ASM_EXTABLE_UA(3b, 30b)
	_ASM_EXTABLE_UA(4b, 30b)
	_ASM_EXTABLE_UA(5b, 30b)
	_ASM_EXTABLE_UA(6b, 30b)
	_ASM_EXTABLE_UA(7b, 30b)
	_ASM_EXTABLE_UA(8b, 30b)
	_ASM_EXTABLE_UA(9b, 30b)
	_ASM_EXTABLE_UA(10b, 30b)
	_ASM_EXTABLE_UA(11b, 30b)
	_ASM_EXTABLE_UA(12b, 30b)
	_ASM_EXTABLE_UA(13b, 30b)
	_ASM_EXTABLE_UA(14b, 30b)
	_ASM_EXTABLE_UA(15b, 30b)
	_ASM_EXTABLE_UA(16b, 30b)
	_ASM_EXTABLE_UA(18b, 40b)
	_ASM_EXTABLE_UA(19b, 40b)
	_ASM_EXTABLE_UA(21b, 50b)
	_ASM_EXTABLE_UA(22b, 50b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
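
/*
 * The fixup arithmetic above, illustrated with made-up numbers: if a
 * fault hits inside the 64-byte loop while %ecx == 3 blocks are still
 * pending and %edx == 17 tail bytes remain, label 30 rebuilds the
 * outstanding byte count as (3 << 6) + 17 = 209.  Label 40 likewise
 * computes %edx + 8*%ecx for the 8-byte loop, and label 50 takes the
 * remaining byte count from %ecx directly.  The result is handed to
 * .Lcopy_user_handle_tail in %edx, which retries byte by byte and
 * returns whatever is still uncopied.
 */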

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * Copying more would also be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. Anyone lifting the 4GB limit needs to
 * keep those errata in mind.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(1b, 11b)
	_ASM_EXTABLE_UA(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs provide enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use them when the ERMS feature is available.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_enhanced_fast_string)
	ASM_STAC
	cmpl $64,%edx
	jb .L_copy_short_string	/* less than 64 bytes, avoid the costly 'rep' */
	movl %edx,%ecx
1:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

/*
 * Try to copy last bytes and clear the rest if needed.
 * Since a protection fault in copy_from/to_user is not a normal situation,
 * it is not necessary to optimize tail handling.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
	movl %edx,%ecx
1:	rep movsb
2:	mov %ecx,%eax
	ASM_CLAC
	ret

	_ASM_EXTABLE_UA(1b, 2b)
SYM_CODE_END(.Lcopy_user_handle_tail)
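
/*
 * How callers typically pick one of the three copy variants defined
 * above: a simplified, illustrative C sketch (cf. copy_user_generic()
 * in asm/uaccess_64.h, which does this selection with the alternatives
 * patching machinery rather than runtime branches):
 *
 *	unsigned long copy_user_generic(void *to, const void *from,
 *					unsigned len)
 *	{
 *		if (static_cpu_has(X86_FEATURE_ERMS))
 *			return copy_user_enhanced_fast_string(to, from, len);
 *		if (static_cpu_has(X86_FEATURE_REP_GOOD))
 *			return copy_user_generic_string(to, from, len);
 *		return copy_user_generic_unrolled(to, from, len);
 *	}
 */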

/*
 * __copy_user_nocache - Uncached memory copy with exception handling
 * This will force destination out of cache for more performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
SYM_FUNC_START(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no byte left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior non-temporal stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	ret

	.section .fixup,"ax"
.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(2b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(3b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(4b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(5b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(6b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(7b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(8b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(9b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(10b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(11b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(12b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(13b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(14b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(15b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(16b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(20b, .L_fixup_8b_copy)
	_ASM_EXTABLE_UA(21b, .L_fixup_8b_copy)
	_ASM_EXTABLE_UA(30b, .L_fixup_4b_copy)
	_ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
	_ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
	_ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
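
/*
 * Illustrative C sketch of the chunk cascade in __copy_user_nocache,
 * once ALIGN_DESTINATION has 8-byte aligned the destination (no fault
 * handling; helper names are made up for illustration only):
 *
 *	while (len >= 64) { copy_64B_nt(); len -= 64; }		4x8-byte loop
 *	while (len >= 8)  { copy_8B_nt();  len -= 8;  }		8-byte loop
 *	if (len >= 4 && dst_4B_aligned()) {
 *		copy_4B_nt(); len -= 4;				4-byte copy
 *	}
 *	while (len)	  { copy_1B();	   len -= 1;  }		cached byte loop
 *
 * For example, a 100-byte copy to an 8-byte-aligned destination does one
 * 64-byte iteration, four 8-byte iterations and one 4-byte store
 * (64 + 32 + 4 = 100), all with non-temporal movnti; the cached byte
 * loop is never reached.
 */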