/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
#include <asm/trapnr.h>

/*
 * ALIGN_DESTINATION - byte-copy until the destination is 8-byte aligned.
 *
 * Every user in this file only expands the macro after checking that
 * edx >= 8, so the at most 7 alignment bytes never exceed the count.
 *
 * Input:
 * rdi destination
 * rsi source
 * edx count
 *
 * Output:
 * rdi/rsi advanced past the bytes copied here, edx reduced by that amount.
 *
 * Clobbers: al, ecx, flags.
 *
 * A fault on either access goes to .Lcopy_user_handle_align, which adds
 * the alignment bytes still outstanding (ecx) back onto edx.
 */
.macro ALIGN_DESTINATION
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	subl $8,%ecx
	negl %ecx			/* ecx = 8 - (dst & 7) */
	subl %ecx,%edx			/* those bytes are handled here */
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:

	_ASM_EXTABLE_CPY(100b, .Lcopy_user_handle_align)
	_ASM_EXTABLE_CPY(101b, .Lcopy_user_handle_align)
.endm

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient micro
 * code for rep movsq
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx			/* edx = bytes left after the 64-byte blocks */
	shrl $6,%ecx			/* ecx = number of 64-byte blocks */
	jz .L_copy_short_string
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
	/*
	 * Also the entry point used by copy_user_enhanced_fast_string for
	 * copies shorter than 64 bytes (it has already done ASM_STAC).
	 */
.L_copy_short_string:
	movl %edx,%ecx
	andl $7,%edx			/* edx = trailing bytes */
	shrl $3,%ecx			/* ecx = number of 8-byte words */
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	RET

	/*
	 * Exception fixups: rebuild the number of bytes still to copy in
	 * edx from the loop counter (ecx) and the remainder (edx), then
	 * let the tail handler finish.  rsi/rdi are only advanced at the
	 * end of an iteration, so a partially done block is counted in full.
	 */
30:	shll $6,%ecx			/* 64-byte blocks left -> bytes */
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx		/* 8-byte words left -> bytes */
	jmp 60f
50:	movl %ecx,%edx			/* single bytes left */
60:	jmp .Lcopy_user_handle_tail	/* edx = bytes left to copy */

	_ASM_EXTABLE_CPY(1b, 30b)
	_ASM_EXTABLE_CPY(2b, 30b)
	_ASM_EXTABLE_CPY(3b, 30b)
	_ASM_EXTABLE_CPY(4b, 30b)
	_ASM_EXTABLE_CPY(5b, 30b)
	_ASM_EXTABLE_CPY(6b, 30b)
	_ASM_EXTABLE_CPY(7b, 30b)
	_ASM_EXTABLE_CPY(8b, 30b)
	_ASM_EXTABLE_CPY(9b, 30b)
	_ASM_EXTABLE_CPY(10b, 30b)
	_ASM_EXTABLE_CPY(11b, 30b)
	_ASM_EXTABLE_CPY(12b, 30b)
	_ASM_EXTABLE_CPY(13b, 30b)
	_ASM_EXTABLE_CPY(14b, 30b)
	_ASM_EXTABLE_CPY(15b, 30b)
	_ASM_EXTABLE_CPY(16b, 30b)
	_ASM_EXTABLE_CPY(18b, 40b)
	_ASM_EXTABLE_CPY(19b, 40b)
	_ASM_EXTABLE_CPY(21b, 50b)
	_ASM_EXTABLE_CPY(22b, 50b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this please consider this.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx			/* ecx = number of 8-byte words */
	andl $7,%edx			/* edx = trailing bytes */
1:	rep movsq
2:	movl %edx,%ecx
3:	rep movsb
	xorl %eax,%eax
	ASM_CLAC
	RET

	/*
	 * Exception fixups: a faulting rep leaves the outstanding
	 * iteration count in ecx.
	 */
11:	leal (%rdx,%rcx,8),%ecx		/* words left * 8 + trailing bytes */
12:	movl %ecx,%edx			/* edx = bytes left to copy */
	jmp .Lcopy_user_handle_tail

	_ASM_EXTABLE_CPY(1b, 11b)
	_ASM_EXTABLE_CPY(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_enhanced_fast_string)
	ASM_STAC
	cmpl $64,%edx
	jb .L_copy_short_string	/* less than 64 bytes, avoid the costly 'rep' */
	movl %edx,%ecx
1:	rep movsb
	xorl %eax,%eax
	ASM_CLAC
	RET

12:	movl %ecx,%edx			/* edx = bytes left to copy */
	jmp .Lcopy_user_handle_tail

	_ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

/*
 * Try to copy the last bytes after a fault in one of the copy loops.
 * Nothing is zeroed here; the caller is told how many bytes were left.
 * Since protection fault in copy_from/to_user is not a normal situation,
 * it is not necessary to optimize tail handling.
 * Don't try to copy the tail if machine check happened: the retry would
 * read the faulting source range again.
 *
 * Only reached through the _ASM_EXTABLE_CPY fixups in this file, so
 * user access is still enabled (ASM_STAC already done) on entry.
 *
 * Input:
 * eax trap number written by ex_handler_copy()
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
	cmp $X86_TRAP_MC,%eax		/* did a machine check end the copy? */
	je 3f

	movl %edx,%ecx
1:	rep movsb
2:	mov %ecx,%eax			/* ecx = bytes rep movsb did not copy */
	ASM_CLAC
	RET

	/*
	 * Machine check: do not touch the source again, report the whole
	 * remaining count as uncopied.
	 */
3:	movl %edx,%eax
	ASM_CLAC
	RET

	_ASM_EXTABLE_CPY(1b, 2b)

	/*
	 * Fixup for ALIGN_DESTINATION: ecx holds the alignment bytes not
	 * yet copied, edx the count that was to follow them.
	 */
.Lcopy_user_handle_align:
	addl %ecx,%edx
	jmp .Lcopy_user_handle_tail

SYM_CODE_END(.Lcopy_user_handle_tail)

/*
 * __copy_user_nocache - Uncached memory copy with exception handling
 * This will force destination out of cache for more performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 *
 * Input:
 * rdi destination
 * rsi source
 * edx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no byte left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	RET

	/*
	 * Exception fixups: rebuild the number of bytes still to copy in
	 * edx from the loop counter (ecx) and the remainder (edx), fence
	 * the non-temporal stores already issued, then let the common tail
	 * handler finish.
	 */
.L_fixup_4x8b_copy:
	shll $6,%ecx			/* 64-byte blocks left -> bytes */
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx		/* 8-byte words left -> bytes */
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx		/* 4-byte word left -> bytes */
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx			/* single bytes left */
.L_fixup_handle_tail:
	sfence
	jmp .Lcopy_user_handle_tail

	_ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
	_ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
	_ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
	_ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
	_ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
	_ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)