/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

/* Standard copy_to_user with segment limit checking */
ENTRY(_copy_to_user)
	mov PER_CPU_VAR(current_task), %rax
	movq %rdi,%rcx
	addq %rdx,%rcx
	jc bad_to_user
	cmpq TASK_addr_limit(%rax),%rcx
	ja bad_to_user
	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
		      "jmp copy_user_generic_string",		\
		      X86_FEATURE_REP_GOOD,			\
		      "jmp copy_user_enhanced_fast_string",	\
		      X86_FEATURE_ERMS
ENDPROC(_copy_to_user)
EXPORT_SYMBOL(_copy_to_user)

/* Standard copy_from_user with segment limit checking */
ENTRY(_copy_from_user)
	mov PER_CPU_VAR(current_task), %rax
	movq %rsi,%rcx
	addq %rdx,%rcx
	jc bad_from_user
	cmpq TASK_addr_limit(%rax),%rcx
	ja bad_from_user
	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
		      "jmp copy_user_generic_string",		\
		      X86_FEATURE_REP_GOOD,			\
		      "jmp copy_user_enhanced_fast_string",	\
		      X86_FEATURE_ERMS
ENDPROC(_copy_from_user)
EXPORT_SYMBOL(_copy_from_user)


	.section .fixup,"ax"
	/* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
	movl %edx,%ecx
	xorl %eax,%eax
	rep
	stosb
bad_to_user:
	movl %edx,%eax
	ret
ENDPROC(bad_from_user)
	.previous

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like the P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
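/*
 * Illustrative C-level sketch of the copy scheme implemented below
 * (not built; the helper name is made up for illustration, and fault
 * handling is omitted -- a fault in any tagged instruction jumps to
 * the .fixup code, which recomputes the remaining byte count and
 * hands it to copy_user_handle_tail):
 *
 *	while (count >= 64) {		// 8 quadwords per iteration
 *		copy_8_quadwords(dst, src);	// hypothetical helper
 *		src += 64; dst += 64; count -= 64;
 *	}
 *	while (count >= 8) {		// quadword tail
 *		*(u64 *)dst = *(u64 *)src;
 *		src += 8; dst += 8; count -= 8;
 *	}
 *	while (count--)			// byte tail
 *		*dst++ = *src++;
 *	return 0;
 */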
ENTRY(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f	/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz 17f
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
17:	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp copy_user_handle_tail /* ecx is zerorest also */
	.previous

	_ASM_EXTABLE(1b,30b)
	_ASM_EXTABLE(2b,30b)
	_ASM_EXTABLE(3b,30b)
	_ASM_EXTABLE(4b,30b)
	_ASM_EXTABLE(5b,30b)
	_ASM_EXTABLE(6b,30b)
	_ASM_EXTABLE(7b,30b)
	_ASM_EXTABLE(8b,30b)
	_ASM_EXTABLE(9b,30b)
	_ASM_EXTABLE(10b,30b)
	_ASM_EXTABLE(11b,30b)
	_ASM_EXTABLE(12b,30b)
	_ASM_EXTABLE(13b,30b)
	_ASM_EXTABLE(14b,30b)
	_ASM_EXTABLE(15b,30b)
	_ASM_EXTABLE(16b,30b)
	_ASM_EXTABLE(18b,40b)
	_ASM_EXTABLE(19b,40b)
	_ASM_EXTABLE(21b,50b)
	_ASM_EXTABLE(22b,50b)
ENDPROC(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * Copying more would also be dangerous because both Intel and AMD
 * have errata with rep movsq > 4GB. Anyone lifting this limit needs
 * to take those errata into account.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,11b)
	_ASM_EXTABLE(3b,12b)
ENDPROC(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use enhanced REP MOVSB/STOSB when available.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
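/*
 * On ERMS hardware the whole copy collapses into a single rep movsb.
 * As a rough, illustrative C sketch (not a kernel helper; the function
 * name is made up and fault handling is omitted), the fast path is:
 *
 *	static inline void erms_copy(void *dst, const void *src,
 *				     unsigned long count)
 *	{
 *		asm volatile("rep movsb"
 *			     : "+D" (dst), "+S" (src), "+c" (count)
 *			     : : "memory");
 *	}
 *
 * A fault inside the rep movsb leaves the remaining byte count in
 * %rcx, which the .fixup code below forwards to copy_user_handle_tail.
 */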
ENTRY(copy_user_enhanced_fast_string)
	ASM_STAC
	movl %edx,%ecx
1:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,12b)
ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

/*
 * __copy_user_nocache - Uncached memory copy with exception handling
 * The destination is written with non-temporal stores so the copied
 * data does not displace other data from the cache.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
ENTRY(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no byte left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	ret

	.section .fixup,"ax"
.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
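	/*
	 * All fixup paths converge here: %rdx now holds the number of
	 * bytes still to copy, and the sfence above orders the
	 * non-temporal stores that have already been issued. The tail
	 * handler retries the remainder and returns the uncopied count
	 * in %eax.
	 */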
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(20b,.L_fixup_8b_copy)
	_ASM_EXTABLE(21b,.L_fixup_8b_copy)
	_ASM_EXTABLE(30b,.L_fixup_4b_copy)
	_ASM_EXTABLE(31b,.L_fixup_4b_copy)
	_ASM_EXTABLE(40b,.L_fixup_1b_copy)
	_ASM_EXTABLE(41b,.L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
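/*
 * For reference: every fixup path in this file ends in
 * copy_user_handle_tail(), which is implemented in C elsewhere in the
 * x86 lib code. A rough, illustrative sketch of its behaviour (not the
 * exact kernel implementation) is:
 *
 *	unsigned long copy_user_handle_tail(char *to, char *from,
 *					    unsigned len)
 *	{
 *		// Retry the remainder a byte at a time; stop at the
 *		// first byte that faults and report what is left.
 *		for (; len; --len, to++, from++) {
 *			char c;
 *			if (__get_user(c, from))
 *				break;
 *			if (__put_user(c, to))
 *				break;
 *		}
 *		clac();
 *		return len;	// uncopied bytes, as documented above
 *	}
 */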