/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient micro
 * code for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_copy_short_string
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
.L_copy_short_string:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp copy_user_handle_tail /* ecx is zerorest also */
	.previous

	_ASM_EXTABLE_UA(1b, 30b)
	_ASM_EXTABLE_UA(2b, 30b)
	_ASM_EXTABLE_UA(3b, 30b)
	_ASM_EXTABLE_UA(4b, 30b)
	_ASM_EXTABLE_UA(5b, 30b)
	_ASM_EXTABLE_UA(6b, 30b)
	_ASM_EXTABLE_UA(7b, 30b)
	_ASM_EXTABLE_UA(8b, 30b)
	_ASM_EXTABLE_UA(9b, 30b)
	_ASM_EXTABLE_UA(10b, 30b)
	_ASM_EXTABLE_UA(11b, 30b)
	_ASM_EXTABLE_UA(12b, 30b)
	_ASM_EXTABLE_UA(13b, 30b)
	_ASM_EXTABLE_UA(14b, 30b)
	_ASM_EXTABLE_UA(15b, 30b)
	_ASM_EXTABLE_UA(16b, 30b)
	_ASM_EXTABLE_UA(18b, 40b)
	_ASM_EXTABLE_UA(19b, 40b)
	_ASM_EXTABLE_UA(21b, 50b)
	_ASM_EXTABLE_UA(22b, 50b)
ENDPROC(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
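/*
 * How a variant typically gets picked (illustrative sketch, not part of the
 * interface above): in contemporary kernels the selection is expected to
 * live in copy_user_generic() in asm/uaccess_64.h, where alternative_call_2()
 * patches the call target at boot based on X86_FEATURE_ERMS and
 * X86_FEATURE_REP_GOOD. The C below only models that policy; the prototypes
 * are illustrative, since the real entry points take their arguments in
 * rdi/rsi/rdx as documented above.
 *
 *	static unsigned long copy_user_generic(void *to, const void *from,
 *					       unsigned len)
 *	{
 *		if (boot_cpu_has(X86_FEATURE_ERMS))
 *			return copy_user_enhanced_fast_string(to, from, len);
 *		if (boot_cpu_has(X86_FEATURE_REP_GOOD))
 *			return copy_user_generic_string(to, from, len);
 *		return copy_user_generic_unrolled(to, from, len);
 *	}
 */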
/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to lift
 * this limit, please keep those errata in mind.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(1b, 11b)
	_ASM_EXTABLE_UA(3b, 12b)
ENDPROC(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use enhanced REP MOVSB/STOSB if it is available.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_enhanced_fast_string)
	ASM_STAC
	cmpl $64,%edx
	jb .L_copy_short_string	/* less than 64 bytes, avoid the costly 'rep' */
	movl %edx,%ecx
1:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(1b, 12b)
ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
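/*
 * Fault handling shared by the entry points in this file: every instruction
 * that can fault has an _ASM_EXTABLE_UA() entry pointing at a .fixup stub.
 * The stub converts the remaining loop counts back into a byte count in
 * %edx and tail-calls copy_user_handle_tail() (a C helper, expected to live
 * in arch/x86/lib/usercopy_64.c), which retries the rest one byte at a time
 * and returns how many bytes still could not be copied; that value becomes
 * the eax return of the original entry point. A rough, illustrative C model
 * of the helper's contract, where byte_from_user()/byte_to_user() are
 * hypothetical stand-ins for byte-sized __get_user/__put_user steps:
 *
 *	unsigned long copy_user_handle_tail(char *to, char *from, unsigned len)
 *	{
 *		char c;
 *
 *		for (; len; --len, to++, from++) {
 *			if (byte_from_user(&c, from))	// faulted on source
 *				break;
 *			if (byte_to_user(c, to))	// faulted on destination
 *				break;
 *		}
 *		return len;	// bytes that remain uncopied
 *	}
 */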
/*
 * __copy_user_nocache - Uncached memory copy with exception handling
 * This keeps the destination out of the cache (non-temporal stores) for
 * better performance on large copies.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
ENTRY(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no bytes left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	ret

	.section .fixup,"ax"
.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(2b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(3b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(4b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(5b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(6b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(7b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(8b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(9b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(10b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(11b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(12b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(13b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(14b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(15b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(16b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(20b, .L_fixup_8b_copy)
	_ASM_EXTABLE_UA(21b, .L_fixup_8b_copy)
	_ASM_EXTABLE_UA(30b, .L_fixup_4b_copy)
	_ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
	_ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
	_ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
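/*
 * Note on __copy_user_nocache(): movnti stores are weakly ordered, which is
 * why both the success path (.L_finish_copy) and the fault path
 * (.L_fixup_handle_tail) execute sfence before returning, so the
 * non-temporal stores are ordered before the caller proceeds; persistent
 * memory users in particular are assumed to depend on this. Callers normally
 * reach this routine through wrappers such as
 * __copy_from_user_inatomic_nocache() rather than calling it directly. A
 * minimal, illustrative caller, assuming the dst/src/len form implied by the
 * register interface documented above (pull_from_user_nt() is a made-up
 * name, and some kernel versions declare an extra, ignored zerorest
 * argument):
 *
 *	static int pull_from_user_nt(void *dst, const void __user *src,
 *				     unsigned len)
 *	{
 *		return __copy_user_nocache(dst, src, len) ? -EFAULT : 0;
 *	}
 */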