/* Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>

        .macro ALTERNATIVE_JUMP feature,orig,alt
0:
        .byte 0xe9                      /* 32bit jump */
        .long \orig-1f                  /* by default jump to orig */
1:
        .section .altinstr_replacement,"ax"
2:      .byte 0xe9                      /* near jump with 32bit immediate */
        .long \alt-1b                   /* offset */ /* or alternatively to alt */
        .previous
        .section .altinstructions,"a"
        .align 8
        .quad 0b
        .quad 2b
        .byte \feature                  /* when feature is set */
        .byte 5
        .byte 5
        .previous
        .endm

/* Standard copy_to_user with segment limit checking */
ENTRY(copy_to_user)
        CFI_STARTPROC
        GET_THREAD_INFO(%rax)
        movq %rdi,%rcx
        addq %rdx,%rcx
        jc bad_to_user
        cmpq threadinfo_addr_limit(%rax),%rcx
        jae bad_to_user
        xorl %ecx,%ecx                  /* clear zero flag */
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
        CFI_ENDPROC

ENTRY(copy_user_generic)
        CFI_STARTPROC
        movl $1,%ecx                    /* set zero flag */
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
        CFI_ENDPROC

ENTRY(__copy_from_user_inatomic)
        CFI_STARTPROC
        xorl %ecx,%ecx                  /* clear zero flag */
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
        CFI_ENDPROC

/* Standard copy_from_user with segment limit checking */
ENTRY(copy_from_user)
        CFI_STARTPROC
        GET_THREAD_INFO(%rax)
        movq %rsi,%rcx
        addq %rdx,%rcx
        jc bad_from_user
        cmpq threadinfo_addr_limit(%rax),%rcx
        jae bad_from_user
        movl $1,%ecx                    /* set zero flag */
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
        CFI_ENDPROC
ENDPROC(copy_from_user)

        .section .fixup,"ax"
        /* must zero dest */
bad_from_user:
        CFI_STARTPROC
        movl %edx,%ecx
        xorl %eax,%eax
        rep
        stosb
bad_to_user:
        movl %edx,%eax
        ret
        CFI_ENDPROC
END(bad_from_user)
        .previous


/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like the P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 * ecx zero flag -- if true, zero the destination on error
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
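/*
 * Rough outline of copy_user_generic_unrolled below (descriptive note,
 * not generated code):
 *
 *      if the destination is not 8-byte aligned and the count allows it,
 *              copy single bytes until the destination is aligned;
 *      while at least 64 bytes remain,
 *              copy a 64-byte block with eight quadword loads and stores;
 *      while at least 8 bytes remain,
 *              copy one quadword;
 *      while any bytes remain,
 *              copy one byte.
 *
 * %eax stays zero on success.  Every load and store is listed in
 * __ex_table, so a fault in any of them is redirected to the fixup code
 * after the function body, which computes the number of uncopied bytes
 * and returns it in %eax.
 */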
ENTRY(copy_user_generic_unrolled)
        CFI_STARTPROC
        pushq %rbx
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rbx, 0
        pushq %rcx
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rcx, 0
        xorl %eax,%eax                  /* zero for the exception handler */

#ifdef FIX_ALIGNMENT
        /* check for bad alignment of destination */
        movl %edi,%ecx
        andl $7,%ecx
        jnz .Lbad_alignment
.Lafter_bad_alignment:
#endif

        movq %rdx,%rcx

        movl $64,%ebx
        shrq $6,%rdx
        decq %rdx
        js .Lhandle_tail

        .p2align 4
.Lloop:
.Ls1:   movq (%rsi),%r11
.Ls2:   movq 1*8(%rsi),%r8
.Ls3:   movq 2*8(%rsi),%r9
.Ls4:   movq 3*8(%rsi),%r10
.Ld1:   movq %r11,(%rdi)
.Ld2:   movq %r8,1*8(%rdi)
.Ld3:   movq %r9,2*8(%rdi)
.Ld4:   movq %r10,3*8(%rdi)

.Ls5:   movq 4*8(%rsi),%r11
.Ls6:   movq 5*8(%rsi),%r8
.Ls7:   movq 6*8(%rsi),%r9
.Ls8:   movq 7*8(%rsi),%r10
.Ld5:   movq %r11,4*8(%rdi)
.Ld6:   movq %r8,5*8(%rdi)
.Ld7:   movq %r9,6*8(%rdi)
.Ld8:   movq %r10,7*8(%rdi)

        decq %rdx

        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi

        jns .Lloop

        .p2align 4
.Lhandle_tail:
        movl %ecx,%edx
        andl $63,%ecx
        shrl $3,%ecx
        jz .Lhandle_7
        movl $8,%ebx
        .p2align 4
.Lloop_8:
.Ls9:   movq (%rsi),%r8
.Ld9:   movq %r8,(%rdi)
        decl %ecx
        leaq 8(%rdi),%rdi
        leaq 8(%rsi),%rsi
        jnz .Lloop_8

.Lhandle_7:
        movl %edx,%ecx
        andl $7,%ecx
        jz .Lende
        .p2align 4
.Lloop_1:
.Ls10:  movb (%rsi),%bl
.Ld10:  movb %bl,(%rdi)
        incq %rdi
        incq %rsi
        decl %ecx
        jnz .Lloop_1

        CFI_REMEMBER_STATE
.Lende:
        popq %rcx
        CFI_ADJUST_CFA_OFFSET -8
        CFI_RESTORE rcx
        popq %rbx
        CFI_ADJUST_CFA_OFFSET -8
        CFI_RESTORE rbx
        ret
        CFI_RESTORE_STATE

#ifdef FIX_ALIGNMENT
        /* align destination */
        .p2align 4
.Lbad_alignment:
        movl $8,%r9d
        subl %ecx,%r9d
        movl %r9d,%ecx
        cmpq %r9,%rdx
        jz .Lhandle_7
        js .Lhandle_7
.Lalign_1:
.Ls11:  movb (%rsi),%bl
.Ld11:  movb %bl,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz .Lalign_1
        subq %r9,%rdx
        jmp .Lafter_bad_alignment
#endif

        /* table sorted by exception address */
        .section __ex_table,"a"
        .align 8
        .quad .Ls1,.Ls1e
        .quad .Ls2,.Ls2e
        .quad .Ls3,.Ls3e
        .quad .Ls4,.Ls4e
        .quad .Ld1,.Ls1e
        .quad .Ld2,.Ls2e
        .quad .Ld3,.Ls3e
        .quad .Ld4,.Ls4e
        .quad .Ls5,.Ls5e
        .quad .Ls6,.Ls6e
        .quad .Ls7,.Ls7e
        .quad .Ls8,.Ls8e
        .quad .Ld5,.Ls5e
        .quad .Ld6,.Ls6e
        .quad .Ld7,.Ls7e
        .quad .Ld8,.Ls8e
        .quad .Ls9,.Le_quad
        .quad .Ld9,.Le_quad
        .quad .Ls10,.Le_byte
        .quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
        .quad .Ls11,.Lzero_rest
        .quad .Ld11,.Lzero_rest
#endif
        .quad .Le5,.Le_zero
        .previous

        /* Compute the offset within the 64-byte block for the main loop.
           Accurate to 8 bytes, erring on the pessimistic side.  This is
           gross; it would be better to fix the interface. */
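        /*
         * Exception fixups for the unrolled copy.  The labels below are
         * the targets named in __ex_table above.  A fault at one of the
         * .LsN/.LdN slots in the 64-byte loop resumes at the matching
         * .LsNe label; falling through the addl $8 chain counts every
         * slot from the fault to the end of the block, i.e. the bytes of
         * the current block treated as not copied.  %rdi is then advanced
         * past the bytes counted as copied, the total remaining length is
         * rebuilt in %rdx from the loop counter and the sub-64 tail, and
         * .Lzero_rest zeroes the remainder if the zero flag saved on the
         * stack is set, before .Le_zero returns the uncopied count in %eax.
         */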
        /* eax: zero, ebx: 64 */
.Ls1e:  addl $8,%eax
.Ls2e:  addl $8,%eax
.Ls3e:  addl $8,%eax
.Ls4e:  addl $8,%eax
.Ls5e:  addl $8,%eax
.Ls6e:  addl $8,%eax
.Ls7e:  addl $8,%eax
.Ls8e:  addl $8,%eax
        addq %rbx,%rdi                  /* +64 */
        subq %rax,%rdi                  /* correct destination with computed offset */

        shlq $6,%rdx                    /* loop counter * 64 (stride length) */
        addq %rax,%rdx                  /* add offset to loop count */
        andl $63,%ecx                   /* remaining bytes */
        addq %rcx,%rdx                  /* add them */
        jmp .Lzero_rest

        /* exception on quad word loop in tail handling */
        /* ecx: loopcnt/8, edx: length, rdi: correct */
.Le_quad:
        shll $3,%ecx
        andl $7,%edx
        addl %ecx,%edx
        /* edx: bytes to zero, rdi: dest, eax: zero */
.Lzero_rest:
        cmpl $0,(%rsp)                  /* zero flag saved at entry */
        jz .Le_zero
        movq %rdx,%rcx
.Le_byte:
        xorl %eax,%eax
.Le5:   rep
        stosb
        /* when there is another exception while zeroing the rest just return */
.Le_zero:
        movq %rdx,%rax
        jmp .Lende
        CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)


        /* Some CPUs run faster using the string copy instructions.
           This is also a lot simpler.  Use them when possible.
           Patch in jmps to this code instead of copying it fully
           to avoid unwanted aliasing in the exception tables. */

/*
 * copy_user_generic_string - memory copy using the string instructions.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 * ecx zero flag
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 *
 * Only 4GB of copy is supported.  This shouldn't be a problem because
 * the kernel normally only writes from/to page sized chunks even if
 * user space passed a longer buffer.  More would also be dangerous
 * because both Intel and AMD have errata with rep movsq > 4GB.  Anyone
 * who feels the need to lift this limit should keep those errata in
 * mind.
 */
ENTRY(copy_user_generic_string)
        CFI_STARTPROC
        movl %ecx,%r8d                  /* save zero flag */
        movl %edx,%ecx
        shrl $3,%ecx
        andl $7,%edx
        jz 10f
1:      rep
        movsq
        movl %edx,%ecx
2:      rep
        movsb
9:      movl %ecx,%eax
        ret

        /* count is a multiple of 8 bytes */
10:     rep
        movsq
        xor %eax,%eax
        ret

        /* exception handling */
3:      lea (%rdx,%rcx,8),%rax          /* exception on quad loop */
        jmp 6f
5:      movl %ecx,%eax                  /* exception on byte loop */
        /* eax: left over bytes */
6:      testl %r8d,%r8d                 /* zero flag set? */
        jz 7f
        movl %eax,%ecx                  /* initialize x86 loop counter */
        push %rax
        xorl %eax,%eax
8:      rep
        stosb                           /* zero the rest */
11:     pop %rax
7:      ret
        CFI_ENDPROC
END(copy_user_generic_string)

        .section __ex_table,"a"
        .quad 1b,3b
        .quad 2b,5b
        .quad 8b,11b
        .quad 10b,3b
        .previous
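        /*
         * Fixup mapping for the string variant: a fault in the quadword
         * copy (1b or 10b) lands at 3b, which rebuilds the remaining byte
         * count as %rdx + %rcx*8; a fault in the byte copy (2b) lands at
         * 5b with the remainder already in %ecx; a fault while zeroing
         * the rest (8b) lands at 11b, which just restores %rax and
         * returns.
         */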