/*
 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>

/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
 * destination is zeroed.
 *
 * Input
 *	rdi  source
 *	rsi  destination
 *	edx  len (32bit)
 *	ecx  sum (32bit)
 *	r8   src_err_ptr (int)
 *	r9   dst_err_ptr (int)
 *
 * Output
 *	eax  32bit folded sum. Undefined in case of exception.
 *
 * Wrappers need to take care of supplying a valid exception sum and of
 * zeroing the destination. They should also align source or destination
 * to 8 bytes.
 */

	.macro source
10:
	.section __ex_table,"a"
	.align 8
	.quad 10b,.Lbad_source
	.previous
	.endm

	.macro dest
20:
	.section __ex_table,"a"
	.align 8
	.quad 20b,.Lbad_dest
	.previous
	.endm

	.macro ignore L=.Lignore
30:
	.section __ex_table,"a"
	.align 8
	.quad 30b,\L
	.previous
	.endm


ENTRY(csum_partial_copy_generic)
	CFI_STARTPROC
	cmpl	$3*64,%edx
	jle	.Lignore

.Lignore:
	subq	$7*8,%rsp
	CFI_ADJUST_CFA_OFFSET 7*8
	movq	%rbx,2*8(%rsp)
	CFI_REL_OFFSET rbx, 2*8
	movq	%r12,3*8(%rsp)
	CFI_REL_OFFSET r12, 3*8
	movq	%r14,4*8(%rsp)
	CFI_REL_OFFSET r14, 4*8
	movq	%r13,5*8(%rsp)
	CFI_REL_OFFSET r13, 5*8
	movq	%rbp,6*8(%rsp)
	CFI_REL_OFFSET rbp, 6*8

	movq	%r8,(%rsp)	/* save src_err_ptr */
	movq	%r9,1*8(%rsp)	/* save dst_err_ptr */

	movl	%ecx,%eax	/* initial sum into the accumulator */
	movl	%edx,%ecx	/* length into rcx */

	xorl	%r9d,%r9d	/* r9 stays zero; used to fold in CF */
	movq	%rcx,%r12

	shrq	$6,%r12		/* number of 64 byte blocks */
	jz	.Lhandle_tail	/* < 64 */

	clc

	/* main loop: checksum and copy in 64 byte blocks */
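	/*
	 * Carry chain: CF was cleared once above (clc); each adcq in the
	 * loop folds the previous iteration's carry back into the sum.
	 * decl and leaq are used for loop control because decl does not
	 * modify CF and leaq modifies no flags, so the carry survives
	 * across iterations; the final adcq %r9,%rax after the loop
	 * (r9 is zero) adds in the last carry.
	 */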
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11: temp3, rdx: temp4, r12: loopcnt */
	/* r10: temp5, rbp: temp6, r14: temp7, r13: temp8 */
	.p2align 4
.Lloop:
	source
	movq	(%rdi),%rbx
	source
	movq	8(%rdi),%r8
	source
	movq	16(%rdi),%r11
	source
	movq	24(%rdi),%rdx

	source
	movq	32(%rdi),%r10
	source
	movq	40(%rdi),%rbp
	source
	movq	48(%rdi),%r14
	source
	movq	56(%rdi),%r13

	ignore	2f
	prefetcht0 5*64(%rdi)	/* a fault here resumes at 2: */
2:
	adcq	%rbx,%rax
	adcq	%r8,%rax
	adcq	%r11,%rax
	adcq	%rdx,%rax
	adcq	%r10,%rax
	adcq	%rbp,%rax
	adcq	%r14,%rax
	adcq	%r13,%rax

	decl	%r12d		/* sets ZF for jnz, leaves CF intact */

	dest
	movq	%rbx,(%rsi)
	dest
	movq	%r8,8(%rsi)
	dest
	movq	%r11,16(%rsi)
	dest
	movq	%rdx,24(%rsi)

	dest
	movq	%r10,32(%rsi)
	dest
	movq	%rbp,40(%rsi)
	dest
	movq	%r14,48(%rsi)
	dest
	movq	%r13,56(%rsi)

3:

	leaq	64(%rdi),%rdi
	leaq	64(%rsi),%rsi

	jnz	.Lloop

	adcq	%r9,%rax	/* fold in the last carry */

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx: count */
	movl	%ecx,%r10d	/* save count for the <8 byte tail */
	andl	$63,%ecx	/* bytes left after the 64 byte blocks */
	shrl	$3,%ecx		/* number of remaining 8 byte words */
	jz	.Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq	(%rdi),%rbx
	adcq	%rbx,%rax
	decl	%ecx
	dest
	movq	%rbx,(%rsi)
	leaq	8(%rsi),%rsi	/* preserve carry */
	leaq	8(%rdi),%rdi
	jnz	.Lloop_8
	adcq	%r9,%rax	/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl	%eax,%ebx
	shrq	$32,%rax
	addl	%ebx,%eax	/* add high and low halves */
	adcl	%r9d,%eax	/* fold in carry */

	/* do last up to 6 bytes */
.Lhandle_7:
	movl	%r10d,%ecx
	andl	$7,%ecx		/* 0..7 bytes left */
	shrl	$1,%ecx		/* number of remaining 16bit words */
	jz	.Lhandle_1
	movl	$2,%edx
	xorl	%ebx,%ebx
	clc
	.p2align 4
.Lloop_1:
	source
	movw	(%rdi),%bx
	adcl	%ebx,%eax
	decl	%ecx
	dest
	movw	%bx,(%rsi)
	leaq	2(%rdi),%rdi
	leaq	2(%rsi),%rsi
	jnz	.Lloop_1
	adcl	%r9d,%eax	/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testl	$1,%r10d
	jz	.Lende
	xorl	%ebx,%ebx
	source
	movb	(%rdi),%bl
	dest
	movb	%bl,(%rsi)
	addl	%ebx,%eax
	adcl	%r9d,%eax	/* carry */

	CFI_REMEMBER_STATE
.Lende:
	movq	2*8(%rsp),%rbx
	CFI_RESTORE rbx
	movq	3*8(%rsp),%r12
	CFI_RESTORE r12
	movq	4*8(%rsp),%r14
	CFI_RESTORE r14
	movq	5*8(%rsp),%r13
	CFI_RESTORE r13
	movq	6*8(%rsp),%rbp
	CFI_RESTORE rbp
	addq	$7*8,%rsp
	CFI_ADJUST_CFA_OFFSET -7*8
	ret
	CFI_RESTORE_STATE

	/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
	movq	(%rsp),%rax	/* src_err_ptr */
	testq	%rax,%rax	/* NULL means no error reporting */
	jz	.Lende
	movl	$-EFAULT,(%rax)
	jmp	.Lende

.Lbad_dest:
	movq	8(%rsp),%rax	/* dst_err_ptr */
	testq	%rax,%rax
	jz	.Lende
	movl	$-EFAULT,(%rax)
	jmp	.Lende
	CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
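
/*
 * For illustration only: a minimal, hypothetical sketch (in C, kept as a
 * comment; this is not the kernel's actual wrapper code) of how a wrapper
 * might meet the contract described in the header comment: pass the error
 * pointers, zero the destination when the source faults, and return a
 * valid substitute sum. The prototype is assumed from the register
 * description at the top of this file; copy_and_checksum is an invented
 * name.
 *
 *	unsigned int csum_partial_copy_generic(const void *src, void *dst,
 *					       unsigned int len,
 *					       unsigned int sum,
 *					       int *src_err_ptr,
 *					       int *dst_err_ptr);
 *
 *	static unsigned int copy_and_checksum(const void *src, void *dst,
 *					      unsigned int len,
 *					      unsigned int sum)
 *	{
 *		int src_err = 0, dst_err = 0;
 *
 *		sum = csum_partial_copy_generic(src, dst, len, sum,
 *						&src_err, &dst_err);
 *		if (src_err)
 *			memset(dst, 0, len);	// zero dest on a source fault
 *		if (src_err || dst_err)
 *			sum = 0;		// substitute a valid dummy sum
 *		return sum;
 *	}
 */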