/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 *
 * Copies len bytes from source to destination while accumulating a
 * ones'-complement (end-around-carry) sum of the copied data on top of
 * the initial sum.
 *
 * On an exception, -EFAULT is stored through src_err_ptr (faulting load)
 * or dst_err_ptr (faulting store) if that pointer is non-NULL, and the
 * routine returns immediately.  This routine does not zero the
 * destination itself.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 * ecx  sum (32bit)
 * r8   src_err_ptr (pointer to int, may be NULL)
 * r9   dst_err_ptr (pointer to int, may be NULL)
 *
 * Output
 * eax  32bit sum (the 64bit accumulator is folded to 32 bits before
 *      returning). Undefined in case of exception.
 *
 * Clobbers
 * rcx, rdx, rsi, rdi, r8-r11, flags.  rbx and r12-r15 are used as
 * temporaries but are saved on entry and restored on exit.
 *
 * Stack frame (7 quadwords)
 * 0*8(%rsp)  src_err_ptr
 * 1*8(%rsp)  dst_err_ptr
 * 2*8(%rsp)  saved rbx
 * 3*8(%rsp)  saved r12
 * 4*8(%rsp)  saved r14
 * 5*8(%rsp)  saved r13
 * 6*8(%rsp)  saved r15
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */

	/*
	 * Tag the next instruction as a user-space load: a fault on it is
	 * redirected to .Lbad_source.
	 */
	.macro source
10:
	_ASM_EXTABLE_UA(10b, .Lbad_source)
	.endm

	/*
	 * Tag the next instruction as a user-space store: a fault on it is
	 * redirected to .Lbad_dest.
	 */
	.macro dest
20:
	_ASM_EXTABLE_UA(20b, .Lbad_dest)
	.endm

	/*
	 * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
	 * potentially unmapped kernel address.
	 * A fault on the next instruction resumes at label \L.
	 */
	.macro ignore L=.Lignore
30:
	_ASM_EXTABLE(30b, \L)
	.endm


ENTRY(csum_partial_copy_generic)
	/*
	 * .Lignore is only kept as the default target of the "ignore"
	 * macro.  The former "cmpl $3*64, %edx; jle .Lignore" in front of
	 * it branched to the very next instruction and has been dropped.
	 */
.Lignore:
	/* Allocate the frame and save the callee-saved temporaries. */
	subq	$7*8, %rsp
	movq	%rbx, 2*8(%rsp)
	movq	%r12, 3*8(%rsp)
	movq	%r14, 4*8(%rsp)
	movq	%r13, 5*8(%rsp)
	movq	%r15, 6*8(%rsp)

	/* r8/r9 are reused below, so park the error pointers on the stack. */
	movq	%r8, (%rsp)
	movq	%r9, 1*8(%rsp)

	movl	%ecx, %eax		/* rax = initial sum (zero-extended) */
	movl	%edx, %ecx		/* rcx = len (zero-extended) */

	xorl	%r9d, %r9d		/* r9 = 0, used to add in carries */
	movq	%rcx, %r12

	shrq	$6, %r12		/* r12 = number of 64 byte blocks */
	jz	.Lhandle_tail		/* < 64 */

	clc				/* shrq left the shifted-out bit in CF */

	/* main loop. copy and sum in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11:	temp3, rdx: temp4, r12 loopcnt */
	/* r10:	temp5, r15: temp6, r14 temp7, r13 temp8 */
	/*
	 * CF is carried from one iteration into the next: decl, movq and
	 * leaq do not modify CF, so the adcq chain continues across the
	 * loop back-edge.
	 */
	.p2align 4
.Lloop:
	source
	movq	(%rdi), %rbx
	source
	movq	8(%rdi), %r8
	source
	movq	16(%rdi), %r11
	source
	movq	24(%rdi), %rdx

	source
	movq	32(%rdi), %r10
	source
	movq	40(%rdi), %r15
	source
	movq	48(%rdi), %r14
	source
	movq	56(%rdi), %r13

	/* A fault on the prefetch simply continues at 2: */
	ignore 2f
	prefetcht0 5*64(%rdi)
2:
	adcq	%rbx, %rax
	adcq	%r8, %rax
	adcq	%r11, %rax
	adcq	%rdx, %rax
	adcq	%r10, %rax
	adcq	%r15, %rax
	adcq	%r14, %rax
	adcq	%r13, %rax

	decl	%r12d			/* sets ZF for the jnz below, keeps CF */

	dest
	movq	%rbx, (%rsi)
	dest
	movq	%r8, 8(%rsi)
	dest
	movq	%r11, 16(%rsi)
	dest
	movq	%rdx, 24(%rsi)

	dest
	movq	%r10, 32(%rsi)
	dest
	movq	%r15, 40(%rsi)
	dest
	movq	%r14, 48(%rsi)
	dest
	movq	%r13, 56(%rsi)

	/* leaq instead of addq: the pointer updates must not touch flags */
	leaq	64(%rdi), %rdi
	leaq	64(%rsi), %rsi

	jnz	.Lloop

	adcq	%r9, %rax		/* add in carry */

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx:	count */
	movl	%ecx, %r10d		/* r10d = len, kept for the sub-8 tail */
	andl	$63, %ecx
	shrl	$3, %ecx		/* ecx = remaining 8 byte words */
	jz	.Lfold
	clc				/* shrl left the shifted-out bit in CF */
	.p2align 4
.Lloop_8:
	source
	movq	(%rdi), %rbx
	adcq	%rbx, %rax
	decl	%ecx
	dest
	movq	%rbx, (%rsi)
	leaq	8(%rsi), %rsi		/* preserve carry */
	leaq	8(%rdi), %rdi
	jnz	.Lloop_8
	adcq	%r9, %rax		/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl	%eax, %ebx
	shrq	$32, %rax
	addl	%ebx, %eax
	adcl	%r9d, %eax

	/* do last up to 6 bytes */
.Lhandle_7:
	movl	%r10d, %ecx
	andl	$7, %ecx
	shrl	$1, %ecx		/* ecx = remaining 2 byte words */
	jz	.Lhandle_1
	xorl	%ebx, %ebx		/* movw below only writes %bx */
	clc
	.p2align 4
.Lloop_1:
	source
	movw	(%rdi), %bx
	adcl	%ebx, %eax
	decl	%ecx
	dest
	movw	%bx, (%rsi)
	leaq	2(%rdi), %rdi
	leaq	2(%rsi), %rsi
	jnz	.Lloop_1
	adcl	%r9d, %eax		/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testb	$1, %r10b
	jz	.Lende
	xorl	%ebx, %ebx		/* movb below only writes %bl */
	source
	movb	(%rdi), %bl
	dest
	movb	%bl, (%rsi)
	addl	%ebx, %eax
	adcl	%r9d, %eax		/* carry */

	/* Common exit: restore callee-saved registers and drop the frame. */
.Lende:
	movq	2*8(%rsp), %rbx
	movq	3*8(%rsp), %r12
	movq	4*8(%rsp), %r14
	movq	5*8(%rsp), %r13
	movq	6*8(%rsp), %r15
	addq	$7*8, %rsp
	ret

	/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
	movq	(%rsp), %rax		/* rax = src_err_ptr */
	testq	%rax, %rax
	jz	.Lende			/* NULL: nothing to report */
	movl	$-EFAULT, (%rax)
	jmp	.Lende

.Lbad_dest:
	movq	8(%rsp), %rax		/* rax = dst_err_ptr */
	testq	%rax, %rax
	jz	.Lende			/* NULL: nothing to report */
	movl	$-EFAULT, (%rax)
	jmp	.Lende
ENDPROC(csum_partial_copy_generic)