xref: /openbmc/linux/arch/x86/lib/csum-copy_64.S (revision e23feb16)
1/*
2 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
3 *
4 * This file is subject to the terms and conditions of the GNU General Public
5 * License.  See the file COPYING in the main directory of this archive
6 * for more details. No warranty for anything given at all.
7 */
8#include <linux/linkage.h>
9#include <asm/dwarf2.h>
10#include <asm/errno.h>
11#include <asm/asm.h>
12
/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT. This routine
 * does not zero the destination itself; that is left to the wrappers.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 * ecx  sum (32bit)
 * r8   src_err_ptr (int), may be NULL
 * r9   dst_err_ptr (int), may be NULL
 *
 * Output
 * eax  32bit sum (the 64bit accumulator is folded before returning).
 *      Undefined in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */
32
/*
 * source - mark the instruction that follows as a read from the source buffer.
 *
 * Places numeric local label 10 at the current address and hands
 * (10b, .Lbad_source) to _ASM_EXTABLE (from <asm/asm.h>), naming
 * .Lbad_source as the fixup label for that address.
 * Must be written directly before the instruction it covers.
 */
	.macro source
10:
	_ASM_EXTABLE(10b, .Lbad_source)
	.endm
37
/*
 * dest - mark the instruction that follows as a write to the destination
 * buffer.
 *
 * Places numeric local label 20 at the current address and hands
 * (20b, .Lbad_dest) to _ASM_EXTABLE (from <asm/asm.h>), naming
 * .Lbad_dest as the fixup label for that address.
 * Must be written directly before the instruction it covers.
 */
	.macro dest
20:
	_ASM_EXTABLE(20b, .Lbad_dest)
	.endm
42
/*
 * ignore - mark the instruction that follows with a caller-chosen fixup.
 *
 * L: label handed to _ASM_EXTABLE as the fixup for the covered address;
 *    defaults to .Lignore when the macro is invoked without an argument.
 *
 * Places numeric local label 30 at the current address and hands
 * (30b, \L) to _ASM_EXTABLE (from <asm/asm.h>).
 * Must be written directly before the instruction it covers.
 */
	.macro ignore L=.Lignore
30:
	_ASM_EXTABLE(30b, \L)
	.endm
47
48
ENTRY(csum_partial_copy_generic)
	CFI_STARTPROC

	/*
	 * Copy edx bytes from (rdi) to (rsi) while accumulating a
	 * ones'-complement style sum (add with end-around carry) seeded
	 * with ecx.  Returns the sum folded to 32 bits in eax.
	 *
	 * Stack frame, 7 quadwords:
	 *   0*8  src_err_ptr (incoming r8)
	 *   1*8  dst_err_ptr (incoming r9)
	 *   2*8  saved rbx
	 *   3*8  saved r12
	 *   4*8  saved r14
	 *   5*8  saved r13
	 *   6*8  saved rbp
	 */

	/*
	 * .Lignore is the default fixup label of the "ignore" macro.
	 * Nothing in this function branches to it; it is kept only so
	 * that the macro default stays resolvable.
	 */
.Lignore:
	subq  $7*8, %rsp
	CFI_ADJUST_CFA_OFFSET 7*8
	movq  %rbx, 2*8(%rsp)
	CFI_REL_OFFSET rbx, 2*8
	movq  %r12, 3*8(%rsp)
	CFI_REL_OFFSET r12, 3*8
	movq  %r14, 4*8(%rsp)
	CFI_REL_OFFSET r14, 4*8
	movq  %r13, 5*8(%rsp)
	CFI_REL_OFFSET r13, 5*8
	movq  %rbp, 6*8(%rsp)
	CFI_REL_OFFSET rbp, 6*8

	/* r8/r9 are reused as scratch below; park the error pointers */
	movq  %r8, (%rsp)
	movq  %r9, 1*8(%rsp)

	movl  %ecx, %eax	/* rax = initial sum, upper half cleared */
	movl  %edx, %ecx	/* rcx = len, upper half cleared */

	xorl  %r9d, %r9d	/* r9 = 0, used to add in a pending carry */
	movq  %rcx, %r12

	shrq  $6, %r12		/* r12 = number of whole 64 byte blocks */
	jz	.Lhandle_tail       /* < 64 */

	clc

	/* main loop. copy and checksum in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11:	temp3, rdx: temp4, r12 loopcnt */
	/* r10:	temp5, rbp: temp6, r14 temp7, r13 temp8 */
	.p2align 4
.Lloop:
	source
	movq  (%rdi), %rbx
	source
	movq  8(%rdi), %r8
	source
	movq  16(%rdi), %r11
	source
	movq  24(%rdi), %rdx

	source
	movq  32(%rdi), %r10
	source
	movq  40(%rdi), %rbp
	source
	movq  48(%rdi), %r14
	source
	movq  56(%rdi), %r13

	/* a fault on the prefetch simply continues at 2: */
	ignore 2f
	prefetcht0 5*64(%rdi)
2:
	/* CF is carried from one adcq to the next and across iterations */
	adcq  %rbx, %rax
	adcq  %r8, %rax
	adcq  %r11, %rax
	adcq  %rdx, %rax
	adcq  %r10, %rax
	adcq  %rbp, %rax
	adcq  %r14, %rax
	adcq  %r13, %rax

	/*
	 * decl sets ZF for the jnz below but leaves CF alone; the movq
	 * and leaq instructions in between do not touch the flags.
	 */
	decl %r12d

	dest
	movq %rbx, (%rsi)
	dest
	movq %r8, 8(%rsi)
	dest
	movq %r11, 16(%rsi)
	dest
	movq %rdx, 24(%rsi)

	dest
	movq %r10, 32(%rsi)
	dest
	movq %rbp, 40(%rsi)
	dest
	movq %r14, 48(%rsi)
	dest
	movq %r13, 56(%rsi)

	leaq 64(%rdi), %rdi
	leaq 64(%rsi), %rsi

	jnz	.Lloop

	adcq  %r9, %rax		/* add in carry of the last block */

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx:	count */
	movl %ecx, %r10d	/* r10d keeps the full length for the tails */
	andl $63, %ecx
	shrl $3, %ecx		/* ecx = remaining whole quadwords */
	jz	.Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq (%rdi), %rbx
	adcq %rbx, %rax
	decl %ecx
	dest
	movq %rbx, (%rsi)
	leaq 8(%rsi), %rsi /* preserve carry */
	leaq 8(%rdi), %rdi
	jnz	.Lloop_8
	adcq %r9, %rax	/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl %eax, %ebx
	shrq $32, %rax
	addl %ebx, %eax
	adcl %r9d, %eax

	/* do last up to 6 bytes */
.Lhandle_7:
	movl %r10d, %ecx
	andl $7, %ecx
	shrl $1, %ecx		/* ecx = remaining whole 16 bit words */
	jz   .Lhandle_1
	xorl %ebx, %ebx		/* upper bits of ebx stay zero for movw */
	clc
	.p2align 4
.Lloop_1:
	source
	movw (%rdi), %bx
	adcl %ebx, %eax
	decl %ecx
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	leaq 2(%rsi), %rsi
	jnz .Lloop_1
	adcl %r9d, %eax	/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testl $1, %r10d
	jz    .Lende
	xorl  %ebx, %ebx
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	addl %ebx, %eax
	adcl %r9d, %eax		/* carry */

	CFI_REMEMBER_STATE
.Lende:
	/* common exit: restore callee-saved registers and drop the frame */
	movq 2*8(%rsp), %rbx
	CFI_RESTORE rbx
	movq 3*8(%rsp), %r12
	CFI_RESTORE r12
	movq 4*8(%rsp), %r14
	CFI_RESTORE r14
	movq 5*8(%rsp), %r13
	CFI_RESTORE r13
	movq 6*8(%rsp), %rbp
	CFI_RESTORE rbp
	addq $7*8, %rsp
	CFI_ADJUST_CFA_OFFSET -7*8
	ret
	CFI_RESTORE_STATE

	/* Exception handlers. Very simple, zeroing is done in the wrappers */
	/*
	 * Both handlers load the saved error pointer into rax, so the
	 * value returned in eax after a fault is not a checksum.
	 */
.Lbad_source:
	movq (%rsp), %rax	/* rax = src_err_ptr */
	testq %rax, %rax
	jz   .Lende		/* NULL: nothing to report */
	movl $-EFAULT, (%rax)
	jmp  .Lende

.Lbad_dest:
	movq 8(%rsp), %rax	/* rax = dst_err_ptr */
	testq %rax, %rax
	jz   .Lende		/* NULL: nothing to report */
	movl $-EFAULT, (%rax)
	jmp .Lende
	CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
242