xref: /openbmc/linux/arch/sh/lib/checksum.S (revision f97cee494dc92395a668445bcd24d34c89f4ff8c)
1/* SPDX-License-Identifier: GPL-2.0+
2 *
3 * $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
4 *
5 * INET		An implementation of the TCP/IP protocol suite for the LINUX
6 *		operating system.  INET is implemented using the  BSD Socket
7 *		interface as the means of communication with the user level.
8 *
9 *		IP/TCP/UDP checksumming routines
10 *
11 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
12 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
13 *		Tom May, <ftom@netcom.com>
14 *              Pentium Pro/II routines:
15 *              Alexander Kjeldaas <astor@guardian.no>
16 *              Finn Arne Gangstad <finnag@guardian.no>
17 *		Lots of code moved from tcp.c and ip.c; see those files
18 *		for more names.
19 *
20 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
21 *			     handling.
22 *		Andi Kleen,  add zeroing on error
23 *                   converted to pure assembler
24 *
25 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
26 */
27
28#include <asm/errno.h>
29#include <linux/linkage.h>
30
/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
 *
 * In:		r4 = buf, r5 = len, r6 = sum
 * Out:		r0 = sum with the ones'-complement sum of buf[0..len-1]
 *		added to it (32-bit accumulator, not folded to 16 bits)
 * Scratch:	r0-r3, r7, T bit
 *
 * r7 keeps the original buf so that the exit path can tell whether the
 * buffer started on an odd address.
 *
 * "addc" adds with the T bit as carry-in and leaves the carry-out in T.
 * The loops park T in r0 with "movt" across the "dt" loop test and restore
 * it with "cmp/eq #1, r0" in the branch delay slot, so every carry is fed
 * back into the sum (end-around carry).
 */

.text
ENTRY(csum_partial)
	  /*
	   * The main loop reads 32-bit words, so a buf that starts on an
	   * odd or 2-byte boundary is first advanced to a 4-byte boundary
	   * by summing the leading byte and/or halfword on their own.
	   */
	mov	r4, r0
	tst	#3, r0		! Check alignment.
	bt/s	2f		! Jump if alignment is ok.
	 mov	r4, r7		! Keep a copy to check for alignment
	!
	tst	#1, r0		! Check alignment.
	bt	21f		! Jump if alignment is boundary of 2bytes.

	! buf is odd
	tst	r5, r5
	add	#-1, r5
	bt	10f		! len == 0: nothing was summed, so skip the
				! realignment at 9: and return sum unchanged
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0		! leading byte is the high half of its word
#endif
	addc	r0, r6		! t=0 from previous tst
	mov	#0, r0
	addc	r0, r6		! fold the carry of the byte add back in
	! Rotate the sum left by 8 bits so that the aligned loads that follow
	! are added in swapped byte lanes; the rotate at 9: completes the swap.
	mov	r6, r0
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
	mov	r4, r0
	tst	#2, r0
	bt	2f
21:
	! buf is 2 byte aligned (len could be 0)
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	 clrt
	bra	6f
	 add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov.w	@r4+, r0
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6		! end-around carry
2:
	! buf is 4 byte aligned (len could be 0)
	mov	r5, r1
	mov	#-5, r0
	shld	r0, r1		! r1 = len / 32 = number of 32-byte chunks
	tst	r1, r1
	bt/s	4f		! if it's =0, go to 4f
	 clrt
	.align	2
3:
	! Sum one 32-byte chunk; loads are interleaved with the adds.
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0		! save carry across dt
	dt	r1
	bf/s	3b
	 cmp/eq	#1, r0		! restore carry into T
	! here, we know r1==0
	addc	r1, r6			! add carry to r6
4:
	mov	r5, r0
	and	#0x1c, r0	! bytes left in whole 32-bit words (0..28)
	tst	r0, r0
	bt	6f
	! 4 bytes or more remaining
	mov	r0, r1
	shlr2	r1		! r1 = number of remaining words
	mov	#0, r2
5:
	! Software-pipelined: each pass adds the word loaded by the previous
	! pass (0 on the first pass) and loads the next one.
	addc	r2, r6
	mov.l	@r4+, r2
	movt	r0
	dt	r1
	bf/s	5b
	 cmp/eq	#1, r0
	addc	r2, r6		! add the last word loaded
	addc	r1, r6		! r1==0 here, so it means add carry-bit
6:
	! 3 bytes or less remaining
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	9f		! if it's =0 go to 9f
	mov	#2, r1
	cmp/hs  r1, r5
	bf	7f		! exactly one byte left
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f		! exactly two bytes left
	 clrt
	! three bytes left: add the halfword in the upper half of the
	! accumulator, then fall through for the last byte
	shll16	r0
	addc	r0, r6
7:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0		! trailing byte is the high half of its word
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6		! end-around carry
9:
	! Check if the buffer was misaligned, if so realign sum
	mov	r7, r0
	tst	#1, r0
	bt	10f
	! rotate left by 8 bits again to undo the lane swap done on entry
	mov	r6, r0
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
10:
	rts
	 mov	r6, r0		! delay slot: return value
174
/*
unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
					int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from src to dst while checksumming, otherwise like csum_partial
 *
 * The macros SRC and DST specify the type of access for the instruction,
 * so that a custom exception handler can be called for each access type.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */
190
/*
 * SRC(insn) / DST(insn)
 *
 * Emit "insn" at local label 9999 and add an entry to the __ex_table
 * section holding the address of that instruction and the address of a
 * fixup label: 6001 for SRC(), 6002 for DST().  Both fixup labels are
 * referenced as forward local labels ("6001f"/"6002f"), so they have to be
 * defined after the last use of the macros.
 */
#define SRC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

#define DST(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6002f	;	\
	.previous
202
!
! csum_partial_copy_generic: copy LEN bytes from SRC to DST and add their
! ones'-complement sum to SUM.
!
! r4:	const char *SRC
! r5:	char *DST
! r6:	int LEN
! r7:	int SUM
!
! on stack:
! int *SRC_ERR_PTR
! int *DST_ERR_PTR
!
! Returns the updated sum in r0.  r0-r2 and the T bit are used as scratch.
!
! DST and LEN are pushed on entry for the benefit of the fixup code, which
! gives this frame layout until the common exit pops it again:
!	@(0,r15)  LEN		@(4,r15)  DST
!	@(8,r15)  SRC_ERR_PTR	@(12,r15) DST_ERR_PTR
!
! Loads are wrapped in SRC() and stores in DST() so that a faulting access
! is redirected to 6001 or 6002 respectively.
!
ENTRY(csum_partial_copy_generic)
	mov.l	r5,@-r15
	mov.l	r6,@-r15

	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1
	and	r5,r0
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		! Jump if we had at least two bytes.
	bt/s	1f
	 clrt
	add	#2,r6		! r6 was < 2.	Deal with it.
	bra	4f
	 mov	r6,r2		! r2 = byte count examined by the tail code

3:	! Handle different src and dest alignments.
	! This is not common, so simple byte by byte copy will do.
	mov	r6,r2		! r2 = original length
	shlr	r6		! r6 = number of byte pairs
	tst	r6,r6
	bt	4f
	clrt
	.align	2
5:
	! Copy two bytes and add them to the sum as one 16-bit value.
SRC(	mov.b	@r4+,r1 	)
SRC(	mov.b	@r4+,r0		)
	extu.b	r1,r1
DST(	mov.b	r1,@r5		)
DST(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

	! Build the value a halfword load from src would have produced.
#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0

	addc	r0,r7
	movt	r0		! save carry across dt
	dt	r6
	bf/s	5b
	 cmp/eq	#1,r0		! restore carry into T
	mov	#0,r0
	addc	r0, r7		! end-around carry

	mov	r2, r0
	tst	#1, r0
	bt	7f		! even length: done
	bra	5f		! odd length: one trailing byte left
	 clrt

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case
	.align	2
1:
SRC(	mov.w	@r4+,r0		)
DST(	mov.w	r0,@r5		)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7		! end-around carry
2:
	mov	r6,r2		! r2 = bytes left, kept for the tail code
	mov	#-5,r0
	shld	r0,r6		! r6 = number of 32-byte chunks
	tst	r6,r6
	bt/s	2f
	 clrt
	.align	2
1:
	! Copy and sum one 32-byte chunk.
SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@r5		)
DST(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(8,r5)	)
DST(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0 	)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(16,r5)	)
DST(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(24,r5)	)
DST(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0
	dt	r6
	bf/s	1b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7		! end-around carry

2:	mov	r2,r6
	mov	#0x1c,r0
	and	r0,r6		! bytes left in whole 32-bit words (0..28)
	cmp/pl	r6
	bf/s	4f
	 clrt
	shlr2	r6		! r6 = number of remaining words
3:
SRC(	mov.l	@r4+,r0	)
	addc	r0,r7
DST(	mov.l	r0,@r5	)
	add	#4,r5
	movt	r0
	dt	r6
	bf/s	3b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7		! end-around carry
4:	mov	r2,r6
	mov	#3,r0
	and	r0,r6		! 3 bytes or less remaining
	cmp/pl	r6
	bf	7f
	mov	#2,r1
	cmp/hs	r1,r6
	bf	5f		! exactly one byte left
SRC(	mov.w	@r4+,r0	)
DST(	mov.w	r0,@r5	)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	6f		! exactly two bytes left
	 clrt
	! three bytes left: add the halfword in the upper half of the
	! accumulator, then fall through for the last byte
	shll16	r0
	addc	r0,r7
5:
SRC(	mov.b	@r4+,r0	)
DST(	mov.b	r0,@r5	)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0		! trailing byte is the high half of its word
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7		! end-around carry
7:
5000:
	! Common exit.  Execution falls through to the epilogue that follows
	! the .fixup section below; the fixup code jumps back here.

! Exception handler:
.section .fixup, "ax"

6001:
	! Fault on a source access: store -EFAULT through src_err_ptr.
	mov.l	@(8,r15),r0			! src_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0

	! zero the complete destination - computing the rest
	! is too much work
	mov.l	@(4,r15),r5		! dst
	mov.l	@r15,r6			! len
	mov	#0,r7			! also makes the returned sum 0
1:	mov.b	r7,@r5
	dt	r6
	bf/s	1b
	 add	#1,r5
	mov.l	8000f,r0
	jmp	@r0
	 nop
	.align	2
8000:	.long	5000b

6002:
	! Fault on a destination access: store -EFAULT through dst_err_ptr
	! and return the sum accumulated so far.
	mov.l	@(12,r15),r0			! dst_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0
	mov.l	8001f,r0
	jmp	@r0
	 nop
	.align	2
8001:	.long	5000b

.previous
	add	#8,r15		! drop the saved DST and LEN
	rts
	 mov	r7,r0		! delay slot: return value
415