/* SPDX-License-Identifier: GPL-2.0+
 *
 * $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *              Pentium Pro/II routines:
 *              Alexander Kjeldaas <astor@guardian.no>
 *              Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *                   converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 */

#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * Computes a partial checksum, e.g. for TCP/UDP fragments.
 */

/*
 * unsigned int csum_partial(const unsigned char *buf, int len,
 *                           unsigned int sum);
 */

.text
ENTRY(csum_partial)
	/*
	 * Register usage:
	 *   r4  buf - read pointer, advanced by every post-increment load
	 *   r5  len - reused as a loop counter once the length is saved
	 *   r6  sum - 32-bit accumulator; carries are folded back with addc
	 *   r1      - number of bytes still to sum, kept for the tail stages
	 *   r0, r2, r3 - scratch
	 * The result is returned in r0.
	 *
	 * Stages:
	 *   entry: if bit 1 of buf is set, consume one halfword so that the
	 *          longword loops start on a 4-byte boundary
	 *   3:     32 bytes (eight longwords) per iteration
	 *   5:     remaining whole longwords (bits 0x1c of the length)
	 *   6:     remaining 1-3 bytes (bits 0x3 of the length)
	 *
	 * Only bit 1 of the address is examined, so an odd buf is not
	 * realigned here.
	 *
	 * Historical note kept from the original comment (presumably
	 * inherited from the x86 routines this file derives from):
	 * experiments with Ethernet and SLIP connections show that buf
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */
	mov	r5, r1		! r1 = len, kept for the tail stages
	mov	r4, r0
	tst	#2, r0		! Check alignment.
	bt	2f		! Jump if alignment is ok.
	!
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	 clrt			! (delay slot) start the addc chain with T = 0
	bra	6f
	 add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov	r5, r1		! Save new len for later use.
	mov.w	@r4+, r0
	extu.w	r0, r0
	addc	r0, r6
	bf	2f		! no carry out of the halfword add
	add	#1, r6		! fold the carry back into the sum
2:
	mov	#-5, r0
	shld	r0, r5		! r5 = number of 32-byte chunks
	tst	r5, r5
	bt/s	4f		! if it is =0, go to 4f
	 clrt
	.align	2
3:
	! Eight longwords per pass; loads are interleaved with the adds.
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0		! save the carry: dt overwrites T
	dt	r5
	bf/s	3b
	 cmp/eq	#1, r0		! (delay slot) restore T = saved carry
	! here, we know r5==0
	addc	r5, r6			! add carry to r6
4:
	mov	r1, r0
	and	#0x1c, r0	! bytes held in whole longwords after the chunks
	tst	r0, r0
	bt/s	6f
	 mov	r0, r5
	shlr2	r5		! r5 = number of remaining longwords
	mov	#0, r2
5:
	! r2 always holds the previously loaded longword (0 on the first
	! pass), so each iteration adds the value fetched one pass earlier.
	addc	r2, r6
	mov.l	@r4+, r2
	movt	r0		! save the carry across dt
	dt	r5
	bf/s	5b
	 cmp/eq	#1, r0		! (delay slot) restore T = saved carry
	addc	r2, r6		! add the last longword loaded
	addc	r5, r6		! r5==0 here, so it means add carry-bit
6:
	mov	r1, r5
	mov	#3, r0
	and	r0, r5		! r5 = trailing byte count (0-3)
	tst	r5, r5
	bt	9f		! if it is =0 go to 9f
	mov	#2, r1
	cmp/hs  r1, r5
	bf	7f		! exactly one byte left
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f		! exactly two bytes left
	 clrt
	shll16	r0		! three bytes: halfword goes in the upper half
	addc	r0, r6
7:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6		! fold in the final carry
9:
	rts
	 mov	r6, r0		! (delay slot) return the accumulated sum

/*
 * unsigned int csum_partial_copy_generic(const char *src, char *dst, int len)
 */

/*
 * Copy from src to dst while checksumming, otherwise like csum_partial with
 * the initial sum being ~0U.
 */

/*
 * EXC(insn): emit one memory-access instruction under the local label 9999
 * and add a record to the __ex_table section pairing that instruction's
 * address with the fixup label 6001 that follows the use site.  A fault
 * taken on the wrapped instruction is thereby meant to resume at 6001
 * instead of being treated as a kernel bug.
 */
#define EXC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

!
! r4:	const char *SRC
! r5:	char *DST
! r6:	int LEN
!
ENTRY(csum_partial_copy_generic)
	/*
	 * Register usage:
	 *   r4  src - read pointer (post-incremented by the loads)
	 *   r5  dst - write pointer
	 *   r6  len - reused as a loop counter once the length is saved
	 *   r7      - running 32-bit sum, seeded with ~0U
	 *   r2      - number of bytes still to copy, kept for the tail stages
	 *   r0, r1  - scratch
	 * Returns the sum in r0, or 0 when one of the accesses wrapped in
	 * EXC() faults (see label 6001 at the end).
	 *
	 * The longword path is used only when src and dst share the same
	 * low two address bits and dst is even; everything else is copied
	 * two bytes at a time by the loop at the first label 5.
	 */
	mov	#-1,r7		! initial sum = ~0U
	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1
	and	r5,r0
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		! Jump if we had at least two bytes.
	bt/s	1f
	 clrt
	add	#2,r6		! r6 was < 2.	Deal with it.
	bra	4f
	 mov	r6,r2		! (delay slot) r2 = len for the tail code

3:	! Handle different src and dest alignments.
	! This is not common, so simple byte by byte copy will do.
	mov	r6,r2		! r2 = len, kept for the odd-byte test below
	shlr	r6		! r6 = number of byte pairs
	tst	r6,r6
	bt	4f
	clrt
	.align	2
5:
EXC(	mov.b	@r4+,r1 	)
EXC(	mov.b	@r4+,r0		)
	extu.b	r1,r1
EXC(	mov.b	r1,@r5		)
EXC(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

	! Combine the two bytes into the halfword they form in memory.
#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0

	addc	r0,r7
	movt	r0		! save the carry: dt overwrites T
	dt	r6
	bf/s	5b
	 cmp/eq	#1,r0		! (delay slot) restore T = saved carry
	mov	#0,r0
	addc	r0, r7		! fold in the final carry

	mov	r2, r0
	tst	#1, r0
	bt	7f		! even length: done
	bra	5f		! odd length: copy the last byte
	 clrt

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case
	.align	2
1:
EXC(	mov.w	@r4+,r0		)
EXC(	mov.w	r0,@r5		)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
2:
	mov	r6,r2		! r2 = remaining len, kept for the tail stages
	mov	#-5,r0
	shld	r0,r6		! r6 = number of 32-byte chunks
	tst	r6,r6
	bt/s	2f
	 clrt
	.align	2
1:
	! 32 bytes per pass: four load-pair / store-pair groups.
EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@r5		)
EXC(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(8,r5)	)
EXC(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0 	)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(16,r5)	)
EXC(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(24,r5)	)
EXC(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0		! save the carry across dt
	dt	r6
	bf/s	1b
	 cmp/eq	#1,r0		! (delay slot) restore T = saved carry
	mov	#0,r0
	addc	r0,r7		! fold in the final carry

2:	mov	r2,r6
	mov	#0x1c,r0
	and	r0,r6		! bytes held in whole longwords after the chunks
	cmp/pl	r6
	bf/s	4f
	 clrt
	shlr2	r6		! r6 = number of remaining longwords
3:
EXC(	mov.l	@r4+,r0	)
	addc	r0,r7
EXC(	mov.l	r0,@r5	)
	add	#4,r5
	movt	r0		! save the carry across dt
	dt	r6
	bf/s	3b
	 cmp/eq	#1,r0		! (delay slot) restore T = saved carry
	mov	#0,r0
	addc	r0,r7
4:	mov	r2,r6
	mov	#3,r0
	and	r0,r6		! r6 = trailing byte count (0-3)
	cmp/pl	r6
	bf	7f
	mov	#2,r1
	cmp/hs	r1,r6
	bf	5f		! exactly one byte left
EXC(	mov.w	@r4+,r0	)
EXC(	mov.w	r0,@r5	)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	6f		! exactly two bytes left
	 clrt
	shll16	r0		! three bytes: halfword goes in the upper half
	addc	r0,r7
5:
EXC(	mov.b	@r4+,r0	)
EXC(	mov.b	r0,@r5	)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7		! fold in the final carry
7:

! Exception handler:
! The fixup code below is assembled into the .fixup section; the normal
! return sequence resumes in the text section after the .previous directive.
.section .fixup, "ax"

6001:
	rts
	 mov	#0,r0		! (delay slot) a faulting access returns 0
.previous
	rts
	 mov	r7,r0		! (delay slot) return the accumulated sum