xref: /openbmc/linux/arch/powerpc/lib/checksum_32.S (revision c900529f3d9161bfde5cca0754f83b4d3c3e0220)
12874c5fdSThomas Gleixner/* SPDX-License-Identifier: GPL-2.0-or-later */
270d64ceaSPaul Mackerras/*
370d64ceaSPaul Mackerras * This file contains assembly-language implementations
470d64ceaSPaul Mackerras * of IP-style 1's complement checksum routines.
570d64ceaSPaul Mackerras *
670d64ceaSPaul Mackerras *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
770d64ceaSPaul Mackerras *
870d64ceaSPaul Mackerras * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
970d64ceaSPaul Mackerras */
1070d64ceaSPaul Mackerras
11*39326182SMasahiro Yamada#include <linux/export.h>
1270d64ceaSPaul Mackerras#include <linux/sys.h>
1370d64ceaSPaul Mackerras#include <asm/processor.h>
147aef4136SChristophe Leroy#include <asm/cache.h>
1570d64ceaSPaul Mackerras#include <asm/errno.h>
1670d64ceaSPaul Mackerras#include <asm/ppc_asm.h>
1770d64ceaSPaul Mackerras
1870d64ceaSPaul Mackerras	.text
1970d64ceaSPaul Mackerras
2070d64ceaSPaul Mackerras/*
2170d64ceaSPaul Mackerras * computes the checksum of a memory block at buff, length len,
2270d64ceaSPaul Mackerras * and adds in "sum" (32-bit)
2370d64ceaSPaul Mackerras *
247e393220SChristophe Leroy * __csum_partial(buff, len, sum)
2570d64ceaSPaul Mackerras */
/*
 * Register roles as used by the code below:
 *   r3 = buff (biased by -4 on entry so every access uses offset 4 and the
 *        update-form loads can advance the pointer), r4 = len,
 *   r5 = running 32-bit sum, r0/r6/r7/r8 = scratch, CTR = loop counter.
 * The sum is accumulated with adde so the carry out of each addition is fed
 * into the next one; the last carry is folded in by the addze at label 5,
 * which also places the result in r3.
 *
 * Layout of the routine:
 *   - optional leading halfword when bit 1 of the buffer address is set
 *   - (word count mod 4) single words (label 2)
 *   - groups of 4 words (labels 21/22/23)
 *   - optional trailing halfword (label 3) and trailing byte (label 4)
 */
267e393220SChristophe Leroy_GLOBAL(__csum_partial)
2770d64ceaSPaul Mackerras	subi	r3,r3,4
2848821a34SChristophe Leroy	srawi.	r6,r4,2		/* Divide len by 4 and also clear carry */
2970d64ceaSPaul Mackerras	beq	3f		/* if we're doing < 4 bytes */
3048821a34SChristophe Leroy	andi.	r0,r3,2		/* Align buffer to longword boundary */
3170d64ceaSPaul Mackerras	beq+	1f
3248821a34SChristophe Leroy	lhz	r0,4(r3)	/* do 2 bytes to get aligned */
3370d64ceaSPaul Mackerras	subi	r4,r4,2
3448821a34SChristophe Leroy	addi	r3,r3,2
3570d64ceaSPaul Mackerras	srwi.	r6,r4,2		/* # words to do */
3648821a34SChristophe Leroy	adde	r5,r5,r0
3770d64ceaSPaul Mackerras	beq	3f
	/* r6 = word count; peel off (count mod 4) words one at a time */
38f867d556SChristophe Leroy1:	andi.	r6,r6,3		/* Prepare to handle words 4 by 4 */
39f867d556SChristophe Leroy	beq	21f
40f867d556SChristophe Leroy	mtctr	r6
4148821a34SChristophe Leroy2:	lwzu	r0,4(r3)
4248821a34SChristophe Leroy	adde	r5,r5,r0
4370d64ceaSPaul Mackerras	bdnz	2b
	/*
	 * 4-words-per-iteration loop.  Loads and adds are interleaved; the
	 * fourth word of each group (r8, fetched by the lwzu that also
	 * advances r3 by 16) is only added at the top of the next iteration
	 * or, for the last group, at label 23.
	 */
44f867d556SChristophe Leroy21:	srwi.	r6,r4,4		/* # blocks of 4 words to do */
45f867d556SChristophe Leroy	beq	3f
46373e098eSChristophe Leroy	lwz	r0,4(r3)
47f867d556SChristophe Leroy	mtctr	r6
48f867d556SChristophe Leroy	lwz	r6,8(r3)
49f867d556SChristophe Leroy	adde	r5,r5,r0
50373e098eSChristophe Leroy	lwz	r7,12(r3)
51f867d556SChristophe Leroy	adde	r5,r5,r6
52373e098eSChristophe Leroy	lwzu	r8,16(r3)
53f867d556SChristophe Leroy	adde	r5,r5,r7
54373e098eSChristophe Leroy	bdz	23f
55373e098eSChristophe Leroy22:	lwz	r0,4(r3)
56f867d556SChristophe Leroy	adde	r5,r5,r8
57373e098eSChristophe Leroy	lwz	r6,8(r3)
58373e098eSChristophe Leroy	adde	r5,r5,r0
59373e098eSChristophe Leroy	lwz	r7,12(r3)
60373e098eSChristophe Leroy	adde	r5,r5,r6
61373e098eSChristophe Leroy	lwzu	r8,16(r3)
62373e098eSChristophe Leroy	adde	r5,r5,r7
63f867d556SChristophe Leroy	bdnz	22b
64373e098eSChristophe Leroy23:	adde	r5,r5,r8
	/* tail: one halfword if bit 1 of len is set, then one byte if bit 0 is set */
6548821a34SChristophe Leroy3:	andi.	r0,r4,2
6648821a34SChristophe Leroy	beq+	4f
6748821a34SChristophe Leroy	lhz	r0,4(r3)
6870d64ceaSPaul Mackerras	addi	r3,r3,2
6948821a34SChristophe Leroy	adde	r5,r5,r0
7048821a34SChristophe Leroy4:	andi.	r0,r4,1
7148821a34SChristophe Leroy	beq+	5f
7248821a34SChristophe Leroy	lbz	r0,4(r3)
7348821a34SChristophe Leroy	slwi	r0,r0,8		/* Upper byte of word */
7448821a34SChristophe Leroy	adde	r5,r5,r0
7548821a34SChristophe Leroy5:	addze	r3,r5		/* add in final carry */
7670d64ceaSPaul Mackerras	blr
779445aa1aSAl ViroEXPORT_SYMBOL(__csum_partial)
7870d64ceaSPaul Mackerras
7970d64ceaSPaul Mackerras/*
8070d64ceaSPaul Mackerras * Computes the checksum of a memory block at src, length len,
8170d65cd5SAl Viro * and adds in 0xffffffff, while copying the block to dst.
8270d65cd5SAl Viro * If an access exception occurs it returns zero.
8370d64ceaSPaul Mackerras *
8470d65cd5SAl Viro * csum_partial_copy_generic(src, dst, len)
8570d64ceaSPaul Mackerras */
/*
 * CSUM_COPY_16_BYTES_WITHEX(n): copy and checksum one 16-byte chunk.
 *
 * Loads four words from offsets 4..16 of r4 into r7-r10, then stores them
 * to offsets 4..16 of r6, adding each word into r12 with adde (carry
 * chained).  The last load and the last store use the update form, so r4
 * and r6 each advance by 16.
 *
 * Every memory access gets its own numeric label 8<n>0 .. 8<n>7 (loads are
 * 0-3, stores are 4-7) so that CSUM_COPY_16_BYTES_EXCODE(n) can refer to it.
 */
867aef4136SChristophe Leroy#define CSUM_COPY_16_BYTES_WITHEX(n)	\
877aef4136SChristophe Leroy8 ## n ## 0:			\
887aef4136SChristophe Leroy	lwz	r7,4(r4);	\
897aef4136SChristophe Leroy8 ## n ## 1:			\
907aef4136SChristophe Leroy	lwz	r8,8(r4);	\
917aef4136SChristophe Leroy8 ## n ## 2:			\
927aef4136SChristophe Leroy	lwz	r9,12(r4);	\
937aef4136SChristophe Leroy8 ## n ## 3:			\
947aef4136SChristophe Leroy	lwzu	r10,16(r4);	\
957aef4136SChristophe Leroy8 ## n ## 4:			\
967aef4136SChristophe Leroy	stw	r7,4(r6);	\
977aef4136SChristophe Leroy	adde	r12,r12,r7;	\
987aef4136SChristophe Leroy8 ## n ## 5:			\
997aef4136SChristophe Leroy	stw	r8,8(r6);	\
1007aef4136SChristophe Leroy	adde	r12,r12,r8;	\
1017aef4136SChristophe Leroy8 ## n ## 6:			\
1027aef4136SChristophe Leroy	stw	r9,12(r6);	\
1037aef4136SChristophe Leroy	adde	r12,r12,r9;	\
1047aef4136SChristophe Leroy8 ## n ## 7:			\
1057aef4136SChristophe Leroy	stwu	r10,16(r6);	\
1067aef4136SChristophe Leroy	adde	r12,r12,r10
1077aef4136SChristophe Leroy
/*
 * CSUM_COPY_16_BYTES_EXCODE(n): emit one EX_TABLE entry for each of the
 * eight labelled accesses (8<n>0 .. 8<n>7) of CSUM_COPY_16_BYTES_WITHEX(n),
 * all pointing at the common "fault" label.
 *
 * The labels are referenced with the "b" (backward) suffix, so this macro
 * must be expanded after the matching CSUM_COPY_16_BYTES_WITHEX(n).
 * EX_TABLE itself is defined outside this file; presumably it records an
 * exception-table fixup for the given instruction -- confirm against its
 * definition.
 */
1087aef4136SChristophe Leroy#define CSUM_COPY_16_BYTES_EXCODE(n)		\
10970d65cd5SAl Viro	EX_TABLE(8 ## n ## 0b, fault);	\
11070d65cd5SAl Viro	EX_TABLE(8 ## n ## 1b, fault);	\
11170d65cd5SAl Viro	EX_TABLE(8 ## n ## 2b, fault);	\
11270d65cd5SAl Viro	EX_TABLE(8 ## n ## 3b, fault);	\
11370d65cd5SAl Viro	EX_TABLE(8 ## n ## 4b, fault);	\
11470d65cd5SAl Viro	EX_TABLE(8 ## n ## 5b, fault);	\
11570d65cd5SAl Viro	EX_TABLE(8 ## n ## 6b, fault);	\
11670d65cd5SAl Viro	EX_TABLE(8 ## n ## 7b, fault);
1177aef4136SChristophe Leroy
/*
 * Assembler-level aliases for the L1 cache geometry, used by
 * csum_partial_copy_generic: CACHELINE_BYTES is the prefetch stride,
 * LG_CACHELINE_BYTES the shift that converts a byte count into a number of
 * cache lines, and CACHELINE_MASK the mask that extracts the offset within
 * a cache line.
 */
1187aef4136SChristophe Leroy	.text
1197aef4136SChristophe Leroy
1207aef4136SChristophe LeroyCACHELINE_BYTES = L1_CACHE_BYTES
1217aef4136SChristophe LeroyLG_CACHELINE_BYTES = L1_CACHE_SHIFT
1227aef4136SChristophe LeroyCACHELINE_MASK = (L1_CACHE_BYTES-1)
1237aef4136SChristophe Leroy
/*
 * Register roles as used by the code below:
 *   on entry  r3 = src, r4 = dst, r5 = len
 *   after the prologue
 *     r4  = src - 4 and r6 = dst - 4 (biased so that every access uses
 *           offset 4 and update-form loads/stores advance the pointers)
 *     r5  = bytes still to copy
 *     r12 = running sum, seeded with -1 (0xffffffff); accumulated with adde
 *     r0  = bytes needed to bring dst up to a cache-line boundary
 *     cr7 = "eq" means the result is returned as is; "ne" means it is
 *           rotated left by 8 bits before returning (odd destination)
 *   result in r3; a fault on any access listed in the exception table
 *   lands on "fault", which returns 0.
 *
 * Phases: bytes then words up to a destination cache-line boundary, whole
 * cache lines (dcbt on the source, dcbz on the destination, 16-byte copy
 * macros), remaining words, then an optional halfword and an optional byte.
 */
12470d64ceaSPaul Mackerras_GLOBAL(csum_partial_copy_generic)
12570d65cd5SAl Viro	li	r12,-1
12670d65cd5SAl Viro	addic	r0,r0,0			/* clear carry */
1277aef4136SChristophe Leroy	addi	r6,r4,-4
1287aef4136SChristophe Leroy	neg	r0,r4
1297aef4136SChristophe Leroy	addi	r4,r3,-4
1307aef4136SChristophe Leroy	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	/* default: no final rotation; only overridden by the cmplwi cr7 below */
1318540571eSChristophe Leroy	crset	4*cr7+eq
1327aef4136SChristophe Leroy	beq	58f
1337aef4136SChristophe Leroy
1347aef4136SChristophe Leroy	cmplw	0,r5,r0			/* is this more than total to do? */
1357aef4136SChristophe Leroy	blt	63f			/* if not much to do */
	/* r7 = 8 when the low bit of the destination address is set, else 0 */
1368540571eSChristophe Leroy	rlwinm	r7,r6,3,0x8
1378540571eSChristophe Leroy	rlwnm	r12,r12,r7,0,31	/* odd destination address: rotate one byte */
1388540571eSChristophe Leroy	cmplwi	cr7,r7,0	/* is destination address even ? */
1397aef4136SChristophe Leroy	andi.	r8,r0,3			/* get it word-aligned first */
1407aef4136SChristophe Leroy	mtctr	r8
1417aef4136SChristophe Leroy	beq+	61f
	/* copy 1-3 bytes, packing them into r3 so they are summed in one adde */
1427aef4136SChristophe Leroy	li	r3,0
1437aef4136SChristophe Leroy70:	lbz	r9,4(r4)		/* do some bytes */
1447aef4136SChristophe Leroy	addi	r4,r4,1
1457aef4136SChristophe Leroy	slwi	r3,r3,8
1467aef4136SChristophe Leroy	rlwimi	r3,r9,0,24,31
1477aef4136SChristophe Leroy71:	stb	r9,4(r6)
1487aef4136SChristophe Leroy	addi	r6,r6,1
1497aef4136SChristophe Leroy	bdnz	70b
1507aef4136SChristophe Leroy	adde	r12,r12,r3
	/* len -= alignment bytes; then copy the whole words left before the boundary */
1517aef4136SChristophe Leroy61:	subf	r5,r0,r5
1527aef4136SChristophe Leroy	srwi.	r0,r0,2
1537aef4136SChristophe Leroy	mtctr	r0
1547aef4136SChristophe Leroy	beq	58f
1557aef4136SChristophe Leroy72:	lwzu	r9,4(r4)		/* do some words */
1567aef4136SChristophe Leroy	adde	r12,r12,r9
1577aef4136SChristophe Leroy73:	stwu	r9,4(r6)
1587aef4136SChristophe Leroy	bdnz	72b
1597aef4136SChristophe Leroy
	/*
	 * r0 = number of whole cache lines, r5 = bytes left after them.
	 * r11 = 4 is the dcbz offset that compensates for the -4 bias of r6.
	 */
1607aef4136SChristophe Leroy58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
1617aef4136SChristophe Leroy	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
1627aef4136SChristophe Leroy	li	r11,4
1637aef4136SChristophe Leroy	beq	63f
1647aef4136SChristophe Leroy
1657aef4136SChristophe Leroy	/* Here we decide how far ahead to prefetch the source */
	/* r7 = number of lines prefetched ahead, r3 = prefetch offset from r4 */
1667aef4136SChristophe Leroy	li	r3,4
1677aef4136SChristophe Leroy	cmpwi	r0,1
1687aef4136SChristophe Leroy	li	r7,0
1697aef4136SChristophe Leroy	ble	114f
1707aef4136SChristophe Leroy	li	r7,1
1717aef4136SChristophe Leroy#if MAX_COPY_PREFETCH > 1
1727aef4136SChristophe Leroy	/* Heuristically, for large transfers we prefetch
1737aef4136SChristophe Leroy	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
1747aef4136SChristophe Leroy	   we prefetch 1 cacheline ahead. */
1757aef4136SChristophe Leroy	cmpwi	r0,MAX_COPY_PREFETCH
1767aef4136SChristophe Leroy	ble	112f
1777aef4136SChristophe Leroy	li	r7,MAX_COPY_PREFETCH
1787aef4136SChristophe Leroy112:	mtctr	r7
1797aef4136SChristophe Leroy111:	dcbt	r3,r4
1807aef4136SChristophe Leroy	addi	r3,r3,CACHELINE_BYTES
1817aef4136SChristophe Leroy	bdnz	111b
1827aef4136SChristophe Leroy#else
1837aef4136SChristophe Leroy	dcbt	r3,r4
1847aef4136SChristophe Leroy	addi	r3,r3,CACHELINE_BYTES
1857aef4136SChristophe Leroy#endif /* MAX_COPY_PREFETCH > 1 */
1867aef4136SChristophe Leroy
	/*
	 * First pass copies (lines - r7) cache lines while prefetching ahead;
	 * r0 keeps the r7 lines that remain.  If r0 is non-zero after the
	 * loop, the code at the bottom resets r3 = 4, r7 = 0 and comes back
	 * here for a second pass over those remaining lines.
	 */
1877aef4136SChristophe Leroy114:	subf	r8,r7,r0
1887aef4136SChristophe Leroy	mr	r0,r7
1897aef4136SChristophe Leroy	mtctr	r8
1907aef4136SChristophe Leroy
1917aef4136SChristophe Leroy53:	dcbt	r3,r4
1927aef4136SChristophe Leroy54:	dcbz	r11,r6
1937aef4136SChristophe Leroy/* the main body of the cacheline loop */
1947aef4136SChristophe Leroy	CSUM_COPY_16_BYTES_WITHEX(0)
1957aef4136SChristophe Leroy#if L1_CACHE_BYTES >= 32
1967aef4136SChristophe Leroy	CSUM_COPY_16_BYTES_WITHEX(1)
1977aef4136SChristophe Leroy#if L1_CACHE_BYTES >= 64
1987aef4136SChristophe Leroy	CSUM_COPY_16_BYTES_WITHEX(2)
1997aef4136SChristophe Leroy	CSUM_COPY_16_BYTES_WITHEX(3)
2007aef4136SChristophe Leroy#if L1_CACHE_BYTES >= 128
2017aef4136SChristophe Leroy	CSUM_COPY_16_BYTES_WITHEX(4)
2027aef4136SChristophe Leroy	CSUM_COPY_16_BYTES_WITHEX(5)
2037aef4136SChristophe Leroy	CSUM_COPY_16_BYTES_WITHEX(6)
2047aef4136SChristophe Leroy	CSUM_COPY_16_BYTES_WITHEX(7)
2057aef4136SChristophe Leroy#endif
2067aef4136SChristophe Leroy#endif
2077aef4136SChristophe Leroy#endif
2087aef4136SChristophe Leroy	bdnz	53b
2097aef4136SChristophe Leroy	cmpwi	r0,0
2107aef4136SChristophe Leroy	li	r3,4
2117aef4136SChristophe Leroy	li	r7,0
2127aef4136SChristophe Leroy	bne	114b
2137aef4136SChristophe Leroy
	/* remaining whole words (also the entry point for short copies) */
2147aef4136SChristophe Leroy63:	srwi.	r0,r5,2
2157aef4136SChristophe Leroy	mtctr	r0
2167aef4136SChristophe Leroy	beq	64f
2177aef4136SChristophe Leroy30:	lwzu	r0,4(r4)
2187aef4136SChristophe Leroy	adde	r12,r12,r0
2197aef4136SChristophe Leroy31:	stwu	r0,4(r6)
2207aef4136SChristophe Leroy	bdnz	30b
2217aef4136SChristophe Leroy
	/* trailing halfword if bit 1 of the remaining length is set */
2227aef4136SChristophe Leroy64:	andi.	r0,r5,2
2237aef4136SChristophe Leroy	beq+	65f
2247aef4136SChristophe Leroy40:	lhz	r0,4(r4)
22570d64ceaSPaul Mackerras	addi	r4,r4,2
2267aef4136SChristophe Leroy41:	sth	r0,4(r6)
2277aef4136SChristophe Leroy	adde	r12,r12,r0
2287aef4136SChristophe Leroy	addi	r6,r6,2
	/* trailing byte if bit 0 is set; summed shifted left by 8 bits */
2297aef4136SChristophe Leroy65:	andi.	r0,r5,1
2307aef4136SChristophe Leroy	beq+	66f
2317aef4136SChristophe Leroy50:	lbz	r0,4(r4)
2327aef4136SChristophe Leroy51:	stb	r0,4(r6)
2337aef4136SChristophe Leroy	slwi	r0,r0,8
2347aef4136SChristophe Leroy	adde	r12,r12,r0
	/* fold in the last carry; return directly unless cr7 recorded an odd destination */
2357aef4136SChristophe Leroy66:	addze	r3,r12
2367aef4136SChristophe Leroy	beqlr+	cr7
2371bc8b816SChristophe Leroy	rlwinm	r3,r3,8,0,31	/* odd destination address: rotate one byte */
23870d64ceaSPaul Mackerras	blr
23970d64ceaSPaul Mackerras
	/* common fixup target for every EX_TABLE entry below: return 0 */
24070d65cd5SAl Virofault:
24170d65cd5SAl Viro	li	r3,0
24270d64ceaSPaul Mackerras	blr
24370d64ceaSPaul Mackerras
24470d65cd5SAl Viro	EX_TABLE(70b, fault);
24570d65cd5SAl Viro	EX_TABLE(71b, fault);
24670d65cd5SAl Viro	EX_TABLE(72b, fault);
24770d65cd5SAl Viro	EX_TABLE(73b, fault);
24870d65cd5SAl Viro	EX_TABLE(54b, fault);
2497aef4136SChristophe Leroy
2507aef4136SChristophe Leroy/*
2517aef4136SChristophe Leroy * this stuff handles faults in the cacheline loop; faults in the read
25270d65cd5SAl Viro * part and in the write part both branch to the same "fault" label
2537aef4136SChristophe Leroy */
2547aef4136SChristophe Leroy	CSUM_COPY_16_BYTES_EXCODE(0)
2557aef4136SChristophe Leroy#if L1_CACHE_BYTES >= 32
2567aef4136SChristophe Leroy	CSUM_COPY_16_BYTES_EXCODE(1)
2577aef4136SChristophe Leroy#if L1_CACHE_BYTES >= 64
2587aef4136SChristophe Leroy	CSUM_COPY_16_BYTES_EXCODE(2)
2597aef4136SChristophe Leroy	CSUM_COPY_16_BYTES_EXCODE(3)
2607aef4136SChristophe Leroy#if L1_CACHE_BYTES >= 128
2617aef4136SChristophe Leroy	CSUM_COPY_16_BYTES_EXCODE(4)
2627aef4136SChristophe Leroy	CSUM_COPY_16_BYTES_EXCODE(5)
2637aef4136SChristophe Leroy	CSUM_COPY_16_BYTES_EXCODE(6)
2647aef4136SChristophe Leroy	CSUM_COPY_16_BYTES_EXCODE(7)
2657aef4136SChristophe Leroy#endif
2667aef4136SChristophe Leroy#endif
2677aef4136SChristophe Leroy#endif
2687aef4136SChristophe Leroy
26970d65cd5SAl Viro	EX_TABLE(30b, fault);
27070d65cd5SAl Viro	EX_TABLE(31b, fault);
27170d65cd5SAl Viro	EX_TABLE(40b, fault);
27270d65cd5SAl Viro	EX_TABLE(41b, fault);
27370d65cd5SAl Viro	EX_TABLE(50b, fault);
27470d65cd5SAl Viro	EX_TABLE(51b, fault);
27524bfa6a9SNicholas Piggin
2769445aa1aSAl ViroEXPORT_SYMBOL(csum_partial_copy_generic)
277e9c4943aSChristophe Leroy
278e9c4943aSChristophe Leroy/*
279e9c4943aSChristophe Leroy * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
280e9c4943aSChristophe Leroy *			   const struct in6_addr *daddr,
281e9c4943aSChristophe Leroy *			   __u32 len, __u8 proto, __wsum sum)
282e9c4943aSChristophe Leroy */
283e9c4943aSChristophe Leroy
/*
 * Register roles as used by the code below:
 *   r3 = saddr, r4 = daddr, r5 = len, r6 = proto, r7 = sum
 *   r0 = running 32-bit sum, r8-r11 = words loaded from the two addresses
 *
 * The four words at saddr and the four words at daddr are added to sum with
 * a carry chain (addc, then adde), loads being interleaved with the adds.
 * len + proto is then added and the last carry folded in with addze.
 * Finally the 32-bit sum is folded to 16 bits (rotate by 16 and add, which
 * leaves the folded value in the upper halfword), complemented, and the
 * upper halfword is moved into the low 16 bits of r3 as the return value.
 */
284e9c4943aSChristophe Leroy_GLOBAL(csum_ipv6_magic)
285e9c4943aSChristophe Leroy	lwz	r8, 0(r3)
286e9c4943aSChristophe Leroy	lwz	r9, 4(r3)
287e9c4943aSChristophe Leroy	addc	r0, r7, r8
288e9c4943aSChristophe Leroy	lwz	r10, 8(r3)
289e9c4943aSChristophe Leroy	adde	r0, r0, r9
290e9c4943aSChristophe Leroy	lwz	r11, 12(r3)
291e9c4943aSChristophe Leroy	adde	r0, r0, r10
292e9c4943aSChristophe Leroy	lwz	r8, 0(r4)
293e9c4943aSChristophe Leroy	adde	r0, r0, r11
294e9c4943aSChristophe Leroy	lwz	r9, 4(r4)
295e9c4943aSChristophe Leroy	adde	r0, r0, r8
296e9c4943aSChristophe Leroy	lwz	r10, 8(r4)
297e9c4943aSChristophe Leroy	adde	r0, r0, r9
298e9c4943aSChristophe Leroy	lwz	r11, 12(r4)
299e9c4943aSChristophe Leroy	adde	r0, r0, r10
	/* plain add: it must not disturb the carry bit used by the adde chain */
300e9c4943aSChristophe Leroy	add	r5, r5, r6	/* assumption: len + proto doesn't carry */
301e9c4943aSChristophe Leroy	adde	r0, r0, r11
302e9c4943aSChristophe Leroy	adde	r0, r0, r5
303e9c4943aSChristophe Leroy	addze	r0, r0
	/* fold 32 -> 16 bits, complement, and extract the upper halfword */
304e9c4943aSChristophe Leroy	rotlwi	r3, r0, 16
305e9c4943aSChristophe Leroy	add	r3, r0, r3
306e9c4943aSChristophe Leroy	not	r3, r3
307e9c4943aSChristophe Leroy	rlwinm	r3, r3, 16, 16, 31
308e9c4943aSChristophe Leroy	blr
309e9c4943aSChristophe LeroyEXPORT_SYMBOL(csum_ipv6_magic)
310