/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/export.h>
#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

/*
 * Computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit).
 *
 * __csum_partial(r3=buff, r4=len, r5=sum)
 */
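/*
 * As a rough C model (an illustrative sketch only; the names and
 * types below are ours, not the kernel's), the 64-bit accumulation
 * with end-around carry looks like:
 *
 *	u64 s = sum;
 *	for (u64 i = 0; i + 8 <= len; i += 8) {
 *		u64 w = *(const u64 *)(buff + i);
 *		s += w;
 *		s += (s < w);		(end-around carry)
 *	}
 *	(narrower tails are added the same way, then s is folded
 *	 from 64 down to 32 bits)
 *
 * The assembly gets the carry for free: addic clears CA, each adde
 * folds it back into the running sum, and addze adds the final carry
 * before the fold.
 */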
_GLOBAL(__csum_partial)
	addic	r0,r5,0			/* clear carry */

	srdi.	r6,r4,3			/* less than 8 bytes? */
	beq	.Lcsum_tail_word

	/*
	 * If only halfword aligned, align to a doubleword. Since
	 * odd-aligned addresses should be rare and would require more
	 * work to calculate the correct checksum, we ignore that case
	 * and take the potential slowdown of unaligned loads.
	 */
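	/*
	 * Peeling halfwords off the front is safe because the 1's
	 * complement sum is eventually folded modulo 2^16 - 1, under
	 * which a halfword-aligned halfword contributes the same value
	 * no matter which 16-bit lane of the accumulator it lands in.
	 */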
	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
	beq	.Lcsum_aligned

	li	r7,4
	sub	r6,r7,r6
	mtctr	r6

1:
	lhz	r6,0(r3)		/* align to doubleword */
	subi	r4,r4,2
	addi	r3,r3,2
	adde	r0,r0,r6
	bdnz	1b

.Lcsum_aligned:
	/*
	 * We unroll the loop such that each iteration is 64 bytes with an
	 * entry and exit limb of 64 bytes, meaning a minimum size of
	 * 128 bytes.
	 */
	srdi.	r6,r4,7
	beq	.Lcsum_tail_doublewords		/* len < 128 */

	srdi	r6,r4,6
	subi	r6,r6,1
	mtctr	r6

	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	ld	r6,0(r3)
	ld	r9,8(r3)

	ld	r10,16(r3)
	ld	r11,24(r3)

	/*
	 * On POWER6 and POWER7, back-to-back adde instructions take 2
	 * cycles because of the XER dependency, so the fastest this loop
	 * can go is 16 cycles per iteration. The scheduling of the loop
	 * below has been shown to hit that rate on both POWER6 and POWER7.
	 */
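	/*
	 * At 16 cycles for each 64-byte iteration, that is a peak rate
	 * of 4 bytes per cycle.
	 */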
	.align 5
2:
	adde	r0,r0,r6
	ld	r12,32(r3)
	ld	r14,40(r3)

	adde	r0,r0,r9
	ld	r15,48(r3)
	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10

	adde	r0,r0,r11

	adde	r0,r0,r12

	adde	r0,r0,r14

	adde	r0,r0,r15
	ld	r6,0(r3)
	ld	r9,8(r3)

	adde	r0,r0,r16
	ld	r10,16(r3)
	ld	r11,24(r3)
	bdnz	2b
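
	/*
	 * The loads for the next iteration are issued above before the
	 * bdnz, so the last iteration's data is still in flight when the
	 * loop exits; this copy of the loop body drains it.
	 */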

	adde	r0,r0,r6
	ld	r12,32(r3)
	ld	r14,40(r3)

	adde	r0,r0,r9
	ld	r15,48(r3)
	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10
	adde	r0,r0,r11
	adde	r0,r0,r12
	adde	r0,r0,r14
	adde	r0,r0,r15
	adde	r0,r0,r16

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	addi	r1,r1,STACKFRAMESIZE

	andi.	r4,r4,63

.Lcsum_tail_doublewords:		/* Up to 127 bytes to go */
	srdi.	r6,r4,3
	beq	.Lcsum_tail_word

	mtctr	r6
3:
	ld	r6,0(r3)
	addi	r3,r3,8
	adde	r0,r0,r6
	bdnz	3b

	andi.	r4,r4,7

.Lcsum_tail_word:			/* Up to 7 bytes to go */
	srdi.	r6,r4,2
	beq	.Lcsum_tail_halfword

	lwz	r6,0(r3)
	addi	r3,r3,4
	adde	r0,r0,r6
	subi	r4,r4,4

.Lcsum_tail_halfword:			/* Up to 3 bytes to go */
	srdi.	r6,r4,1
	beq	.Lcsum_tail_byte

	lhz	r6,0(r3)
	addi	r3,r3,2
	adde	r0,r0,r6
	subi	r4,r4,2

.Lcsum_tail_byte:			/* Up to 1 byte to go */
	andi.	r6,r4,1
	beq	.Lcsum_finish

	lbz	r6,0(r3)
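	/*
	 * A trailing byte is the most significant byte of its halfword
	 * on big endian, so it has to be shifted up; on little endian it
	 * is the least significant byte and can be added as is.
	 */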
#ifdef __BIG_ENDIAN__
	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
	adde	r0,r0,r9
#else
	adde	r0,r0,r6
#endif

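/*
 * In C terms, the fold below is (sketch only):
 *
 *	u64 t = (s << 32) | (s >> 32);	(rotate by 32 bits)
 *	return (u32)((s + t) >> 32);
 *
 * i.e. the two 32-bit halves are added and any carry out of the low
 * half wraps back in, which is the 1's complement 32-bit fold.
 */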
.Lcsum_finish:
	addze	r0,r0			/* add in final carry */
	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
	add	r3,r4,r0
	srdi	r3,r3,32
	blr
EXPORT_SYMBOL(__csum_partial)


	.macro srcnr
100:
	EX_TABLE(100b,.Lerror_nr)
	.endm

	.macro source
150:
	EX_TABLE(150b,.Lerror)
	.endm

	.macro dstnr
200:
	EX_TABLE(200b,.Lerror_nr)
	.endm

	.macro dest
250:
	EX_TABLE(250b,.Lerror)
	.endm
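
/*
 * The source/dest variants are used inside the unrolled copy loop,
 * after r14-r16 have been saved: their fixup (.Lerror) restores the
 * registers and pops the stack frame before returning 0. The
 * srcnr/dstnr ("no restore") variants are used outside that region
 * and branch to .Lerror_nr, which returns 0 directly.
 */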

/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in 0xffffffff (32-bit) while copying the block to dst.
 * If an access exception occurs, it returns 0.
 *
 * csum_partial_copy_generic(r3=src, r4=dst, r5=len)
 */
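/*
 * Roughly, in C (an illustrative sketch only):
 *
 *	s = 0xffffffff;
 *	copy len bytes from src to dst, accumulating the 1's
 *	complement sum of the data as in __csum_partial above;
 *	return faulted ? 0 : fold64to32(s);
 *
 * where fold64to32() is the same two-halves-plus-carry fold used by
 * __csum_partial.
 */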
_GLOBAL(csum_partial_copy_generic)
	li	r6,-1
	addic	r0,r6,0			/* clear carry */

	srdi.	r6,r5,3			/* less than 8 bytes? */
	beq	.Lcopy_tail_word

	/*
	 * If only halfword aligned, align to a doubleword. Since
	 * odd-aligned addresses should be rare and would require more
	 * work to calculate the correct checksum, we ignore that case
	 * and take the potential slowdown of unaligned loads.
	 *
	 * If the source and destination are misaligned relative to each
	 * other, we only align the source. This keeps things simple.
	 */
	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
	beq	.Lcopy_aligned

	li	r9,4
	sub	r6,r9,r6
	mtctr	r6

1:
srcnr;	lhz	r6,0(r3)		/* align to doubleword */
	subi	r5,r5,2
	addi	r3,r3,2
	adde	r0,r0,r6
dstnr;	sth	r6,0(r4)
	addi	r4,r4,2
	bdnz	1b

.Lcopy_aligned:
	/*
	 * We unroll the loop such that each iteration is 64 bytes with an
	 * entry and exit limb of 64 bytes, meaning a minimum size of
	 * 128 bytes.
	 */
	srdi.	r6,r5,7
	beq	.Lcopy_tail_doublewords		/* len < 128 */

	srdi	r6,r5,6
	subi	r6,r6,1
	mtctr	r6

	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

source;	ld	r6,0(r3)
source;	ld	r9,8(r3)

source;	ld	r10,16(r3)
source;	ld	r11,24(r3)

	/*
	 * On POWER6 and POWER7, back-to-back adde instructions take 2
	 * cycles because of the XER dependency, so the fastest this loop
	 * can go is 16 cycles per iteration. The scheduling of the loop
	 * below has been shown to hit that rate on both POWER6 and POWER7.
	 */
	.align 5
2:
	adde	r0,r0,r6
source;	ld	r12,32(r3)
source;	ld	r14,40(r3)

	adde	r0,r0,r9
source;	ld	r15,48(r3)
source;	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10
dest;	std	r6,0(r4)
dest;	std	r9,8(r4)

	adde	r0,r0,r11
dest;	std	r10,16(r4)
dest;	std	r11,24(r4)

	adde	r0,r0,r12
dest;	std	r12,32(r4)
dest;	std	r14,40(r4)

	adde	r0,r0,r14
dest;	std	r15,48(r4)
dest;	std	r16,56(r4)
	addi	r4,r4,64

	adde	r0,r0,r15
source;	ld	r6,0(r3)
source;	ld	r9,8(r3)

	adde	r0,r0,r16
source;	ld	r10,16(r3)
source;	ld	r11,24(r3)
	bdnz	2b
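
	/*
	 * As in __csum_partial, the final iteration's loads are already
	 * in flight here; this copy of the loop body drains them.
	 */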

	adde	r0,r0,r6
source;	ld	r12,32(r3)
source;	ld	r14,40(r3)

	adde	r0,r0,r9
source;	ld	r15,48(r3)
source;	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10
dest;	std	r6,0(r4)
dest;	std	r9,8(r4)

	adde	r0,r0,r11
dest;	std	r10,16(r4)
dest;	std	r11,24(r4)

	adde	r0,r0,r12
dest;	std	r12,32(r4)
dest;	std	r14,40(r4)

	adde	r0,r0,r14
dest;	std	r15,48(r4)
dest;	std	r16,56(r4)
	addi	r4,r4,64

	adde	r0,r0,r15
	adde	r0,r0,r16

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	addi	r1,r1,STACKFRAMESIZE

	andi.	r5,r5,63

.Lcopy_tail_doublewords:		/* Up to 127 bytes to go */
	srdi.	r6,r5,3
	beq	.Lcopy_tail_word

	mtctr	r6
3:
srcnr;	ld	r6,0(r3)
	addi	r3,r3,8
	adde	r0,r0,r6
dstnr;	std	r6,0(r4)
	addi	r4,r4,8
	bdnz	3b

	andi.	r5,r5,7

.Lcopy_tail_word:			/* Up to 7 bytes to go */
	srdi.	r6,r5,2
	beq	.Lcopy_tail_halfword

srcnr;	lwz	r6,0(r3)
	addi	r3,r3,4
	adde	r0,r0,r6
dstnr;	stw	r6,0(r4)
	addi	r4,r4,4
	subi	r5,r5,4

.Lcopy_tail_halfword:			/* Up to 3 bytes to go */
	srdi.	r6,r5,1
	beq	.Lcopy_tail_byte

srcnr;	lhz	r6,0(r3)
	addi	r3,r3,2
	adde	r0,r0,r6
dstnr;	sth	r6,0(r4)
	addi	r4,r4,2
	subi	r5,r5,2

.Lcopy_tail_byte:			/* Up to 1 byte to go */
	andi.	r6,r5,1
	beq	.Lcopy_finish

srcnr;	lbz	r6,0(r3)
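	/*
	 * As in __csum_partial: the trailing byte is the high byte of
	 * its halfword on big endian, the low byte on little endian.
	 */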
#ifdef __BIG_ENDIAN__
	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
	adde	r0,r0,r9
#else
	adde	r0,r0,r6
#endif
dstnr;	stb	r6,0(r4)

.Lcopy_finish:
	addze	r0,r0			/* add in final carry */
	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
	add	r3,r4,r0
	srdi	r3,r3,32
	blr

.Lerror:
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	addi	r1,r1,STACKFRAMESIZE
.Lerror_nr:
	li	r3,0
	blr

EXPORT_SYMBOL(csum_partial_copy_generic)

/*
 * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 *			   const struct in6_addr *daddr,
 *			   __u32 len, __u8 proto, __wsum sum)
 */

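/*
 * Sums the four doublewords of saddr and daddr plus (len + proto)
 * and the incoming sum, folds the 64-bit total down to 32 and then
 * to 16 bits, and returns the complemented result in the low half
 * of r3.
 */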
_GLOBAL(csum_ipv6_magic)
	ld	r8, 0(r3)
	ld	r9, 8(r3)
	add	r5, r5, r6
	addc	r0, r8, r9
	ld	r10, 0(r4)
	ld	r11, 8(r4)
#ifdef CONFIG_CPU_LITTLE_ENDIAN
	rotldi	r5, r5, 8
#endif
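	/*
	 * len and proto are host-order integers, while the address
	 * words above were loaded as raw (byte-reversed on LE) data
	 * and the incoming sum is already in checksum byte order.
	 * Under a 1's complement sum, rotating a value by 8 bits is
	 * equivalent to byte-swapping each 16-bit lane, so the rotate
	 * above brings len + proto into line with the little-endian
	 * data.
	 */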
	adde	r0, r0, r10
	add	r5, r5, r7
	adde	r0, r0, r11
	adde	r0, r0, r5
	addze	r0, r0
	rotldi  r3, r0, 32		/* fold two 32 bit halves together */
	add	r3, r0, r3
	srdi	r0, r3, 32
	rotlwi	r3, r0, 16		/* fold two 16 bit halves together */
	add	r3, r0, r3
	not	r3, r3
	rlwinm	r3, r3, 16, 16, 31
	blr
EXPORT_SYMBOL(csum_ipv6_magic)