xref: /openbmc/linux/arch/alpha/lib/csum_ipv6_magic.S (revision c900529f3d9161bfde5cca0754f83b4d3c3e0220)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/alpha/lib/csum_ipv6_magic.S
 * Contributed by Richard Henderson <rth@tamu.edu>
 *
 * unsigned short csum_ipv6_magic(struct in6_addr *saddr,
 *                                struct in6_addr *daddr,
 *                                __u32 len,
 *                                unsigned short proto,
 *                                unsigned int csum);
 *
 * Misalignment handling (which costs 16 instructions / 8 cycles)
 * added by Ivan Kokshaysky <ink@jurassic.park.msu.ru>
 */
151da177e4SLinus Torvalds
#include <linux/export.h>

/*
 * csum_ipv6_magic
 *
 * Adds the two 16-byte addresses, the byte-swapped len, the byte-swapped
 * proto and the incoming csum as 64-bit quantities, folds the total to
 * 16 bits with end-around carry and returns the complement.
 *
 * Registers, as used by the code below:
 *   $16  saddr  - read only, any alignment
 *   $17  daddr  - read only, any alignment
 *   $18  len    - low 4 bytes are used; overwritten
 *   $19  proto  - low 2 bytes are used; overwritten
 *   $20  csum   - low 4 bytes are used; overwritten
 *   $0   result - complemented sum in bits 0..15, bits 16..63 cleared
 * Scratch: $1-$7 and $21-$24.  No stores are issued and the frame size
 * declared by .frame is 0.
 *
 * Unaligned loads: ldq_u fetches the aligned quadword that contains the
 * given address.  Each 8-byte word is assembled from the quadwords that
 * hold its first and last byte (offsets 0/7 and 8/15), moved into place
 * with extql/extqh and ORed together.  For an aligned pointer both loads
 * return the same quadword and extqh shifts by zero, so the OR changes
 * nothing.
 *
 * Carries: after each addq, cmpult (sum < addend) gives the carry out of
 * bit 63.  The six carries are deliberately NOT added back into the
 * 64-bit sum: that addition can wrap too (sum == 2^64-1 with a single
 * carry pending is enough) and would silently lose an end-around carry.
 * They are added to the four 16-bit words of the sum during the fold,
 * where the total is at most 4*0xffff + 6 and cannot overflow.
 *
 * The "e0" / ".. e1" tags are the original author's issue-slot notes.
 * The tags on the carry-merge and first fold lines follow the same
 * pattern (byte ops first in each pair) but have not been checked
 * against a pipeline model.
 */
	.globl csum_ipv6_magic
	.align 4
	.ent csum_ipv6_magic
	.frame $30,0,$26,0
csum_ipv6_magic:
	.prologue 0

	ldq_u	$0,0($16)	# e0    : saddr word 0, first quadword
	zapnot	$20,15,$20	# .. e1 : zero extend incoming csum
	extqh	$18,1,$4	# e0    : len byte 0 -> byte 7
	ldq_u	$21,7($16)	# .. e1 : saddr word 0, last quadword

	extbl	$18,1,$5	# e0    : len byte 1
	ldq_u	$1,8($16)	# .. e1 : saddr word 1, first quadword
	extbl	$18,2,$6	# e0    : len byte 2
	ldq_u	$22,15($16)	# .. e1 : saddr word 1, last quadword

	extbl	$18,3,$18	# e0    : len byte 3 -> byte 0
	ldq_u	$2,0($17)	# .. e1 : daddr word 0, first quadword
	sra	$4,32,$4	# e0    : len byte 0 -> byte 3, sign copied above
	ldq_u	$23,7($17)	# .. e1 : daddr word 0, last quadword

	extql	$0,$16,$0	# e0    :
	ldq_u	$3,8($17)	# .. e1 : daddr word 1, first quadword
	extqh	$21,$16,$21	# e0    :
	ldq_u	$24,15($17)	# .. e1 : daddr word 1, last quadword

	sll	$5,16,$5	# e0    : len byte 1 -> byte 2
	or	$0,$21,$0	# .. e1 : 1st src word complete
	extql	$1,$16,$1	# e0    :
	addq	$20,$0,$20	# .. e1 : begin summing the words

	extqh	$22,$16,$22	# e0    :
	cmpult	$20,$0,$0	# .. e1 : carry 1
	sll	$6,8,$6		# e0    : len byte 2 -> byte 1
	or	$1,$22,$1	# .. e1 : 2nd src word complete

	extql	$2,$17,$2	# e0    :
	or	$4,$18,$18	# .. e1 :
	extqh	$23,$17,$23	# e0    :
	or	$5,$6,$5	# .. e1 :

	extql	$3,$17,$3	# e0    :
	or	$2,$23,$2	# .. e1 : 1st dst word complete
	extqh	$24,$17,$24	# e0    :
	or	$18,$5,$18	# .. e1 : len complete

	extwh	$19,7,$7	# e0    : proto byte 0 -> byte 1
	or	$3,$24,$3	# .. e1 : 2nd dst word complete
	extbl	$19,1,$19	# e0    : proto byte 1 -> byte 0
	addq	$20,$1,$20	# .. e1 :

	or	$19,$7,$19	# e0    :
	cmpult	$20,$1,$1	# .. e1 : carry 2
	sll	$19,48,$19	# e0    :
	nop			# .. e0 :

	sra	$19,32,$19	# e0    : proto complete (bits 16..31)
	addq	$20,$2,$20	# .. e1 :
	cmpult	$20,$2,$2	# e0    : carry 3
	addq	$20,$3,$20	# .. e1 :

	cmpult	$20,$3,$3	# e0    : carry 4
	addq	$20,$18,$20	# .. e1 :
	cmpult	$20,$18,$18	# e0    : carry 5
	addq	$20,$19,$20	# .. e1 :

	cmpult	$20,$19,$19	# e0    : carry 6
	addq	$0,$1,$0	# .. e1 : carries 1+2
	addq	$2,$3,$2	# e0    : carries 3+4
	addq	$18,$19,$18	# .. e1 : carries 5+6

	extwl	$20,2,$4	# e0    : begin folding: sum bits 16..31
	addq	$0,$2,$0	# .. e1 : carries 1..4
	extwl	$20,4,$5	# e0    : sum bits 32..47
	zapnot	$20,3,$3	# .. e1 : sum bits 0..15

	extwl	$20,6,$20	# e0    : sum bits 48..63
	addq	$0,$18,$0	# .. e1 : all six carries, kept out of the 64-bit sum
	addq	$4,$5,$4	# e0    :
	addq	$3,$20,$3	# .. e1 :

	addq	$0,$4,$0	# e0    :
	unop			#       :
	addq	$0,$3,$0	# e0    : four words plus carries, below 2^19
	unop			#       :

	extwl	$0,2,$1		# e0    : fold 19-bit value
	zapnot	$0,3,$0		# .. e1 :
	addq	$0,$1,$0	# e0    :
	unop			#       :

	extwl	$0,2,$1		# e0    : fold 17-bit value
	zapnot	$0,3,$0		# .. e1 :
	addq	$0,$1,$0	# e0    :
	not	$0,$0		# .. e1 : and complement.

	zapnot	$0,3,$0		# e0    : keep the low 16 bits only
	ret			# .. e1 :

	.end csum_ipv6_magic
	EXPORT_SYMBOL(csum_ipv6_magic)
119