xref: /openbmc/linux/arch/ia64/lib/xor.S (revision c900529f3d9161bfde5cca0754f83b4d3c3e0220)
1af1a8899SThomas Gleixner/* SPDX-License-Identifier: GPL-2.0-or-later */
21da177e4SLinus Torvalds/*
31da177e4SLinus Torvalds * arch/ia64/lib/xor.S
41da177e4SLinus Torvalds *
51da177e4SLinus Torvalds * Optimized RAID-5 checksumming functions for IA-64.
61da177e4SLinus Torvalds */
71da177e4SLinus Torvalds
8*ab03e604SMasahiro Yamada#include <linux/export.h>
91da177e4SLinus Torvalds#include <asm/asmmacro.h>
101da177e4SLinus Torvalds
/*
 * xor_ia64_2: XOR two buffers 8 bytes at a time and store the result back
 * into the first one, using a modulo-scheduled loop (br.ctop with rotating
 * registers and predicates).
 *
 *   in0  length; shifted right by 3 below, so it is consumed as a number
 *        of bytes turned into a count of 8-byte words (a remainder below
 *        8 is dropped)
 *   in1  first source and destination (copied to r16 for the loads and to
 *        r8 for the stores)
 *   in2  second source (copied to r17)
 *
 * ar.lc and pr are saved in r30/r29 and restored before returning; r8, r16
 * and r17 are overwritten.
 * Presumably called from C as (bytes, p1, p2) -- confirm against the
 * prototype.
 * NOTE(review): in0 < 8 is not guarded against; ar.lc would be loaded with
 * -1 in that case -- confirm that callers never pass such a length.
 */
111da177e4SLinus TorvaldsGLOBAL_ENTRY(xor_ia64_2)
121da177e4SLinus Torvalds	.prologue
131da177e4SLinus Torvalds	.fframe 0
	/*
	 * Frame: 3 inputs + 13 outputs = 16 registers, all 16 of them rotating.
	 * The rotating region starts at r32 and therefore includes in0-in2,
	 * which is why the arguments are copied to static registers below.
	 */
141da177e4SLinus Torvalds	.save ar.pfs, r31
151da177e4SLinus Torvalds	alloc r31 = ar.pfs, 3, 0, 13, 16
161da177e4SLinus Torvalds	.save ar.lc, r30
171da177e4SLinus Torvalds	mov r30 = ar.lc
181da177e4SLinus Torvalds	.save pr, r29
191da177e4SLinus Torvalds	mov r29 = pr
201da177e4SLinus Torvalds	;;
211da177e4SLinus Torvalds	.body
	/* r8 = store pointer into the in1 buffer */
221da177e4SLinus Torvalds	mov r8 = in1
	/* epilogue count: one per pipeline stage, p[0] .. p[6+1] */
231da177e4SLinus Torvalds	mov ar.ec = 6 + 2
	/* length / 8 = number of 8-byte words */
241da177e4SLinus Torvalds	shr in0 = in0, 3
251da177e4SLinus Torvalds	;;
	/* ar.lc takes the word count minus one */
261da177e4SLinus Torvalds	adds in0 = -1, in0
	/* r16/r17 = load pointers for the two sources */
271da177e4SLinus Torvalds	mov r16 = in1
281da177e4SLinus Torvalds	mov r17 = in2
291da177e4SLinus Torvalds	;;
301da177e4SLinus Torvalds	mov ar.lc = in0
	/* only p16 (= p[0]) set: the pipeline starts with just the load stage on */
311da177e4SLinus Torvalds	mov pr.rot = 1 << 16
321da177e4SLinus Torvalds	;;
	/*
	 * s1/s2: rotating queues of loaded words, 7 deep each.
	 * d:     d[0] is the freshly computed result, d[1] the same value one
	 *        rotation later.
	 * p:     one predicate per pipeline stage.
	 */
331da177e4SLinus Torvalds	.rotr s1[6+1], s2[6+1], d[2]
341da177e4SLinus Torvalds	.rotp p[6+2]
351da177e4SLinus Torvalds0:
	/* stage 0: load one word from each source, post-incrementing by 8 */
361da177e4SLinus Torvalds(p[0])	ld8.nta s1[0] = [r16], 8
371da177e4SLinus Torvalds(p[0])	ld8.nta s2[0] = [r17], 8
	/* stage 6: combine the words that were loaded six rotations earlier */
381da177e4SLinus Torvalds(p[6])	xor d[0] = s1[6], s2[6]
	/* stage 7: store the result computed on the previous pass */
391da177e4SLinus Torvalds(p[6+1])st8.nta [r8] = d[1], 8
	/* explicit F-unit no-op; presumably bundle filler -- TODO confirm */
401da177e4SLinus Torvalds	nop.f 0
	/* rotate registers/predicates and loop until ar.lc and ar.ec run out */
411da177e4SLinus Torvalds	br.ctop.dptk.few 0b
421da177e4SLinus Torvalds	;;
	/* put back the ar.lc and predicate values saved in the prologue */
431da177e4SLinus Torvalds	mov ar.lc = r30
441da177e4SLinus Torvalds	mov pr = r29, -1
451da177e4SLinus Torvalds	br.ret.sptk.few rp
461da177e4SLinus TorvaldsEND(xor_ia64_2)
47e007c533SAl ViroEXPORT_SYMBOL(xor_ia64_2)
481da177e4SLinus Torvalds
/*
 * xor_ia64_3: XOR three buffers 8 bytes at a time and store the result
 * back into the first one, using a modulo-scheduled loop (br.ctop with
 * rotating registers and predicates).
 *
 *   in0  length; shifted right by 3 below, so it is consumed as a number
 *        of bytes turned into a count of 8-byte words (a remainder below
 *        8 is dropped)
 *   in1  first source and destination (copied to r16 for the loads and to
 *        r8 for the stores)
 *   in2  second source (copied to r17)
 *   in3  third source (copied to r18)
 *
 * ar.lc and pr are saved in r30/r29 and restored before returning; r8 and
 * r16-r18 are overwritten.
 * Presumably called from C as (bytes, p1, p2, p3) -- confirm against the
 * prototype.
 * NOTE(review): in0 < 8 is not guarded against; ar.lc would be loaded with
 * -1 in that case -- confirm that callers never pass such a length.
 */
491da177e4SLinus TorvaldsGLOBAL_ENTRY(xor_ia64_3)
501da177e4SLinus Torvalds	.prologue
511da177e4SLinus Torvalds	.fframe 0
	/*
	 * Frame: 4 inputs + 20 outputs = 24 registers, all 24 of them rotating
	 * (the .rotr below names 23 of them).  The rotating region starts at
	 * r32 and therefore includes in0-in3, which is why the arguments are
	 * copied to static registers below.
	 */
521da177e4SLinus Torvalds	.save ar.pfs, r31
531da177e4SLinus Torvalds	alloc r31 = ar.pfs, 4, 0, 20, 24
541da177e4SLinus Torvalds	.save ar.lc, r30
551da177e4SLinus Torvalds	mov r30 = ar.lc
561da177e4SLinus Torvalds	.save pr, r29
571da177e4SLinus Torvalds	mov r29 = pr
581da177e4SLinus Torvalds	;;
591da177e4SLinus Torvalds	.body
	/* r8 = store pointer into the in1 buffer */
601da177e4SLinus Torvalds	mov r8 = in1
	/* epilogue count: one per pipeline stage, p[0] .. p[6+1] */
611da177e4SLinus Torvalds	mov ar.ec = 6 + 2
	/* length / 8 = number of 8-byte words */
621da177e4SLinus Torvalds	shr in0 = in0, 3
631da177e4SLinus Torvalds	;;
	/* ar.lc takes the word count minus one */
641da177e4SLinus Torvalds	adds in0 = -1, in0
	/* r16-r18 = load pointers for the three sources */
651da177e4SLinus Torvalds	mov r16 = in1
661da177e4SLinus Torvalds	mov r17 = in2
671da177e4SLinus Torvalds	;;
681da177e4SLinus Torvalds	mov r18 = in3
691da177e4SLinus Torvalds	mov ar.lc = in0
	/* only p16 (= p[0]) set: the pipeline starts with just the load stage on */
701da177e4SLinus Torvalds	mov pr.rot = 1 << 16
711da177e4SLinus Torvalds	;;
	/*
	 * s1-s3: rotating queues of loaded words, 7 deep each.
	 * d:     d[0] is the freshly computed result, d[1] the same value one
	 *        rotation later.
	 * p:     one predicate per pipeline stage.
	 */
721da177e4SLinus Torvalds	.rotr s1[6+1], s2[6+1], s3[6+1], d[2]
731da177e4SLinus Torvalds	.rotp p[6+2]
741da177e4SLinus Torvalds0:
	/* first group -- stage 0: load from sources 1 and 2 (post-increment 8) */
751da177e4SLinus Torvalds(p[0])	ld8.nta s1[0] = [r16], 8
761da177e4SLinus Torvalds(p[0])	ld8.nta s2[0] = [r17], 8
	/* stage 6: start the result from the words loaded six rotations earlier */
771da177e4SLinus Torvalds(p[6])	xor d[0] = s1[6], s2[6]
781da177e4SLinus Torvalds	;;
	/* second group -- stage 0: load from source 3 */
791da177e4SLinus Torvalds(p[0])	ld8.nta s3[0] = [r18], 8
	/* stage 7: store the result completed on the previous pass */
801da177e4SLinus Torvalds(p[6+1])st8.nta [r8] = d[1], 8
	/* stage 6: fold the third source into the partial result from the first group */
811da177e4SLinus Torvalds(p[6])	xor d[0] = d[0], s3[6]
	/* rotate registers/predicates and loop until ar.lc and ar.ec run out */
821da177e4SLinus Torvalds	br.ctop.dptk.few 0b
831da177e4SLinus Torvalds	;;
	/* put back the ar.lc and predicate values saved in the prologue */
841da177e4SLinus Torvalds	mov ar.lc = r30
851da177e4SLinus Torvalds	mov pr = r29, -1
861da177e4SLinus Torvalds	br.ret.sptk.few rp
871da177e4SLinus TorvaldsEND(xor_ia64_3)
88e007c533SAl ViroEXPORT_SYMBOL(xor_ia64_3)
891da177e4SLinus Torvalds
/*
 * xor_ia64_4: XOR four buffers 8 bytes at a time and store the result
 * back into the first one, using a modulo-scheduled loop (br.ctop with
 * rotating registers and predicates).
 *
 *   in0  length; shifted right by 3 below, so it is consumed as a number
 *        of bytes turned into a count of 8-byte words (a remainder below
 *        8 is dropped)
 *   in1  first source and destination (copied to r16 for the loads and to
 *        r8 for the stores)
 *   in2  second source (copied to r17)
 *   in3  third source (copied to r18)
 *   in4  fourth source (copied to r19)
 *
 * ar.lc and pr are saved in r30/r29 and restored before returning; r8 and
 * r16-r20 are overwritten.
 * Presumably called from C as (bytes, p1, p2, p3, p4) -- confirm against
 * the prototype.
 * NOTE(review): in0 < 8 is not guarded against; ar.lc would be loaded with
 * -1 in that case -- confirm that callers never pass such a length.
 */
901da177e4SLinus TorvaldsGLOBAL_ENTRY(xor_ia64_4)
911da177e4SLinus Torvalds	.prologue
921da177e4SLinus Torvalds	.fframe 0
	/*
	 * Frame: 5 inputs + 27 outputs = 32 registers, all 32 of them rotating
	 * (the .rotr below names 30 of them).  The rotating region starts at
	 * r32 and therefore includes in0-in4, which is why the arguments are
	 * copied to static registers below.
	 */
931da177e4SLinus Torvalds	.save ar.pfs, r31
941da177e4SLinus Torvalds	alloc r31 = ar.pfs, 5, 0, 27, 32
951da177e4SLinus Torvalds	.save ar.lc, r30
961da177e4SLinus Torvalds	mov r30 = ar.lc
971da177e4SLinus Torvalds	.save pr, r29
981da177e4SLinus Torvalds	mov r29 = pr
991da177e4SLinus Torvalds	;;
1001da177e4SLinus Torvalds	.body
	/* r8 = store pointer into the in1 buffer */
1011da177e4SLinus Torvalds	mov r8 = in1
	/* epilogue count: one per pipeline stage, p[0] .. p[6+1] */
1021da177e4SLinus Torvalds	mov ar.ec = 6 + 2
	/* length / 8 = number of 8-byte words */
1031da177e4SLinus Torvalds	shr in0 = in0, 3
1041da177e4SLinus Torvalds	;;
	/* ar.lc takes the word count minus one */
1051da177e4SLinus Torvalds	adds in0 = -1, in0
	/* r16-r19 = load pointers for the four sources */
1061da177e4SLinus Torvalds	mov r16 = in1
1071da177e4SLinus Torvalds	mov r17 = in2
1081da177e4SLinus Torvalds	;;
1091da177e4SLinus Torvalds	mov r18 = in3
1101da177e4SLinus Torvalds	mov ar.lc = in0
	/* only p16 (= p[0]) set: the pipeline starts with just the load stage on */
1111da177e4SLinus Torvalds	mov pr.rot = 1 << 16
1121da177e4SLinus Torvalds	mov r19 = in4
1131da177e4SLinus Torvalds	;;
	/*
	 * s1-s4: rotating queues of loaded words, 7 deep each.
	 * d:     d[0] is the freshly computed result, d[1] the same value one
	 *        rotation later.
	 * p:     one predicate per pipeline stage.
	 */
1141da177e4SLinus Torvalds	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
1151da177e4SLinus Torvalds	.rotp p[6+2]
1161da177e4SLinus Torvalds0:
	/*
	 * first group -- stage 0 loads one word from each of the four sources
	 * (post-increment 8); stage 6 XORs the words loaded six rotations
	 * earlier pairwise: s1^s2 into d[0], s3^s4 into the static temporary
	 * r20.
	 */
1171da177e4SLinus Torvalds(p[0])	ld8.nta s1[0] = [r16], 8
1181da177e4SLinus Torvalds(p[0])	ld8.nta s2[0] = [r17], 8
1191da177e4SLinus Torvalds(p[6])	xor d[0] = s1[6], s2[6]
1201da177e4SLinus Torvalds(p[0])	ld8.nta s3[0] = [r18], 8
1211da177e4SLinus Torvalds(p[0])	ld8.nta s4[0] = [r19], 8
1221da177e4SLinus Torvalds(p[6])	xor r20 = s3[6], s4[6]
1231da177e4SLinus Torvalds	;;
	/* second group -- stage 7: store the result completed on the previous pass */
1241da177e4SLinus Torvalds(p[6+1])st8.nta [r8] = d[1], 8
	/* stage 6: merge the two pairwise results; r20 is consumed in the same pass */
1251da177e4SLinus Torvalds(p[6])	xor d[0] = d[0], r20
	/* rotate registers/predicates and loop until ar.lc and ar.ec run out */
1261da177e4SLinus Torvalds	br.ctop.dptk.few 0b
1271da177e4SLinus Torvalds	;;
	/* put back the ar.lc and predicate values saved in the prologue */
1281da177e4SLinus Torvalds	mov ar.lc = r30
1291da177e4SLinus Torvalds	mov pr = r29, -1
1301da177e4SLinus Torvalds	br.ret.sptk.few rp
1311da177e4SLinus TorvaldsEND(xor_ia64_4)
132e007c533SAl ViroEXPORT_SYMBOL(xor_ia64_4)
1331da177e4SLinus Torvalds
/*
 * xor_ia64_5: XOR five buffers 8 bytes at a time and store the result
 * back into the first one, using a modulo-scheduled loop (br.ctop with
 * rotating registers and predicates).
 *
 *   in0  length; shifted right by 3 below, so it is consumed as a number
 *        of bytes turned into a count of 8-byte words (a remainder below
 *        8 is dropped)
 *   in1  first source and destination (copied to r16 for the loads and to
 *        r8 for the stores)
 *   in2  second source (copied to r17)
 *   in3  third source (copied to r18)
 *   in4  fourth source (copied to r19)
 *   in5  fifth source (copied to r20)
 *
 * ar.lc and pr are saved in r30/r29 and restored before returning; r8 and
 * r16-r21 are overwritten.
 * Presumably called from C as (bytes, p1, p2, p3, p4, p5) -- confirm
 * against the prototype.
 * NOTE(review): in0 < 8 is not guarded against; ar.lc would be loaded with
 * -1 in that case -- confirm that callers never pass such a length.
 */
1341da177e4SLinus TorvaldsGLOBAL_ENTRY(xor_ia64_5)
1351da177e4SLinus Torvalds	.prologue
1361da177e4SLinus Torvalds	.fframe 0
	/*
	 * Frame: 6 inputs + 34 outputs = 40 registers, all 40 of them rotating
	 * (the .rotr below names 37 of them).  The rotating region starts at
	 * r32 and therefore includes in0-in5, which is why the arguments are
	 * copied to static registers below.
	 */
1371da177e4SLinus Torvalds	.save ar.pfs, r31
1381da177e4SLinus Torvalds	alloc r31 = ar.pfs, 6, 0, 34, 40
1391da177e4SLinus Torvalds	.save ar.lc, r30
1401da177e4SLinus Torvalds	mov r30 = ar.lc
1411da177e4SLinus Torvalds	.save pr, r29
1421da177e4SLinus Torvalds	mov r29 = pr
1431da177e4SLinus Torvalds	;;
1441da177e4SLinus Torvalds	.body
	/* r8 = store pointer into the in1 buffer */
1451da177e4SLinus Torvalds	mov r8 = in1
	/* epilogue count: one per pipeline stage, p[0] .. p[6+1] */
1461da177e4SLinus Torvalds	mov ar.ec = 6 + 2
	/* length / 8 = number of 8-byte words */
1471da177e4SLinus Torvalds	shr in0 = in0, 3
1481da177e4SLinus Torvalds	;;
	/* ar.lc takes the word count minus one */
1491da177e4SLinus Torvalds	adds in0 = -1, in0
	/* r16-r20 = load pointers for the five sources */
1501da177e4SLinus Torvalds	mov r16 = in1
1511da177e4SLinus Torvalds	mov r17 = in2
1521da177e4SLinus Torvalds	;;
1531da177e4SLinus Torvalds	mov r18 = in3
1541da177e4SLinus Torvalds	mov ar.lc = in0
	/* only p16 (= p[0]) set: the pipeline starts with just the load stage on */
1551da177e4SLinus Torvalds	mov pr.rot = 1 << 16
1561da177e4SLinus Torvalds	mov r19 = in4
1571da177e4SLinus Torvalds	mov r20 = in5
1581da177e4SLinus Torvalds	;;
	/*
	 * s1-s5: rotating queues of loaded words, 7 deep each.
	 * d:     d[0] is the freshly computed result, d[1] the same value one
	 *        rotation later.
	 * p:     one predicate per pipeline stage.
	 */
1591da177e4SLinus Torvalds	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
1601da177e4SLinus Torvalds	.rotp p[6+2]
1611da177e4SLinus Torvalds0:
	/*
	 * first group -- stage 0 loads one word from sources 1-4 (post-increment
	 * 8); stage 6 XORs the words loaded six rotations earlier pairwise:
	 * s1^s2 into d[0], s3^s4 into the static temporary r21.
	 */
1621da177e4SLinus Torvalds(p[0])	ld8.nta s1[0] = [r16], 8
1631da177e4SLinus Torvalds(p[0])	ld8.nta s2[0] = [r17], 8
1641da177e4SLinus Torvalds(p[6])	xor d[0] = s1[6], s2[6]
1651da177e4SLinus Torvalds(p[0])	ld8.nta s3[0] = [r18], 8
1661da177e4SLinus Torvalds(p[0])	ld8.nta s4[0] = [r19], 8
1671da177e4SLinus Torvalds(p[6])	xor r21 = s3[6], s4[6]
1681da177e4SLinus Torvalds	;;
	/* second group -- stage 0: load from source 5 */
1691da177e4SLinus Torvalds(p[0])	ld8.nta s5[0] = [r20], 8
	/* stage 7: store the result completed on the previous pass */
1701da177e4SLinus Torvalds(p[6+1])st8.nta [r8] = d[1], 8
	/* stage 6: merge the two pairwise results; r21 is consumed in the same pass */
1711da177e4SLinus Torvalds(p[6])	xor d[0] = d[0], r21
1721da177e4SLinus Torvalds	;;
	/* third group -- stage 6: fold in the fifth source to finish d[0] */
1731da177e4SLinus Torvalds(p[6])	  xor d[0] = d[0], s5[6]
	/* explicit F-unit no-op; presumably bundle filler -- TODO confirm */
1741da177e4SLinus Torvalds	nop.f 0
	/* rotate registers/predicates and loop until ar.lc and ar.ec run out */
1751da177e4SLinus Torvalds	br.ctop.dptk.few 0b
1761da177e4SLinus Torvalds	;;
	/* put back the ar.lc and predicate values saved in the prologue */
1771da177e4SLinus Torvalds	mov ar.lc = r30
1781da177e4SLinus Torvalds	mov pr = r29, -1
1791da177e4SLinus Torvalds	br.ret.sptk.few rp
1801da177e4SLinus TorvaldsEND(xor_ia64_5)
181e007c533SAl ViroEXPORT_SYMBOL(xor_ia64_5)
182