/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * arch/ia64/lib/xor.S
 *
 * Optimized RAID-5 checksumming functions for IA-64.
 *
 * Every routine below has the same shape:
 *
 *   in0        length in bytes; it is shifted right by 3, so the data is
 *              processed as in0/8 eight-byte words and any remainder of
 *              in0 modulo 8 is ignored
 *   in1        first source buffer, which is also the destination
 *   in2..inN   the remaining source buffers
 *
 * Each word of in1 is replaced by the XOR of the corresponding words of all
 * source buffers.
 *
 * The loop is software-pipelined with rotating registers and predicates
 * (.rotr/.rotp, br.ctop):
 *
 *   stage 0   (p[0])    load one word from every source, post-incrementing
 *                       the source pointers by 8
 *   stage 6   (p[6])    XOR the words that were loaded six rotations earlier
 *   stage 6+1 (p[6+1])  store the result produced one rotation earlier
 *
 * ar.ec is set to 6 + 2, the number of entries declared by .rotp, so the
 * epilogue iterations of br.ctop drain the stages that are still in flight.
 *
 * The rotating region requested by each alloc is as large as the whole
 * register frame, so the input registers are part of it.  The pointers are
 * therefore copied to static registers (r8, r16 and up) before the loop.
 *
 * NOTE(review): in0 is not validated.  A value below 8 makes in0/8 - 1 wrap
 * to -1 before it is written to ar.lc.  Callers presumably always pass a
 * non-zero multiple of 8 -- confirm.
 */

#include <linux/export.h>
#include <asm/asmmacro.h>

/*
 * xor_ia64_2: in1[i] ^= in2[i] for in0/8 eight-byte words.
 *
 * Registers: r8 = store pointer, r16/r17 = load pointers,
 *            r29/r30/r31 = saved pr / ar.lc / ar.pfs.
 */
GLOBAL_ENTRY(xor_ia64_2)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 3, 0, 13, 16	/* 3 in + 13 out = 16 registers, all 16 rotating */
	.save ar.lc, r30
	mov r30 = ar.lc				/* keep the caller's loop count */
	.save pr, r29
	mov r29 = pr				/* keep the caller's predicates */
	;;
	.body
	mov r8 = in1				/* r8: where results are stored */
	mov ar.ec = 6 + 2			/* epilogue count = pipeline stages */
	shr in0 = in0, 3			/* bytes -> 8-byte words */
	;;
	adds in0 = -1, in0			/* ar.lc takes the word count minus one */
	mov r16 = in1				/* r16: read pointer for source 1 */
	mov r17 = in2				/* r17: read pointer for source 2 */
	;;
	mov ar.lc = in0
	mov pr.rot = 1 << 16			/* only bit 16 set: start with stage 0 alone enabled */
	;;
	.rotr s1[6+1], s2[6+1], d[2]		/* 7 + 7 + 2 = 16 rotating registers */
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8		/* non-temporal load, pointer += 8 */
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]
(p[6+1])st8.nta [r8] = d[1], 8			/* d[1] is the d[0] of the previous rotation */
	nop.f 0					/* F-unit no-op; presumably bundle padding */
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30				/* restore the caller's loop count */
	mov pr = r29, -1			/* restore all predicates */
	br.ret.sptk.few rp
END(xor_ia64_2)
EXPORT_SYMBOL(xor_ia64_2)

/*
 * xor_ia64_3: in1[i] ^= in2[i] ^ in3[i] for in0/8 eight-byte words.
 *
 * Registers: r8 = store pointer, r16/r17/r18 = load pointers,
 *            r29/r30/r31 = saved pr / ar.lc / ar.pfs.
 */
GLOBAL_ENTRY(xor_ia64_3)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 4, 0, 20, 24	/* 4 in + 20 out = 24 registers, all 24 rotating */
	.save ar.lc, r30
	mov r30 = ar.lc				/* keep the caller's loop count */
	.save pr, r29
	mov r29 = pr				/* keep the caller's predicates */
	;;
	.body
	mov r8 = in1				/* r8: where results are stored */
	mov ar.ec = 6 + 2			/* epilogue count = pipeline stages */
	shr in0 = in0, 3			/* bytes -> 8-byte words */
	;;
	adds in0 = -1, in0			/* ar.lc takes the word count minus one */
	mov r16 = in1				/* r16: read pointer for source 1 */
	mov r17 = in2				/* r17: read pointer for source 2 */
	;;
	mov r18 = in3				/* r18: read pointer for source 3 */
	mov ar.lc = in0
	mov pr.rot = 1 << 16			/* only bit 16 set: start with stage 0 alone enabled */
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], d[2]	/* 3 * 7 + 2 = 23 of the 24 rotating registers */
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8		/* non-temporal load, pointer += 8 */
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]			/* partial result: s1 ^ s2 */
	;;
(p[0])	ld8.nta s3[0] = [r18], 8
(p[6+1])st8.nta [r8] = d[1], 8			/* d[1] is the d[0] of the previous rotation */
(p[6])	xor d[0] = d[0], s3[6]			/* fold in s3; needs the stop above */
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30				/* restore the caller's loop count */
	mov pr = r29, -1			/* restore all predicates */
	br.ret.sptk.few rp
END(xor_ia64_3)
EXPORT_SYMBOL(xor_ia64_3)

/*
 * xor_ia64_4: in1[i] ^= in2[i] ^ in3[i] ^ in4[i] for in0/8 eight-byte words.
 *
 * Registers: r8 = store pointer, r16..r19 = load pointers,
 *            r20 = scratch for s3 ^ s4,
 *            r29/r30/r31 = saved pr / ar.lc / ar.pfs.
 */
GLOBAL_ENTRY(xor_ia64_4)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 5, 0, 27, 32	/* 5 in + 27 out = 32 registers, all 32 rotating */
	.save ar.lc, r30
	mov r30 = ar.lc				/* keep the caller's loop count */
	.save pr, r29
	mov r29 = pr				/* keep the caller's predicates */
	;;
	.body
	mov r8 = in1				/* r8: where results are stored */
	mov ar.ec = 6 + 2			/* epilogue count = pipeline stages */
	shr in0 = in0, 3			/* bytes -> 8-byte words */
	;;
	adds in0 = -1, in0			/* ar.lc takes the word count minus one */
	mov r16 = in1				/* r16: read pointer for source 1 */
	mov r17 = in2				/* r17: read pointer for source 2 */
	;;
	mov r18 = in3				/* r18: read pointer for source 3 */
	mov ar.lc = in0
	mov pr.rot = 1 << 16			/* only bit 16 set: start with stage 0 alone enabled */
	mov r19 = in4				/* r19: read pointer for source 4 */
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]	/* 4 * 7 + 2 = 30 of the 32 rotating registers */
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8		/* non-temporal load, pointer += 8 */
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]			/* first pair: s1 ^ s2 */
(p[0])	ld8.nta s3[0] = [r18], 8
(p[0])	ld8.nta s4[0] = [r19], 8
(p[6])	xor r20 = s3[6], s4[6]			/* second pair: s3 ^ s4, in static r20 */
	;;
(p[6+1])st8.nta [r8] = d[1], 8			/* d[1] is the d[0] of the previous rotation */
(p[6])	xor d[0] = d[0], r20			/* combine the two pairs */
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30				/* restore the caller's loop count */
	mov pr = r29, -1			/* restore all predicates */
	br.ret.sptk.few rp
END(xor_ia64_4)
EXPORT_SYMBOL(xor_ia64_4)

/*
 * xor_ia64_5: in1[i] ^= in2[i] ^ in3[i] ^ in4[i] ^ in5[i] for in0/8
 * eight-byte words.
 *
 * Registers: r8 = store pointer, r16..r20 = load pointers,
 *            r21 = scratch for s3 ^ s4,
 *            r29/r30/r31 = saved pr / ar.lc / ar.pfs.
 */
GLOBAL_ENTRY(xor_ia64_5)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 6, 0, 34, 40	/* 6 in + 34 out = 40 registers, all 40 rotating */
	.save ar.lc, r30
	mov r30 = ar.lc				/* keep the caller's loop count */
	.save pr, r29
	mov r29 = pr				/* keep the caller's predicates */
	;;
	.body
	mov r8 = in1				/* r8: where results are stored */
	mov ar.ec = 6 + 2			/* epilogue count = pipeline stages */
	shr in0 = in0, 3			/* bytes -> 8-byte words */
	;;
	adds in0 = -1, in0			/* ar.lc takes the word count minus one */
	mov r16 = in1				/* r16: read pointer for source 1 */
	mov r17 = in2				/* r17: read pointer for source 2 */
	;;
	mov r18 = in3				/* r18: read pointer for source 3 */
	mov ar.lc = in0
	mov pr.rot = 1 << 16			/* only bit 16 set: start with stage 0 alone enabled */
	mov r19 = in4				/* r19: read pointer for source 4 */
	mov r20 = in5				/* r20: read pointer for source 5 */
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]	/* 5 * 7 + 2 = 37 of the 40 rotating registers */
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8		/* non-temporal load, pointer += 8 */
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]			/* first pair: s1 ^ s2 */
(p[0])	ld8.nta s3[0] = [r18], 8
(p[0])	ld8.nta s4[0] = [r19], 8
(p[6])	xor r21 = s3[6], s4[6]			/* second pair: s3 ^ s4, in static r21 */
	;;
(p[0])	ld8.nta s5[0] = [r20], 8
(p[6+1])st8.nta [r8] = d[1], 8			/* d[1] is the d[0] of the previous rotation */
(p[6])	xor d[0] = d[0], r21			/* combine the two pairs */
	;;
(p[6])	xor d[0] = d[0], s5[6]			/* fold in s5; needs the stop above */
	nop.f 0					/* F-unit no-op; presumably bundle padding */
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30				/* restore the caller's loop count */
	mov pr = r29, -1			/* restore all predicates */
	br.ret.sptk.few rp
END(xor_ia64_5)
EXPORT_SYMBOL(xor_ia64_5)