/*
 * arch/ia64/lib/xor.S
 *
 * Optimized RAID-5 checksumming functions for IA-64.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
 * (for example /usr/src/linux/COPYING); if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <asm/asmmacro.h>

/*
 * Common shape of every routine below
 * -----------------------------------
 * Inputs (as used by the code):
 *	in0		byte count; shifted right by 3 to obtain the number
 *			of 8-byte words that are processed
 *	in1		first buffer; it is read as a source and the xor
 *			result is written back to it (through r8)
 *	in2..inN	additional source buffers, only read
 *
 * Net effect, per 8-byte word i:  in1[i] = in1[i] ^ in2[i] ^ ... ^ inN[i]
 *
 * Each loop is a modulo-scheduled (br.ctop) software pipeline:
 *	stage 0			load one word from every source
 *	stage XOR_STAGE		xor the words loaded XOR_STAGE rotations ago
 *	stage XOR_STAGE + 1	store the result produced one rotation ago
 *
 * The rotating register region starts at r32 and therefore overlays the
 * input registers, so every argument is copied into a static register
 * (r8, r16..r20) or into ar.lc before the first rotation takes place.
 *
 * Preconditions visible from the code:
 *	- in0 must be at least 8: ar.lc is loaded with (in0 >> 3) - 1, which
 *	  becomes -1 when the word count is zero.
 *	- Bytes beyond the last whole 8-byte word are not processed.
 *
 * ar.lc and the predicate registers are saved on entry (r30, r29) and
 * restored before returning; the unwind directives describe those saves.
 */

/* Pipeline stage at which the loaded words are xor'ed together. */
#define XOR_STAGE	6

/*
 * xor_ia64_2: in1[i] ^= in2[i] for (in0 >> 3) words.
 */
GLOBAL_ENTRY(xor_ia64_2)
	.prologue
	.fframe 0				/* no memory stack frame */
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 3, 0, 13, 16	/* 3 in + 13 out = 16 regs, all 16 rotating */
	.save ar.lc, r30
	mov r30 = ar.lc				/* preserved copy of the loop counter */
	.save pr, r29
	mov r29 = pr				/* preserved copy of the predicates */
	;;
	.body
	mov r8 = in1				/* r8  = store pointer (first buffer) */
	mov ar.ec = XOR_STAGE + 2		/* epilog count = number of pipeline stages */
	shr in0 = in0, 3			/* bytes -> 8-byte words */
	;;
	adds in0 = -1, in0			/* loop count register wants words - 1 */
	mov r16 = in1				/* r16 = load pointer, source 1 */
	mov r17 = in2				/* r17 = load pointer, source 2 */
	;;
	mov ar.lc = in0
	mov pr.rot = 1 << 16			/* p16 (= stg[0]) on, other rotating predicates off */
	;;
	.rotr src1[XOR_STAGE+1], src2[XOR_STAGE+1], res[2]
	.rotp stg[XOR_STAGE+2]
0:
(stg[0])		ld8.nta src1[0] = [r16], 8
(stg[0])		ld8.nta src2[0] = [r17], 8
(stg[XOR_STAGE])	xor res[0] = src1[XOR_STAGE], src2[XOR_STAGE]
(stg[XOR_STAGE+1])	st8.nta [r8] = res[1], 8	/* res[1] is last rotation's res[0] */
	nop.f 0					/* explicit F-slot no-op; presumably for bundling - confirm */
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30				/* restore caller's loop counter */
	mov pr = r29, -1			/* restore all predicates */
	br.ret.sptk.few rp
END(xor_ia64_2)

/*
 * xor_ia64_3: in1[i] ^= in2[i] ^ in3[i] for (in0 >> 3) words.
 */
GLOBAL_ENTRY(xor_ia64_3)
	.prologue
	.fframe 0				/* no memory stack frame */
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 4, 0, 20, 24	/* 4 in + 20 out = 24 regs, all 24 rotating */
	.save ar.lc, r30
	mov r30 = ar.lc				/* preserved copy of the loop counter */
	.save pr, r29
	mov r29 = pr				/* preserved copy of the predicates */
	;;
	.body
	mov r8 = in1				/* r8  = store pointer (first buffer) */
	mov ar.ec = XOR_STAGE + 2		/* epilog count = number of pipeline stages */
	shr in0 = in0, 3			/* bytes -> 8-byte words */
	;;
	adds in0 = -1, in0			/* loop count register wants words - 1 */
	mov r16 = in1				/* r16 = load pointer, source 1 */
	mov r17 = in2				/* r17 = load pointer, source 2 */
	;;
	mov r18 = in3				/* r18 = load pointer, source 3 */
	mov ar.lc = in0
	mov pr.rot = 1 << 16			/* p16 (= stg[0]) on, other rotating predicates off */
	;;
	.rotr src1[XOR_STAGE+1], src2[XOR_STAGE+1], src3[XOR_STAGE+1], res[2]
	.rotp stg[XOR_STAGE+2]
0:
(stg[0])		ld8.nta src1[0] = [r16], 8
(stg[0])		ld8.nta src2[0] = [r17], 8
(stg[XOR_STAGE])	xor res[0] = src1[XOR_STAGE], src2[XOR_STAGE]
	;;
(stg[0])		ld8.nta src3[0] = [r18], 8
(stg[XOR_STAGE+1])	st8.nta [r8] = res[1], 8	/* res[1] is last rotation's res[0] */
(stg[XOR_STAGE])	xor res[0] = res[0], src3[XOR_STAGE]	/* fold in source 3 */
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30				/* restore caller's loop counter */
	mov pr = r29, -1			/* restore all predicates */
	br.ret.sptk.few rp
END(xor_ia64_3)

/*
 * xor_ia64_4: in1[i] ^= in2[i] ^ in3[i] ^ in4[i] for (in0 >> 3) words.
 * r20 is a non-rotating temporary holding src3 ^ src4 within one iteration.
 */
GLOBAL_ENTRY(xor_ia64_4)
	.prologue
	.fframe 0				/* no memory stack frame */
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 5, 0, 27, 32	/* 5 in + 27 out = 32 regs, all 32 rotating */
	.save ar.lc, r30
	mov r30 = ar.lc				/* preserved copy of the loop counter */
	.save pr, r29
	mov r29 = pr				/* preserved copy of the predicates */
	;;
	.body
	mov r8 = in1				/* r8  = store pointer (first buffer) */
	mov ar.ec = XOR_STAGE + 2		/* epilog count = number of pipeline stages */
	shr in0 = in0, 3			/* bytes -> 8-byte words */
	;;
	adds in0 = -1, in0			/* loop count register wants words - 1 */
	mov r16 = in1				/* r16 = load pointer, source 1 */
	mov r17 = in2				/* r17 = load pointer, source 2 */
	;;
	mov r18 = in3				/* r18 = load pointer, source 3 */
	mov ar.lc = in0
	mov pr.rot = 1 << 16			/* p16 (= stg[0]) on, other rotating predicates off */
	mov r19 = in4				/* r19 = load pointer, source 4 */
	;;
	.rotr src1[XOR_STAGE+1], src2[XOR_STAGE+1], src3[XOR_STAGE+1], src4[XOR_STAGE+1], res[2]
	.rotp stg[XOR_STAGE+2]
0:
(stg[0])		ld8.nta src1[0] = [r16], 8
(stg[0])		ld8.nta src2[0] = [r17], 8
(stg[XOR_STAGE])	xor res[0] = src1[XOR_STAGE], src2[XOR_STAGE]
(stg[0])		ld8.nta src3[0] = [r18], 8
(stg[0])		ld8.nta src4[0] = [r19], 8
(stg[XOR_STAGE])	xor r20 = src3[XOR_STAGE], src4[XOR_STAGE]
	;;
(stg[XOR_STAGE+1])	st8.nta [r8] = res[1], 8	/* res[1] is last rotation's res[0] */
(stg[XOR_STAGE])	xor res[0] = res[0], r20	/* combine the two partial xors */
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30				/* restore caller's loop counter */
	mov pr = r29, -1			/* restore all predicates */
	br.ret.sptk.few rp
END(xor_ia64_4)

/*
 * xor_ia64_5: in1[i] ^= in2[i] ^ in3[i] ^ in4[i] ^ in5[i] for (in0 >> 3) words.
 * r21 is a non-rotating temporary holding src3 ^ src4 within one iteration.
 */
GLOBAL_ENTRY(xor_ia64_5)
	.prologue
	.fframe 0				/* no memory stack frame */
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 6, 0, 34, 40	/* 6 in + 34 out = 40 regs, all 40 rotating */
	.save ar.lc, r30
	mov r30 = ar.lc				/* preserved copy of the loop counter */
	.save pr, r29
	mov r29 = pr				/* preserved copy of the predicates */
	;;
	.body
	mov r8 = in1				/* r8  = store pointer (first buffer) */
	mov ar.ec = XOR_STAGE + 2		/* epilog count = number of pipeline stages */
	shr in0 = in0, 3			/* bytes -> 8-byte words */
	;;
	adds in0 = -1, in0			/* loop count register wants words - 1 */
	mov r16 = in1				/* r16 = load pointer, source 1 */
	mov r17 = in2				/* r17 = load pointer, source 2 */
	;;
	mov r18 = in3				/* r18 = load pointer, source 3 */
	mov ar.lc = in0
	mov pr.rot = 1 << 16			/* p16 (= stg[0]) on, other rotating predicates off */
	mov r19 = in4				/* r19 = load pointer, source 4 */
	mov r20 = in5				/* r20 = load pointer, source 5 */
	;;
	.rotr src1[XOR_STAGE+1], src2[XOR_STAGE+1], src3[XOR_STAGE+1], src4[XOR_STAGE+1], src5[XOR_STAGE+1], res[2]
	.rotp stg[XOR_STAGE+2]
0:
(stg[0])		ld8.nta src1[0] = [r16], 8
(stg[0])		ld8.nta src2[0] = [r17], 8
(stg[XOR_STAGE])	xor res[0] = src1[XOR_STAGE], src2[XOR_STAGE]
(stg[0])		ld8.nta src3[0] = [r18], 8
(stg[0])		ld8.nta src4[0] = [r19], 8
(stg[XOR_STAGE])	xor r21 = src3[XOR_STAGE], src4[XOR_STAGE]
	;;
(stg[0])		ld8.nta src5[0] = [r20], 8
(stg[XOR_STAGE+1])	st8.nta [r8] = res[1], 8	/* res[1] is last rotation's res[0] */
(stg[XOR_STAGE])	xor res[0] = res[0], r21	/* combine the two partial xors */
	;;
(stg[XOR_STAGE])	xor res[0] = res[0], src5[XOR_STAGE]	/* fold in source 5 */
	nop.f 0					/* explicit F-slot no-op; presumably for bundling - confirm */
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30				/* restore caller's loop counter */
	mov pr = r29, -1			/* restore all predicates */
	br.ret.sptk.few rp
END(xor_ia64_5)