xref: /openbmc/linux/include/asm-generic/xor.h (revision 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2)
1*1da177e4SLinus Torvalds /*
2*1da177e4SLinus Torvalds  * include/asm-generic/xor.h
3*1da177e4SLinus Torvalds  *
4*1da177e4SLinus Torvalds  * Generic optimized RAID-5 checksumming functions.
5*1da177e4SLinus Torvalds  *
6*1da177e4SLinus Torvalds  * This program is free software; you can redistribute it and/or modify
7*1da177e4SLinus Torvalds  * it under the terms of the GNU General Public License as published by
8*1da177e4SLinus Torvalds  * the Free Software Foundation; either version 2, or (at your option)
9*1da177e4SLinus Torvalds  * any later version.
10*1da177e4SLinus Torvalds  *
11*1da177e4SLinus Torvalds  * You should have received a copy of the GNU General Public License
12*1da177e4SLinus Torvalds  * (for example /usr/src/linux/COPYING); if not, write to the Free
13*1da177e4SLinus Torvalds  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
14*1da177e4SLinus Torvalds  */
15*1da177e4SLinus Torvalds 
16*1da177e4SLinus Torvalds #include <asm/processor.h>
17*1da177e4SLinus Torvalds 
/*
 * XOR the buffer at p2 into the buffer at p1, in place.
 *
 * @bytes: length of each buffer in bytes.  Work is done in "lines" of
 *         eight longs; a tail shorter than one line is left untouched
 *         and a length below one full line is a no-op.
 * @p1:    destination, also the first source operand.
 * @p2:    second source operand; only read.
 */
static void
xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * The do/while below always executes its body once, so a request
	 * without a single complete line has to be rejected here or it
	 * would touch eight longs the caller never handed over.
	 */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
}
36*1da177e4SLinus Torvalds 
/*
 * XOR the buffers at p2 and p3 into the buffer at p1, in place.
 *
 * @bytes: length of each buffer in bytes.  Work is done in "lines" of
 *         eight longs; a tail shorter than one line is left untouched
 *         and a length below one full line is a no-op.
 * @p1:    destination, also the first source operand.
 * @p2:    source operand; only read.
 * @p3:    source operand; only read.
 */
static void
xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * The do/while body runs at least once; without this check a
	 * request holding no complete line would still access eight longs
	 * of every buffer.
	 */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
}
57*1da177e4SLinus Torvalds 
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1, in place.
 *
 * @bytes: length of each buffer in bytes.  Work is done in "lines" of
 *         eight longs; a tail shorter than one line is left untouched
 *         and a length below one full line is a no-op.
 * @p1:    destination, also the first source operand.
 * @p2:    source operand; only read.
 * @p3:    source operand; only read.
 * @p4:    source operand; only read.
 */
static void
xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * The do/while body runs at least once; without this check a
	 * request holding no complete line would still access eight longs
	 * of every buffer.
	 */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
}
79*1da177e4SLinus Torvalds 
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, in place.
 *
 * @bytes: length of each buffer in bytes.  Work is done in "lines" of
 *         eight longs; a tail shorter than one line is left untouched
 *         and a length below one full line is a no-op.
 * @p1:    destination, also the first source operand.
 * @p2:    source operand; only read.
 * @p3:    source operand; only read.
 * @p4:    source operand; only read.
 * @p5:    source operand; only read.
 */
static void
xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * The do/while body runs at least once; without this check a
	 * request holding no complete line would still access eight longs
	 * of every buffer.
	 */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
}
102*1da177e4SLinus Torvalds 
/*
 * XOR the buffer at p2 into the buffer at p1, in place.  Each line of
 * eight longs is first loaded from p1 into eight locals, combined with
 * p2 there, and only then written back.
 *
 * @bytes: length of each buffer in bytes.  A tail shorter than one line
 *         of eight longs is left untouched and a length below one full
 *         line is a no-op.
 * @p1:    destination, also the first source operand.
 * @p2:    second source operand; only read.
 */
static void
xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The do/while body runs at least once; refuse an empty request. */
	if (lines <= 0)
		return;

	do {
		/*
		 * Same type as the data: keeps values with the top bit set
		 * from going through a signed conversion on the way in.
		 */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
}
138*1da177e4SLinus Torvalds 
/*
 * XOR the buffers at p2 and p3 into the buffer at p1, in place.  Each
 * line of eight longs is loaded from p1 into eight locals, combined with
 * one source after the other, and then written back.
 *
 * @bytes: length of each buffer in bytes.  A tail shorter than one line
 *         of eight longs is left untouched and a length below one full
 *         line is a no-op.
 * @p1:    destination, also the first source operand.
 * @p2:    source operand; only read.
 * @p3:    source operand; only read.
 */
static void
xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The do/while body runs at least once; refuse an empty request. */
	if (lines <= 0)
		return;

	do {
		/* Unsigned, like the data, so no signed conversion occurs. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
}
184*1da177e4SLinus Torvalds 
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1, in place.
 * Each line of eight longs is loaded from p1 into eight locals, combined
 * with one source after the other, and then written back.
 *
 * @bytes: length of each buffer in bytes.  A tail shorter than one line
 *         of eight longs is left untouched and a length below one full
 *         line is a no-op.
 * @p1:    destination, also the first source operand.
 * @p2:    source operand; only read.
 * @p3:    source operand; only read.
 * @p4:    source operand; only read.
 */
static void
xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The do/while body runs at least once; refuse an empty request. */
	if (lines <= 0)
		return;

	do {
		/* Unsigned, like the data, so no signed conversion occurs. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
}
239*1da177e4SLinus Torvalds 
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, in place.
 * Each line of eight longs is loaded from p1 into eight locals, combined
 * with one source after the other, and then written back.
 *
 * @bytes: length of each buffer in bytes.  A tail shorter than one line
 *         of eight longs is left untouched and a length below one full
 *         line is a no-op.
 * @p1:    destination, also the first source operand.
 * @p2:    source operand; only read.
 * @p3:    source operand; only read.
 * @p4:    source operand; only read.
 * @p5:    source operand; only read.
 */
static void
xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The do/while body runs at least once; refuse an empty request. */
	if (lines <= 0)
		return;

	do {
		/* Unsigned, like the data, so no signed conversion occurs. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
}
303*1da177e4SLinus Torvalds 
/*
 * XOR the buffer at p2 into the buffer at p1, in place, issuing
 * prefetches one line (eight longs) ahead of the line being processed.
 *
 * @bytes: length of each buffer in bytes.  Only complete lines of eight
 *         longs are processed; a length below one full line is a no-op.
 * @p1:    destination, also the first source operand.
 * @p2:    second source operand; only read.
 *
 * NOTE(review): prefetchw()/prefetch() are defined outside this file;
 * presumably write-intent and read-intent cache hints - confirm.
 */
static void
xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	/* One less than the number of complete lines. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/*
	 * Negative means not even one complete line.  The loop body would
	 * still run once, so leave before anything is touched.
	 */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
 once_more:
		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);

	/*
	 * If the loop stopped with lines == 0, exactly one line is still
	 * unprocessed.  Re-enter the body below the prefetches to handle
	 * it; that pass leaves lines at -1, so this branch is taken at
	 * most once.
	 */
	if (lines == 0)
		goto once_more;
}
329*1da177e4SLinus Torvalds 
/*
 * XOR the buffers at p2 and p3 into the buffer at p1, in place, issuing
 * prefetches one line (eight longs) ahead of the line being processed.
 *
 * @bytes: length of each buffer in bytes.  Only complete lines of eight
 *         longs are processed; a length below one full line is a no-op.
 * @p1:    destination, also the first source operand.
 * @p2:    source operand; only read.
 * @p3:    source operand; only read.
 *
 * NOTE(review): prefetchw()/prefetch() are defined outside this file;
 * presumably write-intent and read-intent cache hints - confirm.
 */
static void
xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3)
{
	/* One less than the number of complete lines. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/*
	 * Negative means not even one complete line.  The loop body would
	 * still run once, so leave before anything is touched.
	 */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);

	/*
	 * If the loop stopped with lines == 0, exactly one line is still
	 * unprocessed.  Re-enter the body below the prefetches to handle
	 * it; that pass leaves lines at -1, so this branch is taken at
	 * most once.
	 */
	if (lines == 0)
		goto once_more;
}
359*1da177e4SLinus Torvalds 
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1, in place,
 * issuing prefetches one line (eight longs) ahead of the line being
 * processed.
 *
 * @bytes: length of each buffer in bytes.  Only complete lines of eight
 *         longs are processed; a length below one full line is a no-op.
 * @p1:    destination, also the first source operand.
 * @p2:    source operand; only read.
 * @p3:    source operand; only read.
 * @p4:    source operand; only read.
 *
 * NOTE(review): prefetchw()/prefetch() are defined outside this file;
 * presumably write-intent and read-intent cache hints - confirm.
 */
static void
xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4)
{
	/* One less than the number of complete lines. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/*
	 * Negative means not even one complete line.  The loop body would
	 * still run once, so leave before anything is touched.
	 */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);

	/*
	 * If the loop stopped with lines == 0, exactly one line is still
	 * unprocessed.  Re-enter the body below the prefetches to handle
	 * it; that pass leaves lines at -1, so this branch is taken at
	 * most once.
	 */
	if (lines == 0)
		goto once_more;
}
393*1da177e4SLinus Torvalds 
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, in place,
 * issuing prefetches one line (eight longs) ahead of the line being
 * processed.
 *
 * @bytes: length of each buffer in bytes.  Only complete lines of eight
 *         longs are processed; a length below one full line is a no-op.
 * @p1:    destination, also the first source operand.
 * @p2:    source operand; only read.
 * @p3:    source operand; only read.
 * @p4:    source operand; only read.
 * @p5:    source operand; only read.
 *
 * NOTE(review): prefetchw()/prefetch() are defined outside this file;
 * presumably write-intent and read-intent cache hints - confirm.
 */
static void
xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	/* One less than the number of complete lines. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/*
	 * Negative means not even one complete line.  The loop body would
	 * still run once, so leave before anything is touched.
	 */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
		prefetch(p5+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);

	/*
	 * If the loop stopped with lines == 0, exactly one line is still
	 * unprocessed.  Re-enter the body below the prefetches to handle
	 * it; that pass leaves lines at -1, so this branch is taken at
	 * most once.
	 */
	if (lines == 0)
		goto once_more;
}
430*1da177e4SLinus Torvalds 
/*
 * XOR the buffer at p2 into the buffer at p1, in place.  Each line of
 * eight longs is loaded from p1 into eight locals, combined with p2 and
 * written back, while prefetches run one line ahead.
 *
 * @bytes: length of each buffer in bytes.  Only complete lines of eight
 *         longs are processed; a length below one full line is a no-op.
 * @p1:    destination, also the first source operand.
 * @p2:    second source operand; only read.
 *
 * NOTE(review): prefetchw()/prefetch() are defined outside this file;
 * presumably write-intent and read-intent cache hints - confirm.
 */
static void
xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	/* One less than the number of complete lines. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/*
	 * Negative means not even one complete line.  The loop body would
	 * still run once, so leave before anything is touched.
	 */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);

	do {
		/* Unsigned, like the data, so no signed conversion occurs. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
 once_more:
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);

	/*
	 * If the loop stopped with lines == 0, exactly one line is still
	 * unprocessed.  Re-enter the body below the prefetches to handle
	 * it; that pass leaves lines at -1, so this branch is taken at
	 * most once.
	 */
	if (lines == 0)
		goto once_more;
}
475*1da177e4SLinus Torvalds 
/*
 * XOR the buffers at p2 and p3 into the buffer at p1, in place.  Each
 * line of eight longs is loaded from p1 into eight locals, combined with
 * one source after the other and written back, while prefetches run one
 * line ahead.
 *
 * @bytes: length of each buffer in bytes.  Only complete lines of eight
 *         longs are processed; a length below one full line is a no-op.
 * @p1:    destination, also the first source operand.
 * @p2:    source operand; only read.
 * @p3:    source operand; only read.
 *
 * NOTE(review): prefetchw()/prefetch() are defined outside this file;
 * presumably write-intent and read-intent cache hints - confirm.
 */
static void
xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3)
{
	/* One less than the number of complete lines. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/*
	 * Negative means not even one complete line.  The loop body would
	 * still run once, so leave before anything is touched.
	 */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	do {
		/* Unsigned, like the data, so no signed conversion occurs. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
 once_more:
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);

	/*
	 * If the loop stopped with lines == 0, exactly one line is still
	 * unprocessed.  Re-enter the body below the prefetches to handle
	 * it; that pass leaves lines at -1, so this branch is taken at
	 * most once.
	 */
	if (lines == 0)
		goto once_more;
}
532*1da177e4SLinus Torvalds 
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1, in place.
 * Each line of eight longs is loaded from p1 into eight locals, combined
 * with one source after the other and written back, while prefetches
 * run one line ahead.
 *
 * @bytes: length of each buffer in bytes.  Only complete lines of eight
 *         longs are processed; a length below one full line is a no-op.
 * @p1:    destination, also the first source operand.
 * @p2:    source operand; only read.
 * @p3:    source operand; only read.
 * @p4:    source operand; only read.
 *
 * NOTE(review): prefetchw()/prefetch() are defined outside this file;
 * presumably write-intent and read-intent cache hints - confirm.
 */
static void
xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4)
{
	/* One less than the number of complete lines. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/*
	 * Negative means not even one complete line.  The loop body would
	 * still run once, so leave before anything is touched.
	 */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	do {
		/* Unsigned, like the data, so no signed conversion occurs. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
 once_more:
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);

	/*
	 * If the loop stopped with lines == 0, exactly one line is still
	 * unprocessed.  Re-enter the body below the prefetches to handle
	 * it; that pass leaves lines at -1, so this branch is taken at
	 * most once.
	 */
	if (lines == 0)
		goto once_more;
}
600*1da177e4SLinus Torvalds 
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, in place.
 * Each line of eight longs is loaded from p1 into eight locals, combined
 * with one source after the other and written back, while prefetches
 * run one line ahead.
 *
 * @bytes: length of each buffer in bytes.  Only complete lines of eight
 *         longs are processed; a length below one full line is a no-op.
 * @p1:    destination, also the first source operand.
 * @p2:    source operand; only read.
 * @p3:    source operand; only read.
 * @p4:    source operand; only read.
 * @p5:    source operand; only read.
 *
 * NOTE(review): prefetchw()/prefetch() are defined outside this file;
 * presumably write-intent and read-intent cache hints - confirm.
 */
static void
xor_32regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	/* One less than the number of complete lines. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/*
	 * Negative means not even one complete line.  The loop body would
	 * still run once, so leave before anything is touched.
	 */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	do {
		/* Unsigned, like the data, so no signed conversion occurs. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
		prefetch(p5+8);
 once_more:
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);

	/*
	 * If the loop stopped with lines == 0, exactly one line is still
	 * unprocessed.  Re-enter the body below the prefetches to handle
	 * it; that pass leaves lines at -1, so this branch is taken at
	 * most once.
	 */
	if (lines == 0)
		goto once_more;
}
679*1da177e4SLinus Torvalds 
680*1da177e4SLinus Torvalds static struct xor_block_template xor_block_8regs = {
681*1da177e4SLinus Torvalds 	.name = "8regs",
682*1da177e4SLinus Torvalds 	.do_2 = xor_8regs_2,
683*1da177e4SLinus Torvalds 	.do_3 = xor_8regs_3,
684*1da177e4SLinus Torvalds 	.do_4 = xor_8regs_4,
685*1da177e4SLinus Torvalds 	.do_5 = xor_8regs_5,
686*1da177e4SLinus Torvalds };
687*1da177e4SLinus Torvalds 
688*1da177e4SLinus Torvalds static struct xor_block_template xor_block_32regs = {
689*1da177e4SLinus Torvalds 	.name = "32regs",
690*1da177e4SLinus Torvalds 	.do_2 = xor_32regs_2,
691*1da177e4SLinus Torvalds 	.do_3 = xor_32regs_3,
692*1da177e4SLinus Torvalds 	.do_4 = xor_32regs_4,
693*1da177e4SLinus Torvalds 	.do_5 = xor_32regs_5,
694*1da177e4SLinus Torvalds };
695*1da177e4SLinus Torvalds 
696*1da177e4SLinus Torvalds static struct xor_block_template xor_block_8regs_p = {
697*1da177e4SLinus Torvalds 	.name = "8regs_prefetch",
698*1da177e4SLinus Torvalds 	.do_2 = xor_8regs_p_2,
699*1da177e4SLinus Torvalds 	.do_3 = xor_8regs_p_3,
700*1da177e4SLinus Torvalds 	.do_4 = xor_8regs_p_4,
701*1da177e4SLinus Torvalds 	.do_5 = xor_8regs_p_5,
702*1da177e4SLinus Torvalds };
703*1da177e4SLinus Torvalds 
704*1da177e4SLinus Torvalds static struct xor_block_template xor_block_32regs_p = {
705*1da177e4SLinus Torvalds 	.name = "32regs_prefetch",
706*1da177e4SLinus Torvalds 	.do_2 = xor_32regs_p_2,
707*1da177e4SLinus Torvalds 	.do_3 = xor_32regs_p_3,
708*1da177e4SLinus Torvalds 	.do_4 = xor_32regs_p_4,
709*1da177e4SLinus Torvalds 	.do_5 = xor_32regs_p_5,
710*1da177e4SLinus Torvalds };
711*1da177e4SLinus Torvalds 
/*
 * Pass each of the four generic templates to xor_speed(), in the order
 * 8regs, 8regs_prefetch, 32regs, 32regs_prefetch.
 * NOTE(review): xor_speed() is defined outside this file; presumably it
 * benchmarks the template it is given - confirm.
 */
#define XOR_TRY_TEMPLATES				\
	do {						\
		xor_speed(&xor_block_8regs);		\
		xor_speed(&xor_block_8regs_p);		\
		xor_speed(&xor_block_32regs);		\
		xor_speed(&xor_block_32regs_p);		\
	} while (0)
719