/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, Inc., 59 Temple Place Ste 330,
 *   Boston MA 02111-1307, USA; either version 2 of the License, or
 *   (at your option) any later version; incorporated herein by reference.
 *
 * ----------------------------------------------------------------------- */

/*
 * raid6sse1.c
 *
 * SSE-1/MMXEXT implementation of RAID-6 syndrome functions
 *
 * This is really an MMX implementation, but it requires SSE-1 or
 * AMD MMXEXT for prefetch support and a few other features.  The
 * support for nontemporal memory accesses is enough to make this
 * worthwhile as a separate implementation.
 */
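
/*
 * Concretely, the loops below fetch each data strip with prefetchnta
 * (a nontemporal prefetch hint), stream P and Q out with movntq stores
 * that bypass the cache, and finish with sfence to order those
 * nontemporal stores before anything that follows.
 */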

#if defined(__i386__) && !defined(__arch_um__)

#include <linux/raid/pq.h>
#include "raid6x86.h"

/* Defined in raid6mmx.c */
extern const struct raid6_mmx_constants {
	u64 x1d;
} raid6_mmx_constants;
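
/*
 * For reference: the pcmpgtb/paddb/pand/pxor sequences below perform a
 * byte-lane multiply by 2 in GF(2^8) with the RAID-6 polynomial 0x11d.
 * A scalar sketch of the same step (illustrative only; this helper name
 * is made up and nothing in the RAID-6 code calls it):
 */
static inline u8 raid6_gf_mul2(u8 v)
{
	/* Shift left; if the high bit was set, reduce by 0x1d (x^8 mod 0x11d) */
	return (v << 1) ^ ((v & 0x80) ? 0x1d : 0);
}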

static int raid6_have_sse1_or_mmxext(void)
{
	/* Despite the name, boot_cpu_has() checks feature bits common to all CPUs */
	return boot_cpu_has(X86_FEATURE_MMX) &&
		(boot_cpu_has(X86_FEATURE_XMM) ||
		 boot_cpu_has(X86_FEATURE_MMXEXT));
}

/*
 * Plain SSE1 implementation
 */
static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

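	/* MMX clobbers the x87/FPU state, so bracket its use in kernel_fpu_begin/end */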
	kernel_fpu_begin();

	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
	asm volatile("pxor %mm5,%mm5");	/* Zero temp */

	for ( d = 0 ; d < bytes ; d += 8 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("movq %mm2,%mm4");	/* Q[0] */
		asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d]));
		for ( z = z0-2 ; z >= 0 ; z-- ) {
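			/*
			 * mm4 = 2*mm4 in GF(2^8), then fold the previously
			 * loaded strip (mm6) into P (mm2) and Q (mm4) while
			 * the next strip streams in.
			 */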
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("pcmpgtb %mm4,%mm5");
			asm volatile("paddb %mm4,%mm4");
			asm volatile("pand %mm0,%mm5");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm5,%mm5");
			asm volatile("pxor %mm6,%mm2");
			asm volatile("pxor %mm6,%mm4");
			asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
		}
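		/* Peeled final step: one more x2 and fold for the last-loaded strip */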
		asm volatile("pcmpgtb %mm4,%mm5");
		asm volatile("paddb %mm4,%mm4");
		asm volatile("pand %mm0,%mm5");
		asm volatile("pxor %mm5,%mm4");
		asm volatile("pxor %mm5,%mm5");
		asm volatile("pxor %mm6,%mm2");
		asm volatile("pxor %mm6,%mm4");

		asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
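
/*
 * Usage sketch (illustrative; NDISKS and the buffer setup are assumed,
 * not part of this file): ptrs[] carries the data strips followed by
 * the P and Q destinations, all of equal length.
 *
 *	void *ptrs[NDISKS];	// NDISKS-2 data strips, then P, then Q
 *	raid6_sse1x1.gen_syndrome(NDISKS, PAGE_SIZE, ptrs);
 */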

const struct raid6_calls raid6_sse1x1 = {
	raid6_sse11_gen_syndrome,
	raid6_have_sse1_or_mmxext,
	"sse1x1",
	1			/* Has cache hints */
};

/*
 * Unrolled-by-2 SSE1 implementation
 */
static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
	asm volatile("pxor %mm5,%mm5");	/* Zero temp */
	asm volatile("pxor %mm7,%mm7"); /* Zero temp */

	/* We uniformly assume a single prefetch covers at least 16 bytes */
	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */
		asm volatile("movq %mm2,%mm4");	/* Q[0] */
		asm volatile("movq %mm3,%mm6"); /* Q[1] */
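		/*
		 * mm5/mm7 do double duty below: mask temporaries for the
		 * GF(2^8) multiply, then scratch registers for the data
		 * words, re-zeroed at the bottom of each iteration.
		 */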
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("pcmpgtb %mm4,%mm5");
			asm volatile("pcmpgtb %mm6,%mm7");
			asm volatile("paddb %mm4,%mm4");
			asm volatile("paddb %mm6,%mm6");
			asm volatile("pand %mm0,%mm5");
			asm volatile("pand %mm0,%mm7");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm7,%mm6");
			asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
			asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
			asm volatile("pxor %mm5,%mm2");
			asm volatile("pxor %mm7,%mm3");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm7,%mm6");
			asm volatile("pxor %mm5,%mm5");
			asm volatile("pxor %mm7,%mm7");
		}
		asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
		asm volatile("movntq %%mm3,%0" : "=m" (p[d+8]));
		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
		asm volatile("movntq %%mm6,%0" : "=m" (q[d+8]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_sse1x2 = {
	raid6_sse12_gen_syndrome,
	raid6_have_sse1_or_mmxext,
	"sse1x2",
	1			/* Has cache hints */
};
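
/*
 * Both variants are registered in the raid6_algos[] table in
 * lib/raid6/algos.c, which benchmarks each available gen_syndrome at
 * init time and selects the fastest.
 */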

#endif