xref: /openbmc/linux/lib/raid6/avx512.c (revision 36dacddb)
1dd165a65SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
2e0a491c1SGayatri Kammela /* -*- linux-c -*- --------------------------------------------------------
3e0a491c1SGayatri Kammela  *
4e0a491c1SGayatri Kammela  *   Copyright (C) 2016 Intel Corporation
5e0a491c1SGayatri Kammela  *
6e0a491c1SGayatri Kammela  *   Author: Gayatri Kammela <gayatri.kammela@intel.com>
7e0a491c1SGayatri Kammela  *   Author: Megha Dey <megha.dey@linux.intel.com>
8e0a491c1SGayatri Kammela  *
9e0a491c1SGayatri Kammela  *   Based on avx2.c: Copyright 2012 Yuanhan Liu All Rights Reserved
10e0a491c1SGayatri Kammela  *   Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
11e0a491c1SGayatri Kammela  *
12e0a491c1SGayatri Kammela  * -----------------------------------------------------------------------
13e0a491c1SGayatri Kammela  */
14e0a491c1SGayatri Kammela 
15e0a491c1SGayatri Kammela /*
16e0a491c1SGayatri Kammela  * AVX512 implementation of RAID-6 syndrome functions
17e0a491c1SGayatri Kammela  *
18e0a491c1SGayatri Kammela  */
19e0a491c1SGayatri Kammela 
20e0a491c1SGayatri Kammela #ifdef CONFIG_AS_AVX512
21e0a491c1SGayatri Kammela 
22e0a491c1SGayatri Kammela #include <linux/raid/pq.h>
23e0a491c1SGayatri Kammela #include "x86.h"
24e0a491c1SGayatri Kammela 
/*
 * The GF(2^8) feedback byte 0x1d, replicated across 64 bytes so the
 * whole constant can be loaded into one ZMM register with a single
 * aligned vmovdqa64.  Aligned to 512/8 = 64 bytes as that load requires.
 */
static const struct raid6_avx512_constants {
	u64 x1d[8];
} raid6_avx512_constants __aligned(512/8) = {
	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
};
33e0a491c1SGayatri Kammela 
raid6_have_avx512(void)34e0a491c1SGayatri Kammela static int raid6_have_avx512(void)
35e0a491c1SGayatri Kammela {
36e0a491c1SGayatri Kammela 	return boot_cpu_has(X86_FEATURE_AVX2) &&
37e0a491c1SGayatri Kammela 		boot_cpu_has(X86_FEATURE_AVX) &&
38e0a491c1SGayatri Kammela 		boot_cpu_has(X86_FEATURE_AVX512F) &&
39e0a491c1SGayatri Kammela 		boot_cpu_has(X86_FEATURE_AVX512BW) &&
40e0a491c1SGayatri Kammela 		boot_cpu_has(X86_FEATURE_AVX512VL) &&
41e0a491c1SGayatri Kammela 		boot_cpu_has(X86_FEATURE_AVX512DQ);
42e0a491c1SGayatri Kammela }
43e0a491c1SGayatri Kammela 
/*
 * Compute both RAID-6 parities over all data disks:
 *   P = d_0 ^ d_1 ^ ... ^ d_z0
 *   Q = d_0 ^ d_1*2 ^ ... ^ d_z0*2^z0        (GF(2^8))
 * by Horner's rule from the highest data disk downwards, one 64-byte
 * ZMM lane per loop iteration.  Multiply-by-2 is branchless: bytes
 * whose top bit is set are reduced with the 0x1d feedback constant
 * after the per-byte shift.
 *
 * @disks: total disk count (data disks + P + Q)
 * @bytes: length per disk; loop strides by 64, and vmovdqa64 requires
 *         64-byte-aligned buffers — assumes callers guarantee both
 * @ptrs:  disk buffers, data disks first, then P, then Q
 */
static void raid6_avx5121_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;         /* Highest data disk */
	p = dptr[z0+1];         /* XOR parity */
	q = dptr[z0+2];         /* RS syndrome */

	/* ZMM state may only be touched inside a kernel_fpu section */
	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0\n\t" /* zmm0 = 64 x 0x1d */
		     "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
		     :
		     : "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0; d < bytes; d += 64) {
		/* Prime P and Q with the two highest data disks */
		asm volatile("prefetchnta %0\n\t"
			     "vmovdqa64 %0,%%zmm2\n\t"     /* P[0] */
			     "prefetchnta %1\n\t"
			     "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
			     "vmovdqa64 %1,%%zmm6"
			     :
			     : "m" (dptr[z0][d]), "m" (dptr[z0-1][d]));
		for (z = z0-2; z >= 0; z--) {
			/* Horner step: Q = Q*2 ^ data, P ^= data */
			asm volatile("prefetchnta %0\n\t"
				     "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" /* k1: bytes with top bit set */
				     "vpmovm2b %%k1,%%zmm5\n\t"        /* widen mask to 0x00/0xff bytes */
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" /* per-byte shift left */
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" /* reduction term */
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" /* Q *= 2 in GF(2^8) */
				     "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
				     "vmovdqa64 %0,%%zmm6"             /* preload next disk */
				     :
				     : "m" (dptr[z][d]));
		}
		/* Final Horner step, then stream P/Q out non-temporally */
		asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
			     "vpmovm2b %%k1,%%zmm5\n\t"
			     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
			     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
			     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
			     "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
			     "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
			     "vmovntdq %%zmm2,%0\n\t"
			     "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
			     "vmovntdq %%zmm4,%1\n\t"
			     "vpxorq %%zmm4,%%zmm4,%%zmm4"
			     :
			     : "m" (p[d]), "m" (q[d]));
	}

	/* Order the non-temporal stores before anyone reads P/Q */
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
100e0a491c1SGayatri Kammela 
/*
 * Fold the blocks dptr[start..stop] into existing P/Q parity:
 *   P ^= d_start ^ ... ^ d_stop
 *   Q ^= d_start*2^start ^ ... ^ d_stop*2^stop   (GF(2^8))
 * Disk slots above 'stop' are skipped entirely ("right side
 * optimization"); below 'start' only the multiply-by-2 of the running
 * Q accumulator is performed ("left side optimization"), since those
 * slots contribute no data term.  One 64-byte lane per iteration.
 */
static void raid6_avx5121_xor_syndrome(int disks, int start, int stop,
				       size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	/* zmm0 = 64 copies of the 0x1d reduction byte */
	asm volatile("vmovdqa64 %0,%%zmm0"
		     : : "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 64) {
		/* Q accumulator starts at dptr[z0]; fold it into P too */
		asm volatile("vmovdqa64 %0,%%zmm4\n\t"
			     "vmovdqa64 %1,%%zmm2\n\t"
			     "vpxorq %%zmm4,%%zmm2,%%zmm2"
			     :
			     : "m" (dptr[z0][d]),  "m" (p[d]));
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			/* Q = Q*2 ^ data; P ^= data */
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4"
				     :
				     : "m" (dptr[z][d]));
		}
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			/* Only Q = Q*2 — no data term for these slots */
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4"
				     :
				     : );
		}
		/* XOR accumulated Q into memory; store updated P */
		asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
		/* Don't use movntdq for r/w memory area < cache line */
			     "vmovdqa64 %%zmm4,%0\n\t"
			     "vmovdqa64 %%zmm2,%1"
			     :
			     : "m" (q[d]), "m" (p[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
159694dda62SGayatri Kammela 
160e0a491c1SGayatri Kammela const struct raid6_calls raid6_avx512x1 = {
161e0a491c1SGayatri Kammela 	raid6_avx5121_gen_syndrome,
162694dda62SGayatri Kammela 	raid6_avx5121_xor_syndrome,
163e0a491c1SGayatri Kammela 	raid6_have_avx512,
164e0a491c1SGayatri Kammela 	"avx512x1",
165*36dacddbSDirk Müller 	.priority = 2		/* Prefer AVX512 over priority 1 (SSE2 and others) */
166e0a491c1SGayatri Kammela };
167e0a491c1SGayatri Kammela 
168e0a491c1SGayatri Kammela /*
169e0a491c1SGayatri Kammela  * Unrolled-by-2 AVX512 implementation
170e0a491c1SGayatri Kammela  */
/*
 * Same P/Q generation as raid6_avx5121_gen_syndrome(), but processing
 * two 64-byte ZMM lanes (128 bytes) per loop iteration:
 * zmm2/zmm3 accumulate P[0..1], zmm4/zmm6 accumulate Q[0..1].
 */
static void raid6_avx5122_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;         /* Highest data disk */
	p = dptr[z0+1];         /* XOR parity */
	q = dptr[z0+2];         /* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0\n\t" /* zmm0 = 64 x 0x1d */
		     "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
		     :
		     : "m" (raid6_avx512_constants.x1d[0]));

	/* We uniformly assume a single prefetch covers at least 64 bytes */
	for (d = 0; d < bytes; d += 128) {
		asm volatile("prefetchnta %0\n\t"
			     "prefetchnta %1\n\t"
			     "vmovdqa64 %0,%%zmm2\n\t"      /* P[0] */
			     "vmovdqa64 %1,%%zmm3\n\t"      /* P[1] */
			     "vmovdqa64 %%zmm2,%%zmm4\n\t"  /* Q[0] */
			     "vmovdqa64 %%zmm3,%%zmm6"      /* Q[1] */
			     :
			     : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]));
		for (z = z0-1; z >= 0; z--) {
			/* Horner step on both lanes: Q = Q*2 ^ d, P ^= d */
			asm volatile("prefetchnta %0\n\t"
				     "prefetchnta %1\n\t"
				     "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vmovdqa64 %1,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6"
				     :
				     : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
		}
		/* Non-temporal stores of both P and Q lanes */
		asm volatile("vmovntdq %%zmm2,%0\n\t"
			     "vmovntdq %%zmm3,%1\n\t"
			     "vmovntdq %%zmm4,%2\n\t"
			     "vmovntdq %%zmm6,%3"
			     :
			     : "m" (p[d]), "m" (p[d+64]), "m" (q[d]),
			       "m" (q[d+64]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
232e0a491c1SGayatri Kammela 
/*
 * Two-lane (128 bytes/iteration) variant of the P/Q update: folds
 * dptr[start..stop] into existing P and Q, with the same right/left
 * side optimizations as raid6_avx5121_xor_syndrome().
 * zmm2/zmm3 hold P[0..1], zmm4/zmm6 hold Q[0..1].
 */
static void raid6_avx5122_xor_syndrome(int disks, int start, int stop,
				       size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	/* zmm0 = 64 copies of the 0x1d reduction byte */
	asm volatile("vmovdqa64 %0,%%zmm0"
		     : : "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 128) {
		/* Prime Q lanes with dptr[z0] and fold them into P */
		asm volatile("vmovdqa64 %0,%%zmm4\n\t"
			     "vmovdqa64 %1,%%zmm6\n\t"
			     "vmovdqa64 %2,%%zmm2\n\t"
			     "vmovdqa64 %3,%%zmm3\n\t"
			     "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
			     "vpxorq %%zmm6,%%zmm3,%%zmm3"
			     :
			     : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
			       "m" (p[d]), "m" (p[d+64]));
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			/* Q = Q*2 ^ data; P ^= data (both lanes) */
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vmovdqa64 %1,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6"
				     :
				     : "m" (dptr[z][d]),  "m" (dptr[z][d+64]));
		}
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			/* Only Q = Q*2 — no data term for these slots */
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6"
				     :
				     : );
		}
		asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
			     "vpxorq %1,%%zmm6,%%zmm6\n\t"
			     /* Don't use movntdq for r/w
			      * memory area < cache line
			      */
			     "vmovdqa64 %%zmm4,%0\n\t"
			     "vmovdqa64 %%zmm6,%1\n\t"
			     "vmovdqa64 %%zmm2,%2\n\t"
			     "vmovdqa64 %%zmm3,%3"
			     :
			     : "m" (q[d]), "m" (q[d+64]), "m" (p[d]),
			       "m" (p[d+64]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
316694dda62SGayatri Kammela 
317e0a491c1SGayatri Kammela const struct raid6_calls raid6_avx512x2 = {
318e0a491c1SGayatri Kammela 	raid6_avx5122_gen_syndrome,
319694dda62SGayatri Kammela 	raid6_avx5122_xor_syndrome,
320e0a491c1SGayatri Kammela 	raid6_have_avx512,
321e0a491c1SGayatri Kammela 	"avx512x2",
322*36dacddbSDirk Müller 	.priority = 2		/* Prefer AVX512 over priority 1 (SSE2 and others) */
323e0a491c1SGayatri Kammela };
324e0a491c1SGayatri Kammela 
325e0a491c1SGayatri Kammela #ifdef CONFIG_X86_64
326e0a491c1SGayatri Kammela 
/*
 * Unrolled-by-4 AVX512 implementation
 */
/*
 * Four-lane (256 bytes/iteration) P/Q generation.  Needs x86-64: it
 * uses the high ZMM registers — zmm2/3/10/11 hold P[0..3] and
 * zmm4/6/12/14 hold Q[0..3].  Unlike the x1/x2 variants the
 * accumulators are zeroed up front and re-zeroed after each store,
 * so the inner loop runs over all data disks including z0.
 */
static void raid6_avx5124_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;         /* Highest data disk */
	p = dptr[z0+1];         /* XOR parity */
	q = dptr[z0+2];         /* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0\n\t" /* zmm0 = 64 x 0x1d */
		     "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t"       /* Zero temp */
		     "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"       /* P[0] */
		     "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"       /* P[1] */
		     "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"       /* Q[0] */
		     "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"       /* Q[1] */
		     "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"    /* P[2] */
		     "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"    /* P[3] */
		     "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"    /* Q[2] */
		     "vpxorq %%zmm14,%%zmm14,%%zmm14"        /* Q[3] */
		     :
		     : "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0; d < bytes; d += 256) {
		for (z = z0; z >= 0; z--) {
		/* Horner step on all four lanes: Q = Q*2 ^ d, P ^= d */
		asm volatile("prefetchnta %0\n\t"
			     "prefetchnta %1\n\t"
			     "prefetchnta %2\n\t"
			     "prefetchnta %3\n\t"
			     "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
			     "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
			     "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t"
			     "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t"
			     "vpmovm2b %%k1,%%zmm5\n\t"
			     "vpmovm2b %%k2,%%zmm7\n\t"
			     "vpmovm2b %%k3,%%zmm13\n\t"
			     "vpmovm2b %%k4,%%zmm15\n\t"
			     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
			     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
			     "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
			     "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
			     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
			     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
			     "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
			     "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
			     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
			     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
			     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
			     "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
			     "vmovdqa64 %0,%%zmm5\n\t"
			     "vmovdqa64 %1,%%zmm7\n\t"
			     "vmovdqa64 %2,%%zmm13\n\t"
			     "vmovdqa64 %3,%%zmm15\n\t"
			     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
			     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
			     "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
			     "vpxorq %%zmm15,%%zmm11,%%zmm11\n"
			     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
			     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
			     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
			     "vpxorq %%zmm15,%%zmm14,%%zmm14"
			     :
			     : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
			       "m" (dptr[z][d+128]), "m" (dptr[z][d+192]));
		}
		/* Stream P/Q out and re-zero the accumulators */
		asm volatile("vmovntdq %%zmm2,%0\n\t"
			     "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
			     "vmovntdq %%zmm3,%1\n\t"
			     "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"
			     "vmovntdq %%zmm10,%2\n\t"
			     "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"
			     "vmovntdq %%zmm11,%3\n\t"
			     "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"
			     "vmovntdq %%zmm4,%4\n\t"
			     "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"
			     "vmovntdq %%zmm6,%5\n\t"
			     "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"
			     "vmovntdq %%zmm12,%6\n\t"
			     "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"
			     "vmovntdq %%zmm14,%7\n\t"
			     "vpxorq %%zmm14,%%zmm14,%%zmm14"
			     :
			     : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
			       "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
			       "m" (q[d+128]), "m" (q[d+192]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
422e0a491c1SGayatri Kammela 
/*
 * Four-lane (256 bytes/iteration) P/Q update.  Folds the blocks
 * dptr[start..stop] into the existing parity:
 *   P ^= d_start ^ ... ^ d_stop
 *   Q ^= d_start*2^start ^ ... ^ d_stop*2^stop   (GF(2^8))
 * Horner evaluation runs from 'stop' downwards; slots above 'stop' are
 * skipped ("right side optimization") and below 'start' only the
 * multiply-by-2 of the running Q lanes is performed ("left side").
 * Register use: zmm2/3/10/11 = P[0..3], zmm4/6/12/14 = Q[0..3],
 * zmm0 = the replicated 0x1d reduction byte.
 */
static void raid6_avx5124_xor_syndrome(int disks, int start, int stop,
				       size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0"
		     :: "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 256) {
		/* Prime the Q lanes with dptr[z0] and fold it into P */
		asm volatile("vmovdqa64 %0,%%zmm4\n\t"
			     "vmovdqa64 %1,%%zmm6\n\t"
			     "vmovdqa64 %2,%%zmm12\n\t"
			     "vmovdqa64 %3,%%zmm14\n\t"
			     "vmovdqa64 %4,%%zmm2\n\t"
			     "vmovdqa64 %5,%%zmm3\n\t"
			     "vmovdqa64 %6,%%zmm10\n\t"
			     "vmovdqa64 %7,%%zmm11\n\t"
			     "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
			     "vpxorq %%zmm6,%%zmm3,%%zmm3\n\t"
			     "vpxorq %%zmm12,%%zmm10,%%zmm10\n\t"
			     "vpxorq %%zmm14,%%zmm11,%%zmm11"
			     :
			     : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
			       "m" (dptr[z0][d+128]), "m" (dptr[z0][d+192]),
			       "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
			       "m" (p[d+192]));
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			/* Q = Q*2 ^ data; P ^= data (all four lanes) */
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
				     "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
				     "prefetchnta %0\n\t"
				     "prefetchnta %2\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
				     "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
				     "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpmovm2b %%k3,%%zmm13\n\t"
				     "vpmovm2b %%k4,%%zmm15\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
				     "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
				     "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
				     "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vmovdqa64 %1,%%zmm7\n\t"
				     "vmovdqa64 %2,%%zmm13\n\t"
				     "vmovdqa64 %3,%%zmm15\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
				     "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
				     "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
				     "vpxorq %%zmm15,%%zmm14,%%zmm14"
				     :
				     : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
				       "m" (dptr[z][d+128]),
				       "m" (dptr[z][d+192]));
		}
		asm volatile("prefetchnta %0\n\t"
			     "prefetchnta %1\n\t"
			     :
			     : "m" (q[d]), "m" (q[d+128]));
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			/* Only Q = Q*2 — no data term for these slots */
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
				     "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
				     "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
				     "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpmovm2b %%k3,%%zmm13\n\t"
				     "vpmovm2b %%k4,%%zmm15\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
				     "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
				     "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
				     "vpxorq %%zmm15,%%zmm14,%%zmm14"
				     :
				     : );
		}
		/* Store updated P; XOR accumulated Q into memory and store */
		asm volatile("vmovntdq %%zmm2,%0\n\t"
			     "vmovntdq %%zmm3,%1\n\t"
			     "vmovntdq %%zmm10,%2\n\t"
			     "vmovntdq %%zmm11,%3\n\t"
			     "vpxorq %4,%%zmm4,%%zmm4\n\t"
			     "vpxorq %5,%%zmm6,%%zmm6\n\t"
			     "vpxorq %6,%%zmm12,%%zmm12\n\t"
			     "vpxorq %7,%%zmm14,%%zmm14\n\t"
			     "vmovntdq %%zmm4,%4\n\t"
			     "vmovntdq %%zmm6,%5\n\t"
			     "vmovntdq %%zmm12,%6\n\t"
			     "vmovntdq %%zmm14,%7"
			     :
			     : "m" (p[d]),  "m" (p[d+64]), "m" (p[d+128]),
			       "m" (p[d+192]), "m" (q[d]),  "m" (q[d+64]),
			       "m" (q[d+128]), "m" (q[d+192]));
	}
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
555e0a491c1SGayatri Kammela const struct raid6_calls raid6_avx512x4 = {
556e0a491c1SGayatri Kammela 	raid6_avx5124_gen_syndrome,
557694dda62SGayatri Kammela 	raid6_avx5124_xor_syndrome,
558e0a491c1SGayatri Kammela 	raid6_have_avx512,
559e0a491c1SGayatri Kammela 	"avx512x4",
560*36dacddbSDirk Müller 	.priority = 2		/* Prefer AVX512 over priority 1 (SSE2 and others) */
561e0a491c1SGayatri Kammela };
562e0a491c1SGayatri Kammela #endif
563e0a491c1SGayatri Kammela 
564e0a491c1SGayatri Kammela #endif /* CONFIG_AS_AVX512 */
565