// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
 * Author: Megha Dey <megha.dey@linux.intel.com>
 *
 * AVX-512 accelerated RAID-6 recovery: rebuild lost data blocks from
 * the P (parity) and Q (Reed-Solomon syndrome) blocks.  On x86-64 the
 * loops process 128 bytes (two 64-byte zmm lanes) per iteration; on
 * 32-bit they process a single 64-byte lane.
 */

#ifdef CONFIG_AS_AVX512

#include <linux/raid/pq.h>
#include "x86.h"

/*
 * Runtime availability check for this implementation: requires AVX,
 * AVX2 and the AVX-512 F/BW/VL/DQ feature set.  Returns non-zero when
 * the CPU can run the code below.
 */
static int raid6_has_avx512(void)
{
	return boot_cpu_has(X86_FEATURE_AVX2) &&
		boot_cpu_has(X86_FEATURE_AVX) &&
		boot_cpu_has(X86_FEATURE_AVX512F) &&
		boot_cpu_has(X86_FEATURE_AVX512BW) &&
		boot_cpu_has(X86_FEATURE_AVX512VL) &&
		boot_cpu_has(X86_FEATURE_AVX512DQ);
}

/*
 * Recover two lost data blocks from P and Q.
 *
 * @disks:  total block count; P is ptrs[disks-2], Q is ptrs[disks-1]
 * @bytes:  number of bytes per block to recover
 * @faila:  index of the first failed data block in ptrs[] (faila < failb)
 * @failb:  index of the second failed data block in ptrs[]
 * @ptrs:   pointer table for all blocks
 *
 * The syndrome is regenerated with the failed blocks substituted by the
 * zero page and the dead pages aliased over the P/Q slots, so after
 * gen_syndrome() the dead pages hold "delta P"/"delta Q" relative to the
 * stored P/Q.  The two data blocks are then reconstructed in GF(2^8)
 * via the pbmul/qmul nibble-lookup tables chosen from the failure
 * positions (raid6_gfexi / raid6_gfinv / raid6_gfexp).
 */
static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
				     int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	const u8 x0f = 0x0f;	/* low-nibble mask, broadcast into zmm7 */

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q
	 */

	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dp;
	ptrs[failb] = dq;
	ptrs[disks-2] = p;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
		raid6_gfexp[failb]]];

	kernel_fpu_begin();

	/* zmm7 = 0x0f replicated into every byte (nibble mask) */
	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

	/*
	 * NOTE(review): throughout this loop, stores back to dq[]/dp[]
	 * are expressed as "m" *inputs* to asm volatile rather than
	 * "=m" outputs; correctness relies on asm volatile ordering,
	 * matching the sibling raid6 recov_* implementations — confirm
	 * before changing the constraints.
	 */
	while (bytes) {
#ifdef CONFIG_X86_64
		/*
		 * Load Q and P for two 64-byte lanes and XOR in the
		 * regenerated (delta) blocks.
		 */
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm9\n\t"
			     "vmovdqa64 %2, %%zmm0\n\t"
			     "vmovdqa64 %3, %%zmm8\n\t"
			     "vpxorq %4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %5, %%zmm9, %%zmm9\n\t"
			     "vpxorq %6, %%zmm0, %%zmm0\n\t"
			     "vpxorq %7, %%zmm8, %%zmm8"
			     :
			     : "m" (q[0]), "m" (q[64]), "m" (p[0]),
			       "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
			       "m" (dp[0]), "m" (dp[64]));

		/*
		 * 1 = dq[0] ^ q[0]
		 * 9 = dq[64] ^ q[64]
		 * 0 = dp[0] ^ p[0]
		 * 8 = dp[64] ^ p[64]
		 */

		/* Replicate both 16-byte halves of the qmul table */
		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/*
		 * GF(2^8) multiply of the Q delta by qmul: split each
		 * byte into low/high nibbles, vpshufb each nibble
		 * through its table half, XOR the partial products.
		 */
		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpsraw $4, %%zmm9, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/*
		 * 5 = qx[0]
		 * 15 = qx[64]
		 */

		/* Same nibble-lookup multiply of the P delta by pbmul */
		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpsraw $4, %%zmm8, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm12, %%zmm13, %%zmm13"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		/*
		 * 1 = pbmul[px[0]]
		 * 13 = pbmul[px[64]]
		 */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm15, %%zmm13, %%zmm13"
			     :
			     : );

		/*
		 * 1 = db = DQ
		 * 13 = db[64] = DQ[64]
		 */

		/* Store recovered DQ, then derive DP = DQ ^ (p ^ dp) */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm13,%1\n\t"
			     "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vpxorq %%zmm13, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]));

		/* Store recovered DP for both lanes */
		asm volatile("vmovdqa64 %%zmm0, %0\n\t"
			     "vmovdqa64 %%zmm8, %1"
			     :
			     : "m" (dp[0]), "m" (dp[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dp += 128;
		dq += 128;
#else
		/* 32-bit path: one 64-byte lane per iteration */
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm0\n\t"
			     "vpxorq %2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %3, %%zmm0, %%zmm0"
			     :
			     : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));

		/* 1 = dq ^ q; 0 = dp ^ p */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/*
		 * 1 = dq ^ q
		 * 3 = dq ^ p >> 4
		 */
		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/* 5 = qx */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = pbmul[px] */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     /* 1 = db = DQ */
			     "vmovdqa64 %%zmm1, %0\n\t"
			     :
			     : "m" (dq[0]));

		/* DP = DQ ^ (p ^ dp); store it */
		asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vmovdqa64 %%zmm0, %0"
			     :
			     : "m" (dp[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}

/*
 * Recover one lost data block plus the P block from Q.
 *
 * @disks: total block count; P is ptrs[disks-2], Q is ptrs[disks-1]
 * @bytes: number of bytes per block to recover
 * @faila: index of the failed data block in ptrs[]
 * @ptrs:  pointer table for all blocks
 *
 * The syndrome is regenerated with the failed block replaced by the
 * zero page and the dead page aliased over the Q slot, so it ends up
 * holding "delta Q".  The data block is rebuilt as qmul[q ^ dq], then
 * P is recomputed by XORing the recovered data into the stored P.
 */
static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
				     void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	const u8 x0f = 0x0f;	/* low-nibble mask, broadcast into zmm7 */

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data page
	 * Use the dead data page as temporary storage for delta q
	 */

	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dq;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	kernel_fpu_begin();

	/* zmm7 = 0x0f replicated into every byte (nibble mask) */
	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

	/*
	 * NOTE(review): as in raid6_2data_recov_avx512(), the stores to
	 * dq[]/p[] below use "m" input constraints and rely on asm
	 * volatile ordering — confirm before changing.
	 */
	while (bytes) {
#ifdef CONFIG_X86_64
		/* zmm3/zmm8 = dq ^ q for two 64-byte lanes */
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vmovdqa64 %1, %%zmm8\n\t"
			     "vpxorq %2, %%zmm3, %%zmm3\n\t"
			     "vpxorq %3, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
			       "m" (q[64]));

		/*
		 * 3 = q[0] ^ dq[0]
		 * 8 = q[64] ^ dq[64]
		 */

		/* Replicate qmul table halves; keep copies for lane 2 */
		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vmovapd %%zmm0, %%zmm13\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vmovapd %%zmm1, %%zmm14"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/* Nibble-lookup GF multiply of both lanes by qmul */
		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpsraw $4, %%zmm8, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm13, %%zmm14, %%zmm14"
			     :
			     : );

		/*
		 * 1 = qmul[q[0] ^ dq[0]]
		 * 14 = qmul[q[64] ^ dq[64]]
		 */

		/* Fold recovered data into P */
		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vmovdqa64 %1, %%zmm12\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
			     "vpxorq %%zmm14, %%zmm12, %%zmm12"
			     :
			     : "m" (p[0]), "m" (p[64]));

		/*
		 * 2 = p[0] ^ qmul[q[0] ^ dq[0]]
		 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
		 */

		/* Store recovered data (dq) and repaired parity (p) */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm14, %1\n\t"
			     "vmovdqa64 %%zmm2, %2\n\t"
			     "vmovdqa64 %%zmm12,%3"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
			       "m" (p[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dq += 128;
#else
		/* 32-bit path: one 64-byte lane per iteration */
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vpxorq %1, %%zmm3, %%zmm3"
			     :
			     : "m" (dq[0]), "m" (q[0]));

		/* 3 = q ^ dq */

		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = qmul[q ^ dq] */

		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2"
			     :
			     : "m" (p[0]));

		/* 2 = p ^ qmul[q ^ dq] */

		/* Store recovered data (dq) and repaired parity (p) */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm2, %1"
			     :
			     : "m" (dq[0]), "m" (p[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}

/* Recovery-algorithm descriptor registered with the raid6 core */
const struct raid6_recov_calls raid6_recov_avx512 = {
	.data2 = raid6_2data_recov_avx512,
	.datap = raid6_datap_recov_avx512,
	.valid = raid6_has_avx512,
#ifdef CONFIG_X86_64
	.name = "avx512x2",	/* two 64-byte lanes per loop iteration */
#else
	.name = "avx512x1",	/* one 64-byte lane per loop iteration */
#endif
	.priority = 3,
};

#else
#warning "your version of binutils lacks AVX512 support"
#endif