1 /* -*- linux-c -*- ------------------------------------------------------- * 2 * 3 * Copyright (C) 2012 Intel Corporation 4 * Author: Yuanhan Liu <yuanhan.liu@linux.intel.com> 5 * 6 * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved 7 * 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of the GNU General Public License as published by 11 * the Free Software Foundation, Inc., 53 Temple Place Ste 330, 12 * Boston MA 02111-1307, USA; either version 2 of the License, or 13 * (at your option) any later version; incorporated herein by reference. 14 * 15 * ----------------------------------------------------------------------- */ 16 17 /* 18 * AVX2 implementation of RAID-6 syndrome functions 19 * 20 */ 21 22 #ifdef CONFIG_AS_AVX2 23 24 #include <linux/raid/pq.h> 25 #include "x86.h" 26 27 static const struct raid6_avx2_constants { 28 u64 x1d[4]; 29 } raid6_avx2_constants __aligned(32) = { 30 { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, 31 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,}, 32 }; 33 34 static int raid6_have_avx2(void) 35 { 36 return boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX); 37 } 38 39 /* 40 * Plain AVX2 implementation 41 */ 42 static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs) 43 { 44 u8 **dptr = (u8 **)ptrs; 45 u8 *p, *q; 46 int d, z, z0; 47 48 z0 = disks - 3; /* Highest data disk */ 49 p = dptr[z0+1]; /* XOR parity */ 50 q = dptr[z0+2]; /* RS syndrome */ 51 52 kernel_fpu_begin(); 53 54 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); 55 asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* Zero temp */ 56 57 for (d = 0; d < bytes; d += 32) { 58 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); 59 asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */ 60 asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); 61 asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */ 62 asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d])); 63 for (z = z0-2; z >= 0; z--) { 64 asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); 65 asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5"); 66 asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); 67 asm volatile("vpand %ymm0,%ymm5,%ymm5"); 68 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 69 asm volatile("vpxor %ymm6,%ymm2,%ymm2"); 70 asm volatile("vpxor %ymm6,%ymm4,%ymm4"); 71 asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d])); 72 } 73 asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5"); 74 asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); 75 asm volatile("vpand %ymm0,%ymm5,%ymm5"); 76 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 77 asm volatile("vpxor %ymm6,%ymm2,%ymm2"); 78 asm volatile("vpxor %ymm6,%ymm4,%ymm4"); 79 80 asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); 81 asm volatile("vpxor %ymm2,%ymm2,%ymm2"); 82 asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); 83 asm volatile("vpxor %ymm4,%ymm4,%ymm4"); 84 } 85 86 asm volatile("sfence" : : : "memory"); 87 kernel_fpu_end(); 88 } 89 90 const struct raid6_calls raid6_avx2x1 = { 91 raid6_avx21_gen_syndrome, 92 NULL, /* XOR not yet implemented */ 93 raid6_have_avx2, 94 "avx2x1", 95 1 /* Has cache hints */ 96 }; 97 98 /* 99 * Unrolled-by-2 AVX2 implementation 100 */ 101 static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs) 102 { 103 u8 **dptr = (u8 **)ptrs; 104 u8 *p, *q; 105 int d, z, z0; 106 107 z0 = disks - 3; /* Highest data disk */ 108 p = dptr[z0+1]; /* XOR parity */ 109 q = dptr[z0+2]; /* RS syndrome */ 110 111 kernel_fpu_begin(); 112 113 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); 114 asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */ 115 116 /* We uniformly assume a single prefetch covers at least 32 bytes */ 117 for (d = 0; d < bytes; d += 64) { 118 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); 119 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32])); 120 asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */ 121 asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */ 122 asm volatile("vmovdqa %ymm2,%ymm4"); /* Q[0] */ 123 asm volatile("vmovdqa %ymm3,%ymm6"); /* Q[1] */ 124 for (z = z0-1; z >= 0; z--) { 125 asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); 126 asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32])); 127 asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5"); 128 asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7"); 129 asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); 130 asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); 131 asm volatile("vpand %ymm0,%ymm5,%ymm5"); 132 asm volatile("vpand %ymm0,%ymm7,%ymm7"); 133 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 134 asm volatile("vpxor %ymm7,%ymm6,%ymm6"); 135 asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d])); 136 asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32])); 137 asm volatile("vpxor %ymm5,%ymm2,%ymm2"); 138 asm volatile("vpxor %ymm7,%ymm3,%ymm3"); 139 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 140 asm volatile("vpxor %ymm7,%ymm6,%ymm6"); 141 } 142 asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); 143 asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32])); 144 asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); 145 asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32])); 146 } 147 148 asm volatile("sfence" : : : "memory"); 149 kernel_fpu_end(); 150 } 151 152 const struct raid6_calls raid6_avx2x2 = { 153 raid6_avx22_gen_syndrome, 154 NULL, /* XOR not yet implemented */ 155 raid6_have_avx2, 156 "avx2x2", 157 1 /* Has cache hints */ 158 }; 159 160 #ifdef CONFIG_X86_64 161 162 /* 163 * Unrolled-by-4 AVX2 implementation 164 */ 165 static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs) 166 { 167 u8 **dptr = (u8 **)ptrs; 168 u8 *p, *q; 169 int d, z, z0; 170 171 z0 = disks - 3; /* Highest data disk */ 172 p = dptr[z0+1]; /* XOR parity */ 173 q = dptr[z0+2]; /* RS syndrome */ 174 175 kernel_fpu_begin(); 176 177 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); 178 asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */ 179 asm volatile("vpxor %ymm2,%ymm2,%ymm2"); /* P[0] */ 180 asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* P[1] */ 181 asm volatile("vpxor %ymm4,%ymm4,%ymm4"); /* Q[0] */ 182 asm volatile("vpxor %ymm6,%ymm6,%ymm6"); /* Q[1] */ 183 asm volatile("vpxor %ymm10,%ymm10,%ymm10"); /* P[2] */ 184 asm volatile("vpxor %ymm11,%ymm11,%ymm11"); /* P[3] */ 185 asm volatile("vpxor %ymm12,%ymm12,%ymm12"); /* Q[2] */ 186 asm volatile("vpxor %ymm14,%ymm14,%ymm14"); /* Q[3] */ 187 188 for (d = 0; d < bytes; d += 128) { 189 for (z = z0; z >= 0; z--) { 190 asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); 191 asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32])); 192 asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64])); 193 asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96])); 194 asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5"); 195 asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7"); 196 asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13"); 197 asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15"); 198 asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); 199 asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); 200 asm volatile("vpaddb %ymm12,%ymm12,%ymm12"); 201 asm volatile("vpaddb %ymm14,%ymm14,%ymm14"); 202 asm volatile("vpand %ymm0,%ymm5,%ymm5"); 203 asm volatile("vpand %ymm0,%ymm7,%ymm7"); 204 asm volatile("vpand %ymm0,%ymm13,%ymm13"); 205 asm volatile("vpand %ymm0,%ymm15,%ymm15"); 206 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 207 asm volatile("vpxor %ymm7,%ymm6,%ymm6"); 208 asm volatile("vpxor %ymm13,%ymm12,%ymm12"); 209 asm volatile("vpxor %ymm15,%ymm14,%ymm14"); 210 asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d])); 211 asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32])); 212 asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64])); 213 asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96])); 214 asm volatile("vpxor %ymm5,%ymm2,%ymm2"); 215 asm volatile("vpxor %ymm7,%ymm3,%ymm3"); 216 asm volatile("vpxor %ymm13,%ymm10,%ymm10"); 217 asm volatile("vpxor %ymm15,%ymm11,%ymm11"); 218 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 219 asm volatile("vpxor %ymm7,%ymm6,%ymm6"); 220 asm volatile("vpxor %ymm13,%ymm12,%ymm12"); 221 asm volatile("vpxor %ymm15,%ymm14,%ymm14"); 222 } 223 asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); 224 asm volatile("vpxor %ymm2,%ymm2,%ymm2"); 225 asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32])); 226 asm volatile("vpxor %ymm3,%ymm3,%ymm3"); 227 asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64])); 228 asm volatile("vpxor %ymm10,%ymm10,%ymm10"); 229 asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96])); 230 asm volatile("vpxor %ymm11,%ymm11,%ymm11"); 231 asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); 232 asm volatile("vpxor %ymm4,%ymm4,%ymm4"); 233 asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32])); 234 asm volatile("vpxor %ymm6,%ymm6,%ymm6"); 235 asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64])); 236 asm volatile("vpxor %ymm12,%ymm12,%ymm12"); 237 asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96])); 238 asm volatile("vpxor %ymm14,%ymm14,%ymm14"); 239 } 240 241 asm volatile("sfence" : : : "memory"); 242 kernel_fpu_end(); 243 } 244 245 const struct raid6_calls raid6_avx2x4 = { 246 raid6_avx24_gen_syndrome, 247 NULL, /* XOR not yet implemented */ 248 raid6_have_avx2, 249 "avx2x4", 250 1 /* Has cache hints */ 251 }; 252 #endif 253 254 #endif /* CONFIG_AS_AVX2 */ 255