1 /* -*- linux-c -*- ------------------------------------------------------- * 2 * 3 * Copyright (C) 2012 Intel Corporation 4 * Author: Yuanhan Liu <yuanhan.liu@linux.intel.com> 5 * 6 * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved 7 * 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of the GNU General Public License as published by 11 * the Free Software Foundation, Inc., 53 Temple Place Ste 330, 12 * Boston MA 02111-1307, USA; either version 2 of the License, or 13 * (at your option) any later version; incorporated herein by reference. 14 * 15 * ----------------------------------------------------------------------- */ 16 17 /* 18 * AVX2 implementation of RAID-6 syndrome functions 19 * 20 */ 21 22 #ifdef CONFIG_AS_AVX2 23 24 #include <linux/raid/pq.h> 25 #include "x86.h" 26 27 static const struct raid6_avx2_constants { 28 u64 x1d[4]; 29 } raid6_avx2_constants __aligned(32) = { 30 { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, 31 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,}, 32 }; 33 34 static int raid6_have_avx2(void) 35 { 36 return boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX); 37 } 38 39 /* 40 * Plain AVX2 implementation 41 */ 42 static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs) 43 { 44 u8 **dptr = (u8 **)ptrs; 45 u8 *p, *q; 46 int d, z, z0; 47 48 z0 = disks - 3; /* Highest data disk */ 49 p = dptr[z0+1]; /* XOR parity */ 50 q = dptr[z0+2]; /* RS syndrome */ 51 52 kernel_fpu_begin(); 53 54 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); 55 asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* Zero temp */ 56 57 for (d = 0; d < bytes; d += 32) { 58 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); 59 asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */ 60 asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); 61 asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */ 62 asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d])); 63 for (z = z0-2; z >= 0; z--) { 64 asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); 65 asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5"); 66 asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); 67 asm volatile("vpand %ymm0,%ymm5,%ymm5"); 68 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 69 asm volatile("vpxor %ymm6,%ymm2,%ymm2"); 70 asm volatile("vpxor %ymm6,%ymm4,%ymm4"); 71 asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d])); 72 } 73 asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5"); 74 asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); 75 asm volatile("vpand %ymm0,%ymm5,%ymm5"); 76 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 77 asm volatile("vpxor %ymm6,%ymm2,%ymm2"); 78 asm volatile("vpxor %ymm6,%ymm4,%ymm4"); 79 80 asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); 81 asm volatile("vpxor %ymm2,%ymm2,%ymm2"); 82 asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); 83 asm volatile("vpxor %ymm4,%ymm4,%ymm4"); 84 } 85 86 asm volatile("sfence" : : : "memory"); 87 kernel_fpu_end(); 88 } 89 90 const struct raid6_calls raid6_avx2x1 = { 91 raid6_avx21_gen_syndrome, 92 raid6_have_avx2, 93 "avx2x1", 94 1 /* Has cache hints */ 95 }; 96 97 /* 98 * Unrolled-by-2 AVX2 implementation 99 */ 100 static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs) 101 { 102 u8 **dptr = (u8 **)ptrs; 103 u8 *p, *q; 104 int d, z, z0; 105 106 z0 = disks - 3; /* Highest data disk */ 107 p = dptr[z0+1]; /* XOR parity */ 108 q = dptr[z0+2]; /* RS syndrome */ 109 110 kernel_fpu_begin(); 111 112 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); 113 asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */ 114 115 /* We uniformly assume a single prefetch covers at least 32 bytes */ 116 for (d = 0; d < bytes; d += 64) { 117 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); 118 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32])); 119 asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */ 120 asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */ 121 asm volatile("vmovdqa %ymm2,%ymm4"); /* Q[0] */ 122 asm volatile("vmovdqa %ymm3,%ymm6"); /* Q[1] */ 123 for (z = z0-1; z >= 0; z--) { 124 asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); 125 asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32])); 126 asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5"); 127 asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7"); 128 asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); 129 asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); 130 asm volatile("vpand %ymm0,%ymm5,%ymm5"); 131 asm volatile("vpand %ymm0,%ymm7,%ymm7"); 132 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 133 asm volatile("vpxor %ymm7,%ymm6,%ymm6"); 134 asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d])); 135 asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32])); 136 asm volatile("vpxor %ymm5,%ymm2,%ymm2"); 137 asm volatile("vpxor %ymm7,%ymm3,%ymm3"); 138 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 139 asm volatile("vpxor %ymm7,%ymm6,%ymm6"); 140 } 141 asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); 142 asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32])); 143 asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); 144 asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32])); 145 } 146 147 asm volatile("sfence" : : : "memory"); 148 kernel_fpu_end(); 149 } 150 151 const struct raid6_calls raid6_avx2x2 = { 152 raid6_avx22_gen_syndrome, 153 raid6_have_avx2, 154 "avx2x2", 155 1 /* Has cache hints */ 156 }; 157 158 #ifdef CONFIG_X86_64 159 160 /* 161 * Unrolled-by-4 AVX2 implementation 162 */ 163 static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs) 164 { 165 u8 **dptr = (u8 **)ptrs; 166 u8 *p, *q; 167 int d, z, z0; 168 169 z0 = disks - 3; /* Highest data disk */ 170 p = dptr[z0+1]; /* XOR parity */ 171 q = dptr[z0+2]; /* RS syndrome */ 172 173 kernel_fpu_begin(); 174 175 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); 176 asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */ 177 asm volatile("vpxor %ymm2,%ymm2,%ymm2"); /* P[0] */ 178 asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* P[1] */ 179 asm volatile("vpxor %ymm4,%ymm4,%ymm4"); /* Q[0] */ 180 asm volatile("vpxor %ymm6,%ymm6,%ymm6"); /* Q[1] */ 181 asm volatile("vpxor %ymm10,%ymm10,%ymm10"); /* P[2] */ 182 asm volatile("vpxor %ymm11,%ymm11,%ymm11"); /* P[3] */ 183 asm volatile("vpxor %ymm12,%ymm12,%ymm12"); /* Q[2] */ 184 asm volatile("vpxor %ymm14,%ymm14,%ymm14"); /* Q[3] */ 185 186 for (d = 0; d < bytes; d += 128) { 187 for (z = z0; z >= 0; z--) { 188 asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); 189 asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32])); 190 asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64])); 191 asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96])); 192 asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5"); 193 asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7"); 194 asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13"); 195 asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15"); 196 asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); 197 asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); 198 asm volatile("vpaddb %ymm12,%ymm12,%ymm12"); 199 asm volatile("vpaddb %ymm14,%ymm14,%ymm14"); 200 asm volatile("vpand %ymm0,%ymm5,%ymm5"); 201 asm volatile("vpand %ymm0,%ymm7,%ymm7"); 202 asm volatile("vpand %ymm0,%ymm13,%ymm13"); 203 asm volatile("vpand %ymm0,%ymm15,%ymm15"); 204 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 205 asm volatile("vpxor %ymm7,%ymm6,%ymm6"); 206 asm volatile("vpxor %ymm13,%ymm12,%ymm12"); 207 asm volatile("vpxor %ymm15,%ymm14,%ymm14"); 208 asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d])); 209 asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32])); 210 asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64])); 211 asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96])); 212 asm volatile("vpxor %ymm5,%ymm2,%ymm2"); 213 asm volatile("vpxor %ymm7,%ymm3,%ymm3"); 214 asm volatile("vpxor %ymm13,%ymm10,%ymm10"); 215 asm volatile("vpxor %ymm15,%ymm11,%ymm11"); 216 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 217 asm volatile("vpxor %ymm7,%ymm6,%ymm6"); 218 asm volatile("vpxor %ymm13,%ymm12,%ymm12"); 219 asm volatile("vpxor %ymm15,%ymm14,%ymm14"); 220 } 221 asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); 222 asm volatile("vpxor %ymm2,%ymm2,%ymm2"); 223 asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32])); 224 asm volatile("vpxor %ymm3,%ymm3,%ymm3"); 225 asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64])); 226 asm volatile("vpxor %ymm10,%ymm10,%ymm10"); 227 asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96])); 228 asm volatile("vpxor %ymm11,%ymm11,%ymm11"); 229 asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); 230 asm volatile("vpxor %ymm4,%ymm4,%ymm4"); 231 asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32])); 232 asm volatile("vpxor %ymm6,%ymm6,%ymm6"); 233 asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64])); 234 asm volatile("vpxor %ymm12,%ymm12,%ymm12"); 235 asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96])); 236 asm volatile("vpxor %ymm14,%ymm14,%ymm14"); 237 } 238 239 asm volatile("sfence" : : : "memory"); 240 kernel_fpu_end(); 241 } 242 243 const struct raid6_calls raid6_avx2x4 = { 244 raid6_avx24_gen_syndrome, 245 raid6_have_avx2, 246 "avx2x4", 247 1 /* Has cache hints */ 248 }; 249 #endif 250 251 #endif /* CONFIG_AS_AVX2 */ 252