1*cc4589ebSDavid Woodhouse /* -*- linux-c -*- ------------------------------------------------------- * 2*cc4589ebSDavid Woodhouse * 3*cc4589ebSDavid Woodhouse * Copyright 2002 H. Peter Anvin - All Rights Reserved 4*cc4589ebSDavid Woodhouse * 5*cc4589ebSDavid Woodhouse * This program is free software; you can redistribute it and/or modify 6*cc4589ebSDavid Woodhouse * it under the terms of the GNU General Public License as published by 7*cc4589ebSDavid Woodhouse * the Free Software Foundation, Inc., 53 Temple Place Ste 330, 8*cc4589ebSDavid Woodhouse * Boston MA 02111-1307, USA; either version 2 of the License, or 9*cc4589ebSDavid Woodhouse * (at your option) any later version; incorporated herein by reference. 10*cc4589ebSDavid Woodhouse * 11*cc4589ebSDavid Woodhouse * ----------------------------------------------------------------------- */ 12*cc4589ebSDavid Woodhouse 13*cc4589ebSDavid Woodhouse /* 14*cc4589ebSDavid Woodhouse * raid6sse1.c 15*cc4589ebSDavid Woodhouse * 16*cc4589ebSDavid Woodhouse * SSE-1/MMXEXT implementation of RAID-6 syndrome functions 17*cc4589ebSDavid Woodhouse * 18*cc4589ebSDavid Woodhouse * This is really an MMX implementation, but it requires SSE-1 or 19*cc4589ebSDavid Woodhouse * AMD MMXEXT for prefetch support and a few other features. The 20*cc4589ebSDavid Woodhouse * support for nontemporal memory accesses is enough to make this 21*cc4589ebSDavid Woodhouse * worthwhile as a separate implementation. 22*cc4589ebSDavid Woodhouse */ 23*cc4589ebSDavid Woodhouse 24*cc4589ebSDavid Woodhouse #if defined(__i386__) && !defined(__arch_um__) 25*cc4589ebSDavid Woodhouse 26*cc4589ebSDavid Woodhouse #include <linux/raid/pq.h> 27*cc4589ebSDavid Woodhouse #include "raid6x86.h" 28*cc4589ebSDavid Woodhouse 29*cc4589ebSDavid Woodhouse /* Defined in raid6mmx.c */ 30*cc4589ebSDavid Woodhouse extern const struct raid6_mmx_constants { 31*cc4589ebSDavid Woodhouse u64 x1d; 32*cc4589ebSDavid Woodhouse } raid6_mmx_constants; 33*cc4589ebSDavid Woodhouse 34*cc4589ebSDavid Woodhouse static int raid6_have_sse1_or_mmxext(void) 35*cc4589ebSDavid Woodhouse { 36*cc4589ebSDavid Woodhouse /* Not really boot_cpu but "all_cpus" */ 37*cc4589ebSDavid Woodhouse return boot_cpu_has(X86_FEATURE_MMX) && 38*cc4589ebSDavid Woodhouse (boot_cpu_has(X86_FEATURE_XMM) || 39*cc4589ebSDavid Woodhouse boot_cpu_has(X86_FEATURE_MMXEXT)); 40*cc4589ebSDavid Woodhouse } 41*cc4589ebSDavid Woodhouse 42*cc4589ebSDavid Woodhouse /* 43*cc4589ebSDavid Woodhouse * Plain SSE1 implementation 44*cc4589ebSDavid Woodhouse */ 45*cc4589ebSDavid Woodhouse static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs) 46*cc4589ebSDavid Woodhouse { 47*cc4589ebSDavid Woodhouse u8 **dptr = (u8 **)ptrs; 48*cc4589ebSDavid Woodhouse u8 *p, *q; 49*cc4589ebSDavid Woodhouse int d, z, z0; 50*cc4589ebSDavid Woodhouse 51*cc4589ebSDavid Woodhouse z0 = disks - 3; /* Highest data disk */ 52*cc4589ebSDavid Woodhouse p = dptr[z0+1]; /* XOR parity */ 53*cc4589ebSDavid Woodhouse q = dptr[z0+2]; /* RS syndrome */ 54*cc4589ebSDavid Woodhouse 55*cc4589ebSDavid Woodhouse kernel_fpu_begin(); 56*cc4589ebSDavid Woodhouse 57*cc4589ebSDavid Woodhouse asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); 58*cc4589ebSDavid Woodhouse asm volatile("pxor %mm5,%mm5"); /* Zero temp */ 59*cc4589ebSDavid Woodhouse 60*cc4589ebSDavid Woodhouse for ( d = 0 ; d < bytes ; d += 8 ) { 61*cc4589ebSDavid Woodhouse asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); 62*cc4589ebSDavid Woodhouse asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ 63*cc4589ebSDavid Woodhouse asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); 64*cc4589ebSDavid Woodhouse asm volatile("movq %mm2,%mm4"); /* Q[0] */ 65*cc4589ebSDavid Woodhouse asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d])); 66*cc4589ebSDavid Woodhouse for ( z = z0-2 ; z >= 0 ; z-- ) { 67*cc4589ebSDavid Woodhouse asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); 68*cc4589ebSDavid Woodhouse asm volatile("pcmpgtb %mm4,%mm5"); 69*cc4589ebSDavid Woodhouse asm volatile("paddb %mm4,%mm4"); 70*cc4589ebSDavid Woodhouse asm volatile("pand %mm0,%mm5"); 71*cc4589ebSDavid Woodhouse asm volatile("pxor %mm5,%mm4"); 72*cc4589ebSDavid Woodhouse asm volatile("pxor %mm5,%mm5"); 73*cc4589ebSDavid Woodhouse asm volatile("pxor %mm6,%mm2"); 74*cc4589ebSDavid Woodhouse asm volatile("pxor %mm6,%mm4"); 75*cc4589ebSDavid Woodhouse asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d])); 76*cc4589ebSDavid Woodhouse } 77*cc4589ebSDavid Woodhouse asm volatile("pcmpgtb %mm4,%mm5"); 78*cc4589ebSDavid Woodhouse asm volatile("paddb %mm4,%mm4"); 79*cc4589ebSDavid Woodhouse asm volatile("pand %mm0,%mm5"); 80*cc4589ebSDavid Woodhouse asm volatile("pxor %mm5,%mm4"); 81*cc4589ebSDavid Woodhouse asm volatile("pxor %mm5,%mm5"); 82*cc4589ebSDavid Woodhouse asm volatile("pxor %mm6,%mm2"); 83*cc4589ebSDavid Woodhouse asm volatile("pxor %mm6,%mm4"); 84*cc4589ebSDavid Woodhouse 85*cc4589ebSDavid Woodhouse asm volatile("movntq %%mm2,%0" : "=m" (p[d])); 86*cc4589ebSDavid Woodhouse asm volatile("movntq %%mm4,%0" : "=m" (q[d])); 87*cc4589ebSDavid Woodhouse } 88*cc4589ebSDavid Woodhouse 89*cc4589ebSDavid Woodhouse asm volatile("sfence" : : : "memory"); 90*cc4589ebSDavid Woodhouse kernel_fpu_end(); 91*cc4589ebSDavid Woodhouse } 92*cc4589ebSDavid Woodhouse 93*cc4589ebSDavid Woodhouse const struct raid6_calls raid6_sse1x1 = { 94*cc4589ebSDavid Woodhouse raid6_sse11_gen_syndrome, 95*cc4589ebSDavid Woodhouse raid6_have_sse1_or_mmxext, 96*cc4589ebSDavid Woodhouse "sse1x1", 97*cc4589ebSDavid Woodhouse 1 /* Has cache hints */ 98*cc4589ebSDavid Woodhouse }; 99*cc4589ebSDavid Woodhouse 100*cc4589ebSDavid Woodhouse /* 101*cc4589ebSDavid Woodhouse * Unrolled-by-2 SSE1 implementation 102*cc4589ebSDavid Woodhouse */ 103*cc4589ebSDavid Woodhouse static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs) 104*cc4589ebSDavid Woodhouse { 105*cc4589ebSDavid Woodhouse u8 **dptr = (u8 **)ptrs; 106*cc4589ebSDavid Woodhouse u8 *p, *q; 107*cc4589ebSDavid Woodhouse int d, z, z0; 108*cc4589ebSDavid Woodhouse 109*cc4589ebSDavid Woodhouse z0 = disks - 3; /* Highest data disk */ 110*cc4589ebSDavid Woodhouse p = dptr[z0+1]; /* XOR parity */ 111*cc4589ebSDavid Woodhouse q = dptr[z0+2]; /* RS syndrome */ 112*cc4589ebSDavid Woodhouse 113*cc4589ebSDavid Woodhouse kernel_fpu_begin(); 114*cc4589ebSDavid Woodhouse 115*cc4589ebSDavid Woodhouse asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); 116*cc4589ebSDavid Woodhouse asm volatile("pxor %mm5,%mm5"); /* Zero temp */ 117*cc4589ebSDavid Woodhouse asm volatile("pxor %mm7,%mm7"); /* Zero temp */ 118*cc4589ebSDavid Woodhouse 119*cc4589ebSDavid Woodhouse /* We uniformly assume a single prefetch covers at least 16 bytes */ 120*cc4589ebSDavid Woodhouse for ( d = 0 ; d < bytes ; d += 16 ) { 121*cc4589ebSDavid Woodhouse asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); 122*cc4589ebSDavid Woodhouse asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ 123*cc4589ebSDavid Woodhouse asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */ 124*cc4589ebSDavid Woodhouse asm volatile("movq %mm2,%mm4"); /* Q[0] */ 125*cc4589ebSDavid Woodhouse asm volatile("movq %mm3,%mm6"); /* Q[1] */ 126*cc4589ebSDavid Woodhouse for ( z = z0-1 ; z >= 0 ; z-- ) { 127*cc4589ebSDavid Woodhouse asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); 128*cc4589ebSDavid Woodhouse asm volatile("pcmpgtb %mm4,%mm5"); 129*cc4589ebSDavid Woodhouse asm volatile("pcmpgtb %mm6,%mm7"); 130*cc4589ebSDavid Woodhouse asm volatile("paddb %mm4,%mm4"); 131*cc4589ebSDavid Woodhouse asm volatile("paddb %mm6,%mm6"); 132*cc4589ebSDavid Woodhouse asm volatile("pand %mm0,%mm5"); 133*cc4589ebSDavid Woodhouse asm volatile("pand %mm0,%mm7"); 134*cc4589ebSDavid Woodhouse asm volatile("pxor %mm5,%mm4"); 135*cc4589ebSDavid Woodhouse asm volatile("pxor %mm7,%mm6"); 136*cc4589ebSDavid Woodhouse asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); 137*cc4589ebSDavid Woodhouse asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8])); 138*cc4589ebSDavid Woodhouse asm volatile("pxor %mm5,%mm2"); 139*cc4589ebSDavid Woodhouse asm volatile("pxor %mm7,%mm3"); 140*cc4589ebSDavid Woodhouse asm volatile("pxor %mm5,%mm4"); 141*cc4589ebSDavid Woodhouse asm volatile("pxor %mm7,%mm6"); 142*cc4589ebSDavid Woodhouse asm volatile("pxor %mm5,%mm5"); 143*cc4589ebSDavid Woodhouse asm volatile("pxor %mm7,%mm7"); 144*cc4589ebSDavid Woodhouse } 145*cc4589ebSDavid Woodhouse asm volatile("movntq %%mm2,%0" : "=m" (p[d])); 146*cc4589ebSDavid Woodhouse asm volatile("movntq %%mm3,%0" : "=m" (p[d+8])); 147*cc4589ebSDavid Woodhouse asm volatile("movntq %%mm4,%0" : "=m" (q[d])); 148*cc4589ebSDavid Woodhouse asm volatile("movntq %%mm6,%0" : "=m" (q[d+8])); 149*cc4589ebSDavid Woodhouse } 150*cc4589ebSDavid Woodhouse 151*cc4589ebSDavid Woodhouse asm volatile("sfence" : :: "memory"); 152*cc4589ebSDavid Woodhouse kernel_fpu_end(); 153*cc4589ebSDavid Woodhouse } 154*cc4589ebSDavid Woodhouse 155*cc4589ebSDavid Woodhouse const struct raid6_calls raid6_sse1x2 = { 156*cc4589ebSDavid Woodhouse raid6_sse12_gen_syndrome, 157*cc4589ebSDavid Woodhouse raid6_have_sse1_or_mmxext, 158*cc4589ebSDavid Woodhouse "sse1x2", 159*cc4589ebSDavid Woodhouse 1 /* Has cache hints */ 160*cc4589ebSDavid Woodhouse }; 161*cc4589ebSDavid Woodhouse 162*cc4589ebSDavid Woodhouse #endif 163