1*af1a8899SThomas Gleixner /* SPDX-License-Identifier: GPL-2.0-or-later */ 21da177e4SLinus Torvalds /* 31da177e4SLinus Torvalds * include/asm-generic/xor.h 41da177e4SLinus Torvalds * 51da177e4SLinus Torvalds * Generic optimized RAID-5 checksumming functions. 61da177e4SLinus Torvalds */ 71da177e4SLinus Torvalds 8268bb0ceSLinus Torvalds #include <linux/prefetch.h> 91da177e4SLinus Torvalds 101da177e4SLinus Torvalds static void 111da177e4SLinus Torvalds xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) 121da177e4SLinus Torvalds { 131da177e4SLinus Torvalds long lines = bytes / (sizeof (long)) / 8; 141da177e4SLinus Torvalds 151da177e4SLinus Torvalds do { 161da177e4SLinus Torvalds p1[0] ^= p2[0]; 171da177e4SLinus Torvalds p1[1] ^= p2[1]; 181da177e4SLinus Torvalds p1[2] ^= p2[2]; 191da177e4SLinus Torvalds p1[3] ^= p2[3]; 201da177e4SLinus Torvalds p1[4] ^= p2[4]; 211da177e4SLinus Torvalds p1[5] ^= p2[5]; 221da177e4SLinus Torvalds p1[6] ^= p2[6]; 231da177e4SLinus Torvalds p1[7] ^= p2[7]; 241da177e4SLinus Torvalds p1 += 8; 251da177e4SLinus Torvalds p2 += 8; 261da177e4SLinus Torvalds } while (--lines > 0); 271da177e4SLinus Torvalds } 281da177e4SLinus Torvalds 291da177e4SLinus Torvalds static void 301da177e4SLinus Torvalds xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, 311da177e4SLinus Torvalds unsigned long *p3) 321da177e4SLinus Torvalds { 331da177e4SLinus Torvalds long lines = bytes / (sizeof (long)) / 8; 341da177e4SLinus Torvalds 351da177e4SLinus Torvalds do { 361da177e4SLinus Torvalds p1[0] ^= p2[0] ^ p3[0]; 371da177e4SLinus Torvalds p1[1] ^= p2[1] ^ p3[1]; 381da177e4SLinus Torvalds p1[2] ^= p2[2] ^ p3[2]; 391da177e4SLinus Torvalds p1[3] ^= p2[3] ^ p3[3]; 401da177e4SLinus Torvalds p1[4] ^= p2[4] ^ p3[4]; 411da177e4SLinus Torvalds p1[5] ^= p2[5] ^ p3[5]; 421da177e4SLinus Torvalds p1[6] ^= p2[6] ^ p3[6]; 431da177e4SLinus Torvalds p1[7] ^= p2[7] ^ p3[7]; 441da177e4SLinus Torvalds p1 += 8; 451da177e4SLinus Torvalds p2 += 8; 461da177e4SLinus Torvalds p3 += 8; 471da177e4SLinus Torvalds } while (--lines > 0); 481da177e4SLinus Torvalds } 491da177e4SLinus Torvalds 501da177e4SLinus Torvalds static void 511da177e4SLinus Torvalds xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, 521da177e4SLinus Torvalds unsigned long *p3, unsigned long *p4) 531da177e4SLinus Torvalds { 541da177e4SLinus Torvalds long lines = bytes / (sizeof (long)) / 8; 551da177e4SLinus Torvalds 561da177e4SLinus Torvalds do { 571da177e4SLinus Torvalds p1[0] ^= p2[0] ^ p3[0] ^ p4[0]; 581da177e4SLinus Torvalds p1[1] ^= p2[1] ^ p3[1] ^ p4[1]; 591da177e4SLinus Torvalds p1[2] ^= p2[2] ^ p3[2] ^ p4[2]; 601da177e4SLinus Torvalds p1[3] ^= p2[3] ^ p3[3] ^ p4[3]; 611da177e4SLinus Torvalds p1[4] ^= p2[4] ^ p3[4] ^ p4[4]; 621da177e4SLinus Torvalds p1[5] ^= p2[5] ^ p3[5] ^ p4[5]; 631da177e4SLinus Torvalds p1[6] ^= p2[6] ^ p3[6] ^ p4[6]; 641da177e4SLinus Torvalds p1[7] ^= p2[7] ^ p3[7] ^ p4[7]; 651da177e4SLinus Torvalds p1 += 8; 661da177e4SLinus Torvalds p2 += 8; 671da177e4SLinus Torvalds p3 += 8; 681da177e4SLinus Torvalds p4 += 8; 691da177e4SLinus Torvalds } while (--lines > 0); 701da177e4SLinus Torvalds } 711da177e4SLinus Torvalds 721da177e4SLinus Torvalds static void 731da177e4SLinus Torvalds xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, 741da177e4SLinus Torvalds unsigned long *p3, unsigned long *p4, unsigned long *p5) 751da177e4SLinus Torvalds { 761da177e4SLinus Torvalds long lines = bytes / (sizeof (long)) / 8; 771da177e4SLinus Torvalds 781da177e4SLinus Torvalds do { 791da177e4SLinus Torvalds p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0]; 801da177e4SLinus Torvalds p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1]; 811da177e4SLinus Torvalds p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2]; 821da177e4SLinus Torvalds p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3]; 831da177e4SLinus Torvalds p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4]; 841da177e4SLinus Torvalds p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5]; 851da177e4SLinus Torvalds p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6]; 861da177e4SLinus Torvalds p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7]; 871da177e4SLinus Torvalds p1 += 8; 881da177e4SLinus Torvalds p2 += 8; 891da177e4SLinus Torvalds p3 += 8; 901da177e4SLinus Torvalds p4 += 8; 911da177e4SLinus Torvalds p5 += 8; 921da177e4SLinus Torvalds } while (--lines > 0); 931da177e4SLinus Torvalds } 941da177e4SLinus Torvalds 951da177e4SLinus Torvalds static void 961da177e4SLinus Torvalds xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) 971da177e4SLinus Torvalds { 981da177e4SLinus Torvalds long lines = bytes / (sizeof (long)) / 8; 991da177e4SLinus Torvalds 1001da177e4SLinus Torvalds do { 1011da177e4SLinus Torvalds register long d0, d1, d2, d3, d4, d5, d6, d7; 1021da177e4SLinus Torvalds d0 = p1[0]; /* Pull the stuff into registers */ 1031da177e4SLinus Torvalds d1 = p1[1]; /* ... in bursts, if possible. */ 1041da177e4SLinus Torvalds d2 = p1[2]; 1051da177e4SLinus Torvalds d3 = p1[3]; 1061da177e4SLinus Torvalds d4 = p1[4]; 1071da177e4SLinus Torvalds d5 = p1[5]; 1081da177e4SLinus Torvalds d6 = p1[6]; 1091da177e4SLinus Torvalds d7 = p1[7]; 1101da177e4SLinus Torvalds d0 ^= p2[0]; 1111da177e4SLinus Torvalds d1 ^= p2[1]; 1121da177e4SLinus Torvalds d2 ^= p2[2]; 1131da177e4SLinus Torvalds d3 ^= p2[3]; 1141da177e4SLinus Torvalds d4 ^= p2[4]; 1151da177e4SLinus Torvalds d5 ^= p2[5]; 1161da177e4SLinus Torvalds d6 ^= p2[6]; 1171da177e4SLinus Torvalds d7 ^= p2[7]; 1181da177e4SLinus Torvalds p1[0] = d0; /* Store the result (in bursts) */ 1191da177e4SLinus Torvalds p1[1] = d1; 1201da177e4SLinus Torvalds p1[2] = d2; 1211da177e4SLinus Torvalds p1[3] = d3; 1221da177e4SLinus Torvalds p1[4] = d4; 1231da177e4SLinus Torvalds p1[5] = d5; 1241da177e4SLinus Torvalds p1[6] = d6; 1251da177e4SLinus Torvalds p1[7] = d7; 1261da177e4SLinus Torvalds p1 += 8; 1271da177e4SLinus Torvalds p2 += 8; 1281da177e4SLinus Torvalds } while (--lines > 0); 1291da177e4SLinus Torvalds } 1301da177e4SLinus Torvalds 1311da177e4SLinus Torvalds static void 1321da177e4SLinus Torvalds xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, 1331da177e4SLinus Torvalds unsigned long *p3) 1341da177e4SLinus Torvalds { 1351da177e4SLinus Torvalds long lines = bytes / (sizeof (long)) / 8; 1361da177e4SLinus Torvalds 1371da177e4SLinus Torvalds do { 1381da177e4SLinus Torvalds register long d0, d1, d2, d3, d4, d5, d6, d7; 1391da177e4SLinus Torvalds d0 = p1[0]; /* Pull the stuff into registers */ 1401da177e4SLinus Torvalds d1 = p1[1]; /* ... in bursts, if possible. */ 1411da177e4SLinus Torvalds d2 = p1[2]; 1421da177e4SLinus Torvalds d3 = p1[3]; 1431da177e4SLinus Torvalds d4 = p1[4]; 1441da177e4SLinus Torvalds d5 = p1[5]; 1451da177e4SLinus Torvalds d6 = p1[6]; 1461da177e4SLinus Torvalds d7 = p1[7]; 1471da177e4SLinus Torvalds d0 ^= p2[0]; 1481da177e4SLinus Torvalds d1 ^= p2[1]; 1491da177e4SLinus Torvalds d2 ^= p2[2]; 1501da177e4SLinus Torvalds d3 ^= p2[3]; 1511da177e4SLinus Torvalds d4 ^= p2[4]; 1521da177e4SLinus Torvalds d5 ^= p2[5]; 1531da177e4SLinus Torvalds d6 ^= p2[6]; 1541da177e4SLinus Torvalds d7 ^= p2[7]; 1551da177e4SLinus Torvalds d0 ^= p3[0]; 1561da177e4SLinus Torvalds d1 ^= p3[1]; 1571da177e4SLinus Torvalds d2 ^= p3[2]; 1581da177e4SLinus Torvalds d3 ^= p3[3]; 1591da177e4SLinus Torvalds d4 ^= p3[4]; 1601da177e4SLinus Torvalds d5 ^= p3[5]; 1611da177e4SLinus Torvalds d6 ^= p3[6]; 1621da177e4SLinus Torvalds d7 ^= p3[7]; 1631da177e4SLinus Torvalds p1[0] = d0; /* Store the result (in bursts) */ 1641da177e4SLinus Torvalds p1[1] = d1; 1651da177e4SLinus Torvalds p1[2] = d2; 1661da177e4SLinus Torvalds p1[3] = d3; 1671da177e4SLinus Torvalds p1[4] = d4; 1681da177e4SLinus Torvalds p1[5] = d5; 1691da177e4SLinus Torvalds p1[6] = d6; 1701da177e4SLinus Torvalds p1[7] = d7; 1711da177e4SLinus Torvalds p1 += 8; 1721da177e4SLinus Torvalds p2 += 8; 1731da177e4SLinus Torvalds p3 += 8; 1741da177e4SLinus Torvalds } while (--lines > 0); 1751da177e4SLinus Torvalds } 1761da177e4SLinus Torvalds 1771da177e4SLinus Torvalds static void 1781da177e4SLinus Torvalds xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, 1791da177e4SLinus Torvalds unsigned long *p3, unsigned long *p4) 1801da177e4SLinus Torvalds { 1811da177e4SLinus Torvalds long lines = bytes / (sizeof (long)) / 8; 1821da177e4SLinus Torvalds 1831da177e4SLinus Torvalds do { 1841da177e4SLinus Torvalds register long d0, d1, d2, d3, d4, d5, d6, d7; 1851da177e4SLinus Torvalds d0 = p1[0]; /* Pull the stuff into registers */ 1861da177e4SLinus Torvalds d1 = p1[1]; /* ... in bursts, if possible. */ 1871da177e4SLinus Torvalds d2 = p1[2]; 1881da177e4SLinus Torvalds d3 = p1[3]; 1891da177e4SLinus Torvalds d4 = p1[4]; 1901da177e4SLinus Torvalds d5 = p1[5]; 1911da177e4SLinus Torvalds d6 = p1[6]; 1921da177e4SLinus Torvalds d7 = p1[7]; 1931da177e4SLinus Torvalds d0 ^= p2[0]; 1941da177e4SLinus Torvalds d1 ^= p2[1]; 1951da177e4SLinus Torvalds d2 ^= p2[2]; 1961da177e4SLinus Torvalds d3 ^= p2[3]; 1971da177e4SLinus Torvalds d4 ^= p2[4]; 1981da177e4SLinus Torvalds d5 ^= p2[5]; 1991da177e4SLinus Torvalds d6 ^= p2[6]; 2001da177e4SLinus Torvalds d7 ^= p2[7]; 2011da177e4SLinus Torvalds d0 ^= p3[0]; 2021da177e4SLinus Torvalds d1 ^= p3[1]; 2031da177e4SLinus Torvalds d2 ^= p3[2]; 2041da177e4SLinus Torvalds d3 ^= p3[3]; 2051da177e4SLinus Torvalds d4 ^= p3[4]; 2061da177e4SLinus Torvalds d5 ^= p3[5]; 2071da177e4SLinus Torvalds d6 ^= p3[6]; 2081da177e4SLinus Torvalds d7 ^= p3[7]; 2091da177e4SLinus Torvalds d0 ^= p4[0]; 2101da177e4SLinus Torvalds d1 ^= p4[1]; 2111da177e4SLinus Torvalds d2 ^= p4[2]; 2121da177e4SLinus Torvalds d3 ^= p4[3]; 2131da177e4SLinus Torvalds d4 ^= p4[4]; 2141da177e4SLinus Torvalds d5 ^= p4[5]; 2151da177e4SLinus Torvalds d6 ^= p4[6]; 2161da177e4SLinus Torvalds d7 ^= p4[7]; 2171da177e4SLinus Torvalds p1[0] = d0; /* Store the result (in bursts) */ 2181da177e4SLinus Torvalds p1[1] = d1; 2191da177e4SLinus Torvalds p1[2] = d2; 2201da177e4SLinus Torvalds p1[3] = d3; 2211da177e4SLinus Torvalds p1[4] = d4; 2221da177e4SLinus Torvalds p1[5] = d5; 2231da177e4SLinus Torvalds p1[6] = d6; 2241da177e4SLinus Torvalds p1[7] = d7; 2251da177e4SLinus Torvalds p1 += 8; 2261da177e4SLinus Torvalds p2 += 8; 2271da177e4SLinus Torvalds p3 += 8; 2281da177e4SLinus Torvalds p4 += 8; 2291da177e4SLinus Torvalds } while (--lines > 0); 2301da177e4SLinus Torvalds } 2311da177e4SLinus Torvalds 2321da177e4SLinus Torvalds static void 2331da177e4SLinus Torvalds xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, 2341da177e4SLinus Torvalds unsigned long *p3, unsigned long *p4, unsigned long *p5) 2351da177e4SLinus Torvalds { 2361da177e4SLinus Torvalds long lines = bytes / (sizeof (long)) / 8; 2371da177e4SLinus Torvalds 2381da177e4SLinus Torvalds do { 2391da177e4SLinus Torvalds register long d0, d1, d2, d3, d4, d5, d6, d7; 2401da177e4SLinus Torvalds d0 = p1[0]; /* Pull the stuff into registers */ 2411da177e4SLinus Torvalds d1 = p1[1]; /* ... in bursts, if possible. */ 2421da177e4SLinus Torvalds d2 = p1[2]; 2431da177e4SLinus Torvalds d3 = p1[3]; 2441da177e4SLinus Torvalds d4 = p1[4]; 2451da177e4SLinus Torvalds d5 = p1[5]; 2461da177e4SLinus Torvalds d6 = p1[6]; 2471da177e4SLinus Torvalds d7 = p1[7]; 2481da177e4SLinus Torvalds d0 ^= p2[0]; 2491da177e4SLinus Torvalds d1 ^= p2[1]; 2501da177e4SLinus Torvalds d2 ^= p2[2]; 2511da177e4SLinus Torvalds d3 ^= p2[3]; 2521da177e4SLinus Torvalds d4 ^= p2[4]; 2531da177e4SLinus Torvalds d5 ^= p2[5]; 2541da177e4SLinus Torvalds d6 ^= p2[6]; 2551da177e4SLinus Torvalds d7 ^= p2[7]; 2561da177e4SLinus Torvalds d0 ^= p3[0]; 2571da177e4SLinus Torvalds d1 ^= p3[1]; 2581da177e4SLinus Torvalds d2 ^= p3[2]; 2591da177e4SLinus Torvalds d3 ^= p3[3]; 2601da177e4SLinus Torvalds d4 ^= p3[4]; 2611da177e4SLinus Torvalds d5 ^= p3[5]; 2621da177e4SLinus Torvalds d6 ^= p3[6]; 2631da177e4SLinus Torvalds d7 ^= p3[7]; 2641da177e4SLinus Torvalds d0 ^= p4[0]; 2651da177e4SLinus Torvalds d1 ^= p4[1]; 2661da177e4SLinus Torvalds d2 ^= p4[2]; 2671da177e4SLinus Torvalds d3 ^= p4[3]; 2681da177e4SLinus Torvalds d4 ^= p4[4]; 2691da177e4SLinus Torvalds d5 ^= p4[5]; 2701da177e4SLinus Torvalds d6 ^= p4[6]; 2711da177e4SLinus Torvalds d7 ^= p4[7]; 2721da177e4SLinus Torvalds d0 ^= p5[0]; 2731da177e4SLinus Torvalds d1 ^= p5[1]; 2741da177e4SLinus Torvalds d2 ^= p5[2]; 2751da177e4SLinus Torvalds d3 ^= p5[3]; 2761da177e4SLinus Torvalds d4 ^= p5[4]; 2771da177e4SLinus Torvalds d5 ^= p5[5]; 2781da177e4SLinus Torvalds d6 ^= p5[6]; 2791da177e4SLinus Torvalds d7 ^= p5[7]; 2801da177e4SLinus Torvalds p1[0] = d0; /* Store the result (in bursts) */ 2811da177e4SLinus Torvalds p1[1] = d1; 2821da177e4SLinus Torvalds p1[2] = d2; 2831da177e4SLinus Torvalds p1[3] = d3; 2841da177e4SLinus Torvalds p1[4] = d4; 2851da177e4SLinus Torvalds p1[5] = d5; 2861da177e4SLinus Torvalds p1[6] = d6; 2871da177e4SLinus Torvalds p1[7] = d7; 2881da177e4SLinus Torvalds p1 += 8; 2891da177e4SLinus Torvalds p2 += 8; 2901da177e4SLinus Torvalds p3 += 8; 2911da177e4SLinus Torvalds p4 += 8; 2921da177e4SLinus Torvalds p5 += 8; 2931da177e4SLinus Torvalds } while (--lines > 0); 2941da177e4SLinus Torvalds } 2951da177e4SLinus Torvalds 2961da177e4SLinus Torvalds static void 2971da177e4SLinus Torvalds xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) 2981da177e4SLinus Torvalds { 2991da177e4SLinus Torvalds long lines = bytes / (sizeof (long)) / 8 - 1; 3001da177e4SLinus Torvalds prefetchw(p1); 3011da177e4SLinus Torvalds prefetch(p2); 3021da177e4SLinus Torvalds 3031da177e4SLinus Torvalds do { 3041da177e4SLinus Torvalds prefetchw(p1+8); 3051da177e4SLinus Torvalds prefetch(p2+8); 3061da177e4SLinus Torvalds once_more: 3071da177e4SLinus Torvalds p1[0] ^= p2[0]; 3081da177e4SLinus Torvalds p1[1] ^= p2[1]; 3091da177e4SLinus Torvalds p1[2] ^= p2[2]; 3101da177e4SLinus Torvalds p1[3] ^= p2[3]; 3111da177e4SLinus Torvalds p1[4] ^= p2[4]; 3121da177e4SLinus Torvalds p1[5] ^= p2[5]; 3131da177e4SLinus Torvalds p1[6] ^= p2[6]; 3141da177e4SLinus Torvalds p1[7] ^= p2[7]; 3151da177e4SLinus Torvalds p1 += 8; 3161da177e4SLinus Torvalds p2 += 8; 3171da177e4SLinus Torvalds } while (--lines > 0); 3181da177e4SLinus Torvalds if (lines == 0) 3191da177e4SLinus Torvalds goto once_more; 3201da177e4SLinus Torvalds } 3211da177e4SLinus Torvalds 3221da177e4SLinus Torvalds static void 3231da177e4SLinus Torvalds xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, 3241da177e4SLinus Torvalds unsigned long *p3) 3251da177e4SLinus Torvalds { 3261da177e4SLinus Torvalds long lines = bytes / (sizeof (long)) / 8 - 1; 3271da177e4SLinus Torvalds prefetchw(p1); 3281da177e4SLinus Torvalds prefetch(p2); 3291da177e4SLinus Torvalds prefetch(p3); 3301da177e4SLinus Torvalds 3311da177e4SLinus Torvalds do { 3321da177e4SLinus Torvalds prefetchw(p1+8); 3331da177e4SLinus Torvalds prefetch(p2+8); 3341da177e4SLinus Torvalds prefetch(p3+8); 3351da177e4SLinus Torvalds once_more: 3361da177e4SLinus Torvalds p1[0] ^= p2[0] ^ p3[0]; 3371da177e4SLinus Torvalds p1[1] ^= p2[1] ^ p3[1]; 3381da177e4SLinus Torvalds p1[2] ^= p2[2] ^ p3[2]; 3391da177e4SLinus Torvalds p1[3] ^= p2[3] ^ p3[3]; 3401da177e4SLinus Torvalds p1[4] ^= p2[4] ^ p3[4]; 3411da177e4SLinus Torvalds p1[5] ^= p2[5] ^ p3[5]; 3421da177e4SLinus Torvalds p1[6] ^= p2[6] ^ p3[6]; 3431da177e4SLinus Torvalds p1[7] ^= p2[7] ^ p3[7]; 3441da177e4SLinus Torvalds p1 += 8; 3451da177e4SLinus Torvalds p2 += 8; 3461da177e4SLinus Torvalds p3 += 8; 3471da177e4SLinus Torvalds } while (--lines > 0); 3481da177e4SLinus Torvalds if (lines == 0) 3491da177e4SLinus Torvalds goto once_more; 3501da177e4SLinus Torvalds } 3511da177e4SLinus Torvalds 3521da177e4SLinus Torvalds static void 3531da177e4SLinus Torvalds xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, 3541da177e4SLinus Torvalds unsigned long *p3, unsigned long *p4) 3551da177e4SLinus Torvalds { 3561da177e4SLinus Torvalds long lines = bytes / (sizeof (long)) / 8 - 1; 3571da177e4SLinus Torvalds 3581da177e4SLinus Torvalds prefetchw(p1); 3591da177e4SLinus Torvalds prefetch(p2); 3601da177e4SLinus Torvalds prefetch(p3); 3611da177e4SLinus Torvalds prefetch(p4); 3621da177e4SLinus Torvalds 3631da177e4SLinus Torvalds do { 3641da177e4SLinus Torvalds prefetchw(p1+8); 3651da177e4SLinus Torvalds prefetch(p2+8); 3661da177e4SLinus Torvalds prefetch(p3+8); 3671da177e4SLinus Torvalds prefetch(p4+8); 3681da177e4SLinus Torvalds once_more: 3691da177e4SLinus Torvalds p1[0] ^= p2[0] ^ p3[0] ^ p4[0]; 3701da177e4SLinus Torvalds p1[1] ^= p2[1] ^ p3[1] ^ p4[1]; 3711da177e4SLinus Torvalds p1[2] ^= p2[2] ^ p3[2] ^ p4[2]; 3721da177e4SLinus Torvalds p1[3] ^= p2[3] ^ p3[3] ^ p4[3]; 3731da177e4SLinus Torvalds p1[4] ^= p2[4] ^ p3[4] ^ p4[4]; 3741da177e4SLinus Torvalds p1[5] ^= p2[5] ^ p3[5] ^ p4[5]; 3751da177e4SLinus Torvalds p1[6] ^= p2[6] ^ p3[6] ^ p4[6]; 3761da177e4SLinus Torvalds p1[7] ^= p2[7] ^ p3[7] ^ p4[7]; 3771da177e4SLinus Torvalds p1 += 8; 3781da177e4SLinus Torvalds p2 += 8; 3791da177e4SLinus Torvalds p3 += 8; 3801da177e4SLinus Torvalds p4 += 8; 3811da177e4SLinus Torvalds } while (--lines > 0); 3821da177e4SLinus Torvalds if (lines == 0) 3831da177e4SLinus Torvalds goto once_more; 3841da177e4SLinus Torvalds } 3851da177e4SLinus Torvalds 3861da177e4SLinus Torvalds static void 3871da177e4SLinus Torvalds xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, 3881da177e4SLinus Torvalds unsigned long *p3, unsigned long *p4, unsigned long *p5) 3891da177e4SLinus Torvalds { 3901da177e4SLinus Torvalds long lines = bytes / (sizeof (long)) / 8 - 1; 3911da177e4SLinus Torvalds 3921da177e4SLinus Torvalds prefetchw(p1); 3931da177e4SLinus Torvalds prefetch(p2); 3941da177e4SLinus Torvalds prefetch(p3); 3951da177e4SLinus Torvalds prefetch(p4); 3961da177e4SLinus Torvalds prefetch(p5); 3971da177e4SLinus Torvalds 3981da177e4SLinus Torvalds do { 3991da177e4SLinus Torvalds prefetchw(p1+8); 4001da177e4SLinus Torvalds prefetch(p2+8); 4011da177e4SLinus Torvalds prefetch(p3+8); 4021da177e4SLinus Torvalds prefetch(p4+8); 4031da177e4SLinus Torvalds prefetch(p5+8); 4041da177e4SLinus Torvalds once_more: 4051da177e4SLinus Torvalds p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0]; 4061da177e4SLinus Torvalds p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1]; 4071da177e4SLinus Torvalds p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2]; 4081da177e4SLinus Torvalds p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3]; 4091da177e4SLinus Torvalds p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4]; 4101da177e4SLinus Torvalds p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5]; 4111da177e4SLinus Torvalds p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6]; 4121da177e4SLinus Torvalds p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7]; 4131da177e4SLinus Torvalds p1 += 8; 4141da177e4SLinus Torvalds p2 += 8; 4151da177e4SLinus Torvalds p3 += 8; 4161da177e4SLinus Torvalds p4 += 8; 4171da177e4SLinus Torvalds p5 += 8; 4181da177e4SLinus Torvalds } while (--lines > 0); 4191da177e4SLinus Torvalds if (lines == 0) 4201da177e4SLinus Torvalds goto once_more; 4211da177e4SLinus Torvalds } 4221da177e4SLinus Torvalds 4231da177e4SLinus Torvalds static void 4241da177e4SLinus Torvalds xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) 4251da177e4SLinus Torvalds { 4261da177e4SLinus Torvalds long lines = bytes / (sizeof (long)) / 8 - 1; 4271da177e4SLinus Torvalds 4281da177e4SLinus Torvalds prefetchw(p1); 4291da177e4SLinus Torvalds prefetch(p2); 4301da177e4SLinus Torvalds 4311da177e4SLinus Torvalds do { 4321da177e4SLinus Torvalds register long d0, d1, d2, d3, d4, d5, d6, d7; 4331da177e4SLinus Torvalds 4341da177e4SLinus Torvalds prefetchw(p1+8); 4351da177e4SLinus Torvalds prefetch(p2+8); 4361da177e4SLinus Torvalds once_more: 4371da177e4SLinus Torvalds d0 = p1[0]; /* Pull the stuff into registers */ 4381da177e4SLinus Torvalds d1 = p1[1]; /* ... in bursts, if possible. */ 4391da177e4SLinus Torvalds d2 = p1[2]; 4401da177e4SLinus Torvalds d3 = p1[3]; 4411da177e4SLinus Torvalds d4 = p1[4]; 4421da177e4SLinus Torvalds d5 = p1[5]; 4431da177e4SLinus Torvalds d6 = p1[6]; 4441da177e4SLinus Torvalds d7 = p1[7]; 4451da177e4SLinus Torvalds d0 ^= p2[0]; 4461da177e4SLinus Torvalds d1 ^= p2[1]; 4471da177e4SLinus Torvalds d2 ^= p2[2]; 4481da177e4SLinus Torvalds d3 ^= p2[3]; 4491da177e4SLinus Torvalds d4 ^= p2[4]; 4501da177e4SLinus Torvalds d5 ^= p2[5]; 4511da177e4SLinus Torvalds d6 ^= p2[6]; 4521da177e4SLinus Torvalds d7 ^= p2[7]; 4531da177e4SLinus Torvalds p1[0] = d0; /* Store the result (in bursts) */ 4541da177e4SLinus Torvalds p1[1] = d1; 4551da177e4SLinus Torvalds p1[2] = d2; 4561da177e4SLinus Torvalds p1[3] = d3; 4571da177e4SLinus Torvalds p1[4] = d4; 4581da177e4SLinus Torvalds p1[5] = d5; 4591da177e4SLinus Torvalds p1[6] = d6; 4601da177e4SLinus Torvalds p1[7] = d7; 4611da177e4SLinus Torvalds p1 += 8; 4621da177e4SLinus Torvalds p2 += 8; 4631da177e4SLinus Torvalds } while (--lines > 0); 4641da177e4SLinus Torvalds if (lines == 0) 4651da177e4SLinus Torvalds goto once_more; 4661da177e4SLinus Torvalds } 4671da177e4SLinus Torvalds 4681da177e4SLinus Torvalds static void 4691da177e4SLinus Torvalds xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, 4701da177e4SLinus Torvalds unsigned long *p3) 4711da177e4SLinus Torvalds { 4721da177e4SLinus Torvalds long lines = bytes / (sizeof (long)) / 8 - 1; 4731da177e4SLinus Torvalds 4741da177e4SLinus Torvalds prefetchw(p1); 4751da177e4SLinus Torvalds prefetch(p2); 4761da177e4SLinus Torvalds prefetch(p3); 4771da177e4SLinus Torvalds 4781da177e4SLinus Torvalds do { 4791da177e4SLinus Torvalds register long d0, d1, d2, d3, d4, d5, d6, d7; 4801da177e4SLinus Torvalds 4811da177e4SLinus Torvalds prefetchw(p1+8); 4821da177e4SLinus Torvalds prefetch(p2+8); 4831da177e4SLinus Torvalds prefetch(p3+8); 4841da177e4SLinus Torvalds once_more: 4851da177e4SLinus Torvalds d0 = p1[0]; /* Pull the stuff into registers */ 4861da177e4SLinus Torvalds d1 = p1[1]; /* ... in bursts, if possible. */ 4871da177e4SLinus Torvalds d2 = p1[2]; 4881da177e4SLinus Torvalds d3 = p1[3]; 4891da177e4SLinus Torvalds d4 = p1[4]; 4901da177e4SLinus Torvalds d5 = p1[5]; 4911da177e4SLinus Torvalds d6 = p1[6]; 4921da177e4SLinus Torvalds d7 = p1[7]; 4931da177e4SLinus Torvalds d0 ^= p2[0]; 4941da177e4SLinus Torvalds d1 ^= p2[1]; 4951da177e4SLinus Torvalds d2 ^= p2[2]; 4961da177e4SLinus Torvalds d3 ^= p2[3]; 4971da177e4SLinus Torvalds d4 ^= p2[4]; 4981da177e4SLinus Torvalds d5 ^= p2[5]; 4991da177e4SLinus Torvalds d6 ^= p2[6]; 5001da177e4SLinus Torvalds d7 ^= p2[7]; 5011da177e4SLinus Torvalds d0 ^= p3[0]; 5021da177e4SLinus Torvalds d1 ^= p3[1]; 5031da177e4SLinus Torvalds d2 ^= p3[2]; 5041da177e4SLinus Torvalds d3 ^= p3[3]; 5051da177e4SLinus Torvalds d4 ^= p3[4]; 5061da177e4SLinus Torvalds d5 ^= p3[5]; 5071da177e4SLinus Torvalds d6 ^= p3[6]; 5081da177e4SLinus Torvalds d7 ^= p3[7]; 5091da177e4SLinus Torvalds p1[0] = d0; /* Store the result (in bursts) */ 5101da177e4SLinus Torvalds p1[1] = d1; 5111da177e4SLinus Torvalds p1[2] = d2; 5121da177e4SLinus Torvalds p1[3] = d3; 5131da177e4SLinus Torvalds p1[4] = d4; 5141da177e4SLinus Torvalds p1[5] = d5; 5151da177e4SLinus Torvalds p1[6] = d6; 5161da177e4SLinus Torvalds p1[7] = d7; 5171da177e4SLinus Torvalds p1 += 8; 5181da177e4SLinus Torvalds p2 += 8; 5191da177e4SLinus Torvalds p3 += 8; 5201da177e4SLinus Torvalds } while (--lines > 0); 5211da177e4SLinus Torvalds if (lines == 0) 5221da177e4SLinus Torvalds goto once_more; 5231da177e4SLinus Torvalds } 5241da177e4SLinus Torvalds 5251da177e4SLinus Torvalds static void 5261da177e4SLinus Torvalds xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, 5271da177e4SLinus Torvalds unsigned long *p3, unsigned long *p4) 5281da177e4SLinus Torvalds { 5291da177e4SLinus Torvalds long lines = bytes / (sizeof (long)) / 8 - 1; 5301da177e4SLinus Torvalds 5311da177e4SLinus Torvalds prefetchw(p1); 5321da177e4SLinus Torvalds prefetch(p2); 5331da177e4SLinus Torvalds prefetch(p3); 5341da177e4SLinus Torvalds prefetch(p4); 5351da177e4SLinus Torvalds 5361da177e4SLinus Torvalds do { 5371da177e4SLinus Torvalds register long d0, d1, d2, d3, d4, d5, d6, d7; 5381da177e4SLinus Torvalds 5391da177e4SLinus Torvalds prefetchw(p1+8); 5401da177e4SLinus Torvalds prefetch(p2+8); 5411da177e4SLinus Torvalds prefetch(p3+8); 5421da177e4SLinus Torvalds prefetch(p4+8); 5431da177e4SLinus Torvalds once_more: 5441da177e4SLinus Torvalds d0 = p1[0]; /* Pull the stuff into registers */ 5451da177e4SLinus Torvalds d1 = p1[1]; /* ... in bursts, if possible. */ 5461da177e4SLinus Torvalds d2 = p1[2]; 5471da177e4SLinus Torvalds d3 = p1[3]; 5481da177e4SLinus Torvalds d4 = p1[4]; 5491da177e4SLinus Torvalds d5 = p1[5]; 5501da177e4SLinus Torvalds d6 = p1[6]; 5511da177e4SLinus Torvalds d7 = p1[7]; 5521da177e4SLinus Torvalds d0 ^= p2[0]; 5531da177e4SLinus Torvalds d1 ^= p2[1]; 5541da177e4SLinus Torvalds d2 ^= p2[2]; 5551da177e4SLinus Torvalds d3 ^= p2[3]; 5561da177e4SLinus Torvalds d4 ^= p2[4]; 5571da177e4SLinus Torvalds d5 ^= p2[5]; 5581da177e4SLinus Torvalds d6 ^= p2[6]; 5591da177e4SLinus Torvalds d7 ^= p2[7]; 5601da177e4SLinus Torvalds d0 ^= p3[0]; 5611da177e4SLinus Torvalds d1 ^= p3[1]; 5621da177e4SLinus Torvalds d2 ^= p3[2]; 5631da177e4SLinus Torvalds d3 ^= p3[3]; 5641da177e4SLinus Torvalds d4 ^= p3[4]; 5651da177e4SLinus Torvalds d5 ^= p3[5]; 5661da177e4SLinus Torvalds d6 ^= p3[6]; 5671da177e4SLinus Torvalds d7 ^= p3[7]; 5681da177e4SLinus Torvalds d0 ^= p4[0]; 5691da177e4SLinus Torvalds d1 ^= p4[1]; 5701da177e4SLinus Torvalds d2 ^= p4[2]; 5711da177e4SLinus Torvalds d3 ^= p4[3]; 5721da177e4SLinus Torvalds d4 ^= p4[4]; 5731da177e4SLinus Torvalds d5 ^= p4[5]; 5741da177e4SLinus Torvalds d6 ^= p4[6]; 5751da177e4SLinus Torvalds d7 ^= p4[7]; 5761da177e4SLinus Torvalds p1[0] = d0; /* Store the result (in bursts) */ 5771da177e4SLinus Torvalds p1[1] = d1; 5781da177e4SLinus Torvalds p1[2] = d2; 5791da177e4SLinus Torvalds p1[3] = d3; 5801da177e4SLinus Torvalds p1[4] = d4; 5811da177e4SLinus Torvalds p1[5] = d5; 5821da177e4SLinus Torvalds p1[6] = d6; 5831da177e4SLinus Torvalds p1[7] = d7; 5841da177e4SLinus Torvalds p1 += 8; 5851da177e4SLinus Torvalds p2 += 8; 5861da177e4SLinus Torvalds p3 += 8; 5871da177e4SLinus Torvalds p4 += 8; 5881da177e4SLinus Torvalds } while (--lines > 0); 5891da177e4SLinus Torvalds if (lines == 0) 5901da177e4SLinus Torvalds goto once_more; 5911da177e4SLinus Torvalds } 5921da177e4SLinus Torvalds 5931da177e4SLinus Torvalds static void 5941da177e4SLinus Torvalds xor_32regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, 5951da177e4SLinus Torvalds unsigned long *p3, unsigned long *p4, unsigned long *p5) 5961da177e4SLinus Torvalds { 5971da177e4SLinus Torvalds long lines = bytes / (sizeof (long)) / 8 - 1; 5981da177e4SLinus Torvalds 5991da177e4SLinus Torvalds prefetchw(p1); 6001da177e4SLinus Torvalds prefetch(p2); 6011da177e4SLinus Torvalds prefetch(p3); 6021da177e4SLinus Torvalds prefetch(p4); 6031da177e4SLinus Torvalds prefetch(p5); 6041da177e4SLinus Torvalds 6051da177e4SLinus Torvalds do { 6061da177e4SLinus Torvalds register long d0, d1, d2, d3, d4, d5, d6, d7; 6071da177e4SLinus Torvalds 6081da177e4SLinus Torvalds prefetchw(p1+8); 6091da177e4SLinus Torvalds prefetch(p2+8); 6101da177e4SLinus Torvalds prefetch(p3+8); 6111da177e4SLinus Torvalds prefetch(p4+8); 6121da177e4SLinus Torvalds prefetch(p5+8); 6131da177e4SLinus Torvalds once_more: 6141da177e4SLinus Torvalds d0 = p1[0]; /* Pull the stuff into registers */ 6151da177e4SLinus Torvalds d1 = p1[1]; /* ... in bursts, if possible. */ 6161da177e4SLinus Torvalds d2 = p1[2]; 6171da177e4SLinus Torvalds d3 = p1[3]; 6181da177e4SLinus Torvalds d4 = p1[4]; 6191da177e4SLinus Torvalds d5 = p1[5]; 6201da177e4SLinus Torvalds d6 = p1[6]; 6211da177e4SLinus Torvalds d7 = p1[7]; 6221da177e4SLinus Torvalds d0 ^= p2[0]; 6231da177e4SLinus Torvalds d1 ^= p2[1]; 6241da177e4SLinus Torvalds d2 ^= p2[2]; 6251da177e4SLinus Torvalds d3 ^= p2[3]; 6261da177e4SLinus Torvalds d4 ^= p2[4]; 6271da177e4SLinus Torvalds d5 ^= p2[5]; 6281da177e4SLinus Torvalds d6 ^= p2[6]; 6291da177e4SLinus Torvalds d7 ^= p2[7]; 6301da177e4SLinus Torvalds d0 ^= p3[0]; 6311da177e4SLinus Torvalds d1 ^= p3[1]; 6321da177e4SLinus Torvalds d2 ^= p3[2]; 6331da177e4SLinus Torvalds d3 ^= p3[3]; 6341da177e4SLinus Torvalds d4 ^= p3[4]; 6351da177e4SLinus Torvalds d5 ^= p3[5]; 6361da177e4SLinus Torvalds d6 ^= p3[6]; 6371da177e4SLinus Torvalds d7 ^= p3[7]; 6381da177e4SLinus Torvalds d0 ^= p4[0]; 6391da177e4SLinus Torvalds d1 ^= p4[1]; 6401da177e4SLinus Torvalds d2 ^= p4[2]; 6411da177e4SLinus Torvalds d3 ^= p4[3]; 6421da177e4SLinus Torvalds d4 ^= p4[4]; 6431da177e4SLinus Torvalds d5 ^= p4[5]; 6441da177e4SLinus Torvalds d6 ^= p4[6]; 6451da177e4SLinus Torvalds d7 ^= p4[7]; 6461da177e4SLinus Torvalds d0 ^= p5[0]; 6471da177e4SLinus Torvalds d1 ^= p5[1]; 6481da177e4SLinus Torvalds d2 ^= p5[2]; 6491da177e4SLinus Torvalds d3 ^= p5[3]; 6501da177e4SLinus Torvalds d4 ^= p5[4]; 6511da177e4SLinus Torvalds d5 ^= p5[5]; 6521da177e4SLinus Torvalds d6 ^= p5[6]; 6531da177e4SLinus Torvalds d7 ^= p5[7]; 6541da177e4SLinus Torvalds p1[0] = d0; /* Store the result (in bursts) */ 6551da177e4SLinus Torvalds p1[1] = d1; 6561da177e4SLinus Torvalds p1[2] = d2; 6571da177e4SLinus Torvalds p1[3] = d3; 6581da177e4SLinus Torvalds p1[4] = d4; 6591da177e4SLinus Torvalds p1[5] = d5; 6601da177e4SLinus Torvalds p1[6] = d6; 6611da177e4SLinus Torvalds p1[7] = d7; 6621da177e4SLinus Torvalds p1 += 8; 6631da177e4SLinus Torvalds p2 += 8; 6641da177e4SLinus Torvalds p3 += 8; 6651da177e4SLinus Torvalds p4 += 8; 6661da177e4SLinus Torvalds p5 += 8; 6671da177e4SLinus Torvalds } while (--lines > 0); 6681da177e4SLinus Torvalds if (lines == 0) 6691da177e4SLinus Torvalds goto once_more; 6701da177e4SLinus Torvalds } 6711da177e4SLinus Torvalds 6721da177e4SLinus Torvalds static struct xor_block_template xor_block_8regs = { 6731da177e4SLinus Torvalds .name = "8regs", 6741da177e4SLinus Torvalds .do_2 = xor_8regs_2, 6751da177e4SLinus Torvalds .do_3 = xor_8regs_3, 6761da177e4SLinus Torvalds .do_4 = xor_8regs_4, 6771da177e4SLinus Torvalds .do_5 = xor_8regs_5, 6781da177e4SLinus Torvalds }; 6791da177e4SLinus Torvalds 6801da177e4SLinus Torvalds static struct xor_block_template xor_block_32regs = { 6811da177e4SLinus Torvalds .name = "32regs", 6821da177e4SLinus Torvalds .do_2 = xor_32regs_2, 6831da177e4SLinus Torvalds .do_3 = xor_32regs_3, 6841da177e4SLinus Torvalds .do_4 = xor_32regs_4, 6851da177e4SLinus Torvalds .do_5 = xor_32regs_5, 6861da177e4SLinus Torvalds }; 6871da177e4SLinus Torvalds 688720fb197SArnd Bergmann static struct xor_block_template xor_block_8regs_p __maybe_unused = { 6891da177e4SLinus Torvalds .name = "8regs_prefetch", 6901da177e4SLinus Torvalds .do_2 = xor_8regs_p_2, 6911da177e4SLinus Torvalds .do_3 = xor_8regs_p_3, 6921da177e4SLinus Torvalds .do_4 = xor_8regs_p_4, 6931da177e4SLinus Torvalds .do_5 = xor_8regs_p_5, 6941da177e4SLinus Torvalds }; 6951da177e4SLinus Torvalds 696720fb197SArnd Bergmann static struct xor_block_template xor_block_32regs_p __maybe_unused = { 6971da177e4SLinus Torvalds .name = "32regs_prefetch", 6981da177e4SLinus Torvalds .do_2 = xor_32regs_p_2, 6991da177e4SLinus Torvalds .do_3 = xor_32regs_p_3, 7001da177e4SLinus Torvalds .do_4 = xor_32regs_p_4, 7011da177e4SLinus Torvalds .do_5 = xor_32regs_p_5, 7021da177e4SLinus Torvalds }; 7031da177e4SLinus Torvalds 7041da177e4SLinus Torvalds #define XOR_TRY_TEMPLATES \ 7051da177e4SLinus Torvalds do { \ 7061da177e4SLinus Torvalds xor_speed(&xor_block_8regs); \ 7071da177e4SLinus Torvalds xor_speed(&xor_block_8regs_p); \ 7081da177e4SLinus Torvalds xor_speed(&xor_block_32regs); \ 7091da177e4SLinus Torvalds xor_speed(&xor_block_32regs_p); \ 7101da177e4SLinus Torvalds } while (0) 711