1d2912cb1SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 2cc9f8349SJackie Liu /* 3cc9f8349SJackie Liu * arch/arm64/lib/xor-neon.c 4cc9f8349SJackie Liu * 5cc9f8349SJackie Liu * Authors: Jackie Liu <liuyun01@kylinos.cn> 6cc9f8349SJackie Liu * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd. 7cc9f8349SJackie Liu */ 8cc9f8349SJackie Liu 9cc9f8349SJackie Liu #include <linux/raid/xor.h> 10cc9f8349SJackie Liu #include <linux/module.h> 11cc9f8349SJackie Liu #include <asm/neon-intrinsics.h> 12cc9f8349SJackie Liu 13*297565aaSArd Biesheuvel void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1, 14*297565aaSArd Biesheuvel const unsigned long * __restrict p2) 15cc9f8349SJackie Liu { 16cc9f8349SJackie Liu uint64_t *dp1 = (uint64_t *)p1; 17cc9f8349SJackie Liu uint64_t *dp2 = (uint64_t *)p2; 18cc9f8349SJackie Liu 19cc9f8349SJackie Liu register uint64x2_t v0, v1, v2, v3; 20cc9f8349SJackie Liu long lines = bytes / (sizeof(uint64x2_t) * 4); 21cc9f8349SJackie Liu 22cc9f8349SJackie Liu do { 23cc9f8349SJackie Liu /* p1 ^= p2 */ 24cc9f8349SJackie Liu v0 = veorq_u64(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0)); 25cc9f8349SJackie Liu v1 = veorq_u64(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2)); 26cc9f8349SJackie Liu v2 = veorq_u64(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4)); 27cc9f8349SJackie Liu v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6)); 28cc9f8349SJackie Liu 29cc9f8349SJackie Liu /* store */ 30cc9f8349SJackie Liu vst1q_u64(dp1 + 0, v0); 31cc9f8349SJackie Liu vst1q_u64(dp1 + 2, v1); 32cc9f8349SJackie Liu vst1q_u64(dp1 + 4, v2); 33cc9f8349SJackie Liu vst1q_u64(dp1 + 6, v3); 34cc9f8349SJackie Liu 35cc9f8349SJackie Liu dp1 += 8; 36cc9f8349SJackie Liu dp2 += 8; 37cc9f8349SJackie Liu } while (--lines > 0); 38cc9f8349SJackie Liu } 39cc9f8349SJackie Liu 40*297565aaSArd Biesheuvel void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1, 41*297565aaSArd Biesheuvel const unsigned long * __restrict p2, 42*297565aaSArd Biesheuvel const unsigned long * __restrict p3) 43cc9f8349SJackie Liu { 44cc9f8349SJackie Liu uint64_t *dp1 = (uint64_t *)p1; 45cc9f8349SJackie Liu uint64_t *dp2 = (uint64_t *)p2; 46cc9f8349SJackie Liu uint64_t *dp3 = (uint64_t *)p3; 47cc9f8349SJackie Liu 48cc9f8349SJackie Liu register uint64x2_t v0, v1, v2, v3; 49cc9f8349SJackie Liu long lines = bytes / (sizeof(uint64x2_t) * 4); 50cc9f8349SJackie Liu 51cc9f8349SJackie Liu do { 52cc9f8349SJackie Liu /* p1 ^= p2 */ 53cc9f8349SJackie Liu v0 = veorq_u64(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0)); 54cc9f8349SJackie Liu v1 = veorq_u64(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2)); 55cc9f8349SJackie Liu v2 = veorq_u64(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4)); 56cc9f8349SJackie Liu v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6)); 57cc9f8349SJackie Liu 58cc9f8349SJackie Liu /* p1 ^= p3 */ 59cc9f8349SJackie Liu v0 = veorq_u64(v0, vld1q_u64(dp3 + 0)); 60cc9f8349SJackie Liu v1 = veorq_u64(v1, vld1q_u64(dp3 + 2)); 61cc9f8349SJackie Liu v2 = veorq_u64(v2, vld1q_u64(dp3 + 4)); 62cc9f8349SJackie Liu v3 = veorq_u64(v3, vld1q_u64(dp3 + 6)); 63cc9f8349SJackie Liu 64cc9f8349SJackie Liu /* store */ 65cc9f8349SJackie Liu vst1q_u64(dp1 + 0, v0); 66cc9f8349SJackie Liu vst1q_u64(dp1 + 2, v1); 67cc9f8349SJackie Liu vst1q_u64(dp1 + 4, v2); 68cc9f8349SJackie Liu vst1q_u64(dp1 + 6, v3); 69cc9f8349SJackie Liu 70cc9f8349SJackie Liu dp1 += 8; 71cc9f8349SJackie Liu dp2 += 8; 72cc9f8349SJackie Liu dp3 += 8; 73cc9f8349SJackie Liu } while (--lines > 0); 74cc9f8349SJackie Liu } 75cc9f8349SJackie Liu 76*297565aaSArd Biesheuvel void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1, 77*297565aaSArd Biesheuvel const unsigned long * __restrict p2, 78*297565aaSArd Biesheuvel const unsigned long * __restrict p3, 79*297565aaSArd Biesheuvel const unsigned long * __restrict p4) 80cc9f8349SJackie Liu { 81cc9f8349SJackie Liu uint64_t *dp1 = (uint64_t *)p1; 82cc9f8349SJackie Liu uint64_t *dp2 = (uint64_t *)p2; 83cc9f8349SJackie Liu uint64_t *dp3 = (uint64_t *)p3; 84cc9f8349SJackie Liu uint64_t *dp4 = (uint64_t *)p4; 85cc9f8349SJackie Liu 86cc9f8349SJackie Liu register uint64x2_t v0, v1, v2, v3; 87cc9f8349SJackie Liu long lines = bytes / (sizeof(uint64x2_t) * 4); 88cc9f8349SJackie Liu 89cc9f8349SJackie Liu do { 90cc9f8349SJackie Liu /* p1 ^= p2 */ 91cc9f8349SJackie Liu v0 = veorq_u64(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0)); 92cc9f8349SJackie Liu v1 = veorq_u64(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2)); 93cc9f8349SJackie Liu v2 = veorq_u64(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4)); 94cc9f8349SJackie Liu v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6)); 95cc9f8349SJackie Liu 96cc9f8349SJackie Liu /* p1 ^= p3 */ 97cc9f8349SJackie Liu v0 = veorq_u64(v0, vld1q_u64(dp3 + 0)); 98cc9f8349SJackie Liu v1 = veorq_u64(v1, vld1q_u64(dp3 + 2)); 99cc9f8349SJackie Liu v2 = veorq_u64(v2, vld1q_u64(dp3 + 4)); 100cc9f8349SJackie Liu v3 = veorq_u64(v3, vld1q_u64(dp3 + 6)); 101cc9f8349SJackie Liu 102cc9f8349SJackie Liu /* p1 ^= p4 */ 103cc9f8349SJackie Liu v0 = veorq_u64(v0, vld1q_u64(dp4 + 0)); 104cc9f8349SJackie Liu v1 = veorq_u64(v1, vld1q_u64(dp4 + 2)); 105cc9f8349SJackie Liu v2 = veorq_u64(v2, vld1q_u64(dp4 + 4)); 106cc9f8349SJackie Liu v3 = veorq_u64(v3, vld1q_u64(dp4 + 6)); 107cc9f8349SJackie Liu 108cc9f8349SJackie Liu /* store */ 109cc9f8349SJackie Liu vst1q_u64(dp1 + 0, v0); 110cc9f8349SJackie Liu vst1q_u64(dp1 + 2, v1); 111cc9f8349SJackie Liu vst1q_u64(dp1 + 4, v2); 112cc9f8349SJackie Liu vst1q_u64(dp1 + 6, v3); 113cc9f8349SJackie Liu 114cc9f8349SJackie Liu dp1 += 8; 115cc9f8349SJackie Liu dp2 += 8; 116cc9f8349SJackie Liu dp3 += 8; 117cc9f8349SJackie Liu dp4 += 8; 118cc9f8349SJackie Liu } while (--lines > 0); 119cc9f8349SJackie Liu } 120cc9f8349SJackie Liu 121*297565aaSArd Biesheuvel void xor_arm64_neon_5(unsigned long bytes, unsigned long * __restrict p1, 122*297565aaSArd Biesheuvel const unsigned long * __restrict p2, 123*297565aaSArd Biesheuvel const unsigned long * __restrict p3, 124*297565aaSArd Biesheuvel const unsigned long * __restrict p4, 125*297565aaSArd Biesheuvel const unsigned long * __restrict p5) 126cc9f8349SJackie Liu { 127cc9f8349SJackie Liu uint64_t *dp1 = (uint64_t *)p1; 128cc9f8349SJackie Liu uint64_t *dp2 = (uint64_t *)p2; 129cc9f8349SJackie Liu uint64_t *dp3 = (uint64_t *)p3; 130cc9f8349SJackie Liu uint64_t *dp4 = (uint64_t *)p4; 131cc9f8349SJackie Liu uint64_t *dp5 = (uint64_t *)p5; 132cc9f8349SJackie Liu 133cc9f8349SJackie Liu register uint64x2_t v0, v1, v2, v3; 134cc9f8349SJackie Liu long lines = bytes / (sizeof(uint64x2_t) * 4); 135cc9f8349SJackie Liu 136cc9f8349SJackie Liu do { 137cc9f8349SJackie Liu /* p1 ^= p2 */ 138cc9f8349SJackie Liu v0 = veorq_u64(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0)); 139cc9f8349SJackie Liu v1 = veorq_u64(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2)); 140cc9f8349SJackie Liu v2 = veorq_u64(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4)); 141cc9f8349SJackie Liu v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6)); 142cc9f8349SJackie Liu 143cc9f8349SJackie Liu /* p1 ^= p3 */ 144cc9f8349SJackie Liu v0 = veorq_u64(v0, vld1q_u64(dp3 + 0)); 145cc9f8349SJackie Liu v1 = veorq_u64(v1, vld1q_u64(dp3 + 2)); 146cc9f8349SJackie Liu v2 = veorq_u64(v2, vld1q_u64(dp3 + 4)); 147cc9f8349SJackie Liu v3 = veorq_u64(v3, vld1q_u64(dp3 + 6)); 148cc9f8349SJackie Liu 149cc9f8349SJackie Liu /* p1 ^= p4 */ 150cc9f8349SJackie Liu v0 = veorq_u64(v0, vld1q_u64(dp4 + 0)); 151cc9f8349SJackie Liu v1 = veorq_u64(v1, vld1q_u64(dp4 + 2)); 152cc9f8349SJackie Liu v2 = veorq_u64(v2, vld1q_u64(dp4 + 4)); 153cc9f8349SJackie Liu v3 = veorq_u64(v3, vld1q_u64(dp4 + 6)); 154cc9f8349SJackie Liu 155cc9f8349SJackie Liu /* p1 ^= p5 */ 156cc9f8349SJackie Liu v0 = veorq_u64(v0, vld1q_u64(dp5 + 0)); 157cc9f8349SJackie Liu v1 = veorq_u64(v1, vld1q_u64(dp5 + 2)); 158cc9f8349SJackie Liu v2 = veorq_u64(v2, vld1q_u64(dp5 + 4)); 159cc9f8349SJackie Liu v3 = veorq_u64(v3, vld1q_u64(dp5 + 6)); 160cc9f8349SJackie Liu 161cc9f8349SJackie Liu /* store */ 162cc9f8349SJackie Liu vst1q_u64(dp1 + 0, v0); 163cc9f8349SJackie Liu vst1q_u64(dp1 + 2, v1); 164cc9f8349SJackie Liu vst1q_u64(dp1 + 4, v2); 165cc9f8349SJackie Liu vst1q_u64(dp1 + 6, v3); 166cc9f8349SJackie Liu 167cc9f8349SJackie Liu dp1 += 8; 168cc9f8349SJackie Liu dp2 += 8; 169cc9f8349SJackie Liu dp3 += 8; 170cc9f8349SJackie Liu dp4 += 8; 171cc9f8349SJackie Liu dp5 += 8; 172cc9f8349SJackie Liu } while (--lines > 0); 173cc9f8349SJackie Liu } 174cc9f8349SJackie Liu 1752c54b423SArd Biesheuvel struct xor_block_template xor_block_inner_neon __ro_after_init = { 176cc9f8349SJackie Liu .name = "__inner_neon__", 177cc9f8349SJackie Liu .do_2 = xor_arm64_neon_2, 178cc9f8349SJackie Liu .do_3 = xor_arm64_neon_3, 179cc9f8349SJackie Liu .do_4 = xor_arm64_neon_4, 180cc9f8349SJackie Liu .do_5 = xor_arm64_neon_5, 181cc9f8349SJackie Liu }; 182cc9f8349SJackie Liu EXPORT_SYMBOL(xor_block_inner_neon); 183cc9f8349SJackie Liu 1842c54b423SArd Biesheuvel static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r) 1852c54b423SArd Biesheuvel { 1862c54b423SArd Biesheuvel uint64x2_t res; 1872c54b423SArd Biesheuvel 1882c54b423SArd Biesheuvel asm(ARM64_ASM_PREAMBLE ".arch_extension sha3\n" 1892c54b423SArd Biesheuvel "eor3 %0.16b, %1.16b, %2.16b, %3.16b" 1902c54b423SArd Biesheuvel : "=w"(res) : "w"(p), "w"(q), "w"(r)); 1912c54b423SArd Biesheuvel return res; 1922c54b423SArd Biesheuvel } 1932c54b423SArd Biesheuvel 194*297565aaSArd Biesheuvel static void xor_arm64_eor3_3(unsigned long bytes, 195*297565aaSArd Biesheuvel unsigned long * __restrict p1, 196*297565aaSArd Biesheuvel const unsigned long * __restrict p2, 197*297565aaSArd Biesheuvel const unsigned long * __restrict p3) 1982c54b423SArd Biesheuvel { 1992c54b423SArd Biesheuvel uint64_t *dp1 = (uint64_t *)p1; 2002c54b423SArd Biesheuvel uint64_t *dp2 = (uint64_t *)p2; 2012c54b423SArd Biesheuvel uint64_t *dp3 = (uint64_t *)p3; 2022c54b423SArd Biesheuvel 2032c54b423SArd Biesheuvel register uint64x2_t v0, v1, v2, v3; 2042c54b423SArd Biesheuvel long lines = bytes / (sizeof(uint64x2_t) * 4); 2052c54b423SArd Biesheuvel 2062c54b423SArd Biesheuvel do { 2072c54b423SArd Biesheuvel /* p1 ^= p2 ^ p3 */ 2082c54b423SArd Biesheuvel v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0), 2092c54b423SArd Biesheuvel vld1q_u64(dp3 + 0)); 2102c54b423SArd Biesheuvel v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2), 2112c54b423SArd Biesheuvel vld1q_u64(dp3 + 2)); 2122c54b423SArd Biesheuvel v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4), 2132c54b423SArd Biesheuvel vld1q_u64(dp3 + 4)); 2142c54b423SArd Biesheuvel v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6), 2152c54b423SArd Biesheuvel vld1q_u64(dp3 + 6)); 2162c54b423SArd Biesheuvel 2172c54b423SArd Biesheuvel /* store */ 2182c54b423SArd Biesheuvel vst1q_u64(dp1 + 0, v0); 2192c54b423SArd Biesheuvel vst1q_u64(dp1 + 2, v1); 2202c54b423SArd Biesheuvel vst1q_u64(dp1 + 4, v2); 2212c54b423SArd Biesheuvel vst1q_u64(dp1 + 6, v3); 2222c54b423SArd Biesheuvel 2232c54b423SArd Biesheuvel dp1 += 8; 2242c54b423SArd Biesheuvel dp2 += 8; 2252c54b423SArd Biesheuvel dp3 += 8; 2262c54b423SArd Biesheuvel } while (--lines > 0); 2272c54b423SArd Biesheuvel } 2282c54b423SArd Biesheuvel 229*297565aaSArd Biesheuvel static void xor_arm64_eor3_4(unsigned long bytes, 230*297565aaSArd Biesheuvel unsigned long * __restrict p1, 231*297565aaSArd Biesheuvel const unsigned long * __restrict p2, 232*297565aaSArd Biesheuvel const unsigned long * __restrict p3, 233*297565aaSArd Biesheuvel const unsigned long * __restrict p4) 2342c54b423SArd Biesheuvel { 2352c54b423SArd Biesheuvel uint64_t *dp1 = (uint64_t *)p1; 2362c54b423SArd Biesheuvel uint64_t *dp2 = (uint64_t *)p2; 2372c54b423SArd Biesheuvel uint64_t *dp3 = (uint64_t *)p3; 2382c54b423SArd Biesheuvel uint64_t *dp4 = (uint64_t *)p4; 2392c54b423SArd Biesheuvel 2402c54b423SArd Biesheuvel register uint64x2_t v0, v1, v2, v3; 2412c54b423SArd Biesheuvel long lines = bytes / (sizeof(uint64x2_t) * 4); 2422c54b423SArd Biesheuvel 2432c54b423SArd Biesheuvel do { 2442c54b423SArd Biesheuvel /* p1 ^= p2 ^ p3 */ 2452c54b423SArd Biesheuvel v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0), 2462c54b423SArd Biesheuvel vld1q_u64(dp3 + 0)); 2472c54b423SArd Biesheuvel v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2), 2482c54b423SArd Biesheuvel vld1q_u64(dp3 + 2)); 2492c54b423SArd Biesheuvel v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4), 2502c54b423SArd Biesheuvel vld1q_u64(dp3 + 4)); 2512c54b423SArd Biesheuvel v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6), 2522c54b423SArd Biesheuvel vld1q_u64(dp3 + 6)); 2532c54b423SArd Biesheuvel 2542c54b423SArd Biesheuvel /* p1 ^= p4 */ 2552c54b423SArd Biesheuvel v0 = veorq_u64(v0, vld1q_u64(dp4 + 0)); 2562c54b423SArd Biesheuvel v1 = veorq_u64(v1, vld1q_u64(dp4 + 2)); 2572c54b423SArd Biesheuvel v2 = veorq_u64(v2, vld1q_u64(dp4 + 4)); 2582c54b423SArd Biesheuvel v3 = veorq_u64(v3, vld1q_u64(dp4 + 6)); 2592c54b423SArd Biesheuvel 2602c54b423SArd Biesheuvel /* store */ 2612c54b423SArd Biesheuvel vst1q_u64(dp1 + 0, v0); 2622c54b423SArd Biesheuvel vst1q_u64(dp1 + 2, v1); 2632c54b423SArd Biesheuvel vst1q_u64(dp1 + 4, v2); 2642c54b423SArd Biesheuvel vst1q_u64(dp1 + 6, v3); 2652c54b423SArd Biesheuvel 2662c54b423SArd Biesheuvel dp1 += 8; 2672c54b423SArd Biesheuvel dp2 += 8; 2682c54b423SArd Biesheuvel dp3 += 8; 2692c54b423SArd Biesheuvel dp4 += 8; 2702c54b423SArd Biesheuvel } while (--lines > 0); 2712c54b423SArd Biesheuvel } 2722c54b423SArd Biesheuvel 273*297565aaSArd Biesheuvel static void xor_arm64_eor3_5(unsigned long bytes, 274*297565aaSArd Biesheuvel unsigned long * __restrict p1, 275*297565aaSArd Biesheuvel const unsigned long * __restrict p2, 276*297565aaSArd Biesheuvel const unsigned long * __restrict p3, 277*297565aaSArd Biesheuvel const unsigned long * __restrict p4, 278*297565aaSArd Biesheuvel const unsigned long * __restrict p5) 2792c54b423SArd Biesheuvel { 2802c54b423SArd Biesheuvel uint64_t *dp1 = (uint64_t *)p1; 2812c54b423SArd Biesheuvel uint64_t *dp2 = (uint64_t *)p2; 2822c54b423SArd Biesheuvel uint64_t *dp3 = (uint64_t *)p3; 2832c54b423SArd Biesheuvel uint64_t *dp4 = (uint64_t *)p4; 2842c54b423SArd Biesheuvel uint64_t *dp5 = (uint64_t *)p5; 2852c54b423SArd Biesheuvel 2862c54b423SArd Biesheuvel register uint64x2_t v0, v1, v2, v3; 2872c54b423SArd Biesheuvel long lines = bytes / (sizeof(uint64x2_t) * 4); 2882c54b423SArd Biesheuvel 2892c54b423SArd Biesheuvel do { 2902c54b423SArd Biesheuvel /* p1 ^= p2 ^ p3 */ 2912c54b423SArd Biesheuvel v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0), 2922c54b423SArd Biesheuvel vld1q_u64(dp3 + 0)); 2932c54b423SArd Biesheuvel v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2), 2942c54b423SArd Biesheuvel vld1q_u64(dp3 + 2)); 2952c54b423SArd Biesheuvel v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4), 2962c54b423SArd Biesheuvel vld1q_u64(dp3 + 4)); 2972c54b423SArd Biesheuvel v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6), 2982c54b423SArd Biesheuvel vld1q_u64(dp3 + 6)); 2992c54b423SArd Biesheuvel 3002c54b423SArd Biesheuvel /* p1 ^= p4 ^ p5 */ 3012c54b423SArd Biesheuvel v0 = eor3(v0, vld1q_u64(dp4 + 0), vld1q_u64(dp5 + 0)); 3022c54b423SArd Biesheuvel v1 = eor3(v1, vld1q_u64(dp4 + 2), vld1q_u64(dp5 + 2)); 3032c54b423SArd Biesheuvel v2 = eor3(v2, vld1q_u64(dp4 + 4), vld1q_u64(dp5 + 4)); 3042c54b423SArd Biesheuvel v3 = eor3(v3, vld1q_u64(dp4 + 6), vld1q_u64(dp5 + 6)); 3052c54b423SArd Biesheuvel 3062c54b423SArd Biesheuvel /* store */ 3072c54b423SArd Biesheuvel vst1q_u64(dp1 + 0, v0); 3082c54b423SArd Biesheuvel vst1q_u64(dp1 + 2, v1); 3092c54b423SArd Biesheuvel vst1q_u64(dp1 + 4, v2); 3102c54b423SArd Biesheuvel vst1q_u64(dp1 + 6, v3); 3112c54b423SArd Biesheuvel 3122c54b423SArd Biesheuvel dp1 += 8; 3132c54b423SArd Biesheuvel dp2 += 8; 3142c54b423SArd Biesheuvel dp3 += 8; 3152c54b423SArd Biesheuvel dp4 += 8; 3162c54b423SArd Biesheuvel dp5 += 8; 3172c54b423SArd Biesheuvel } while (--lines > 0); 3182c54b423SArd Biesheuvel } 3192c54b423SArd Biesheuvel 3202c54b423SArd Biesheuvel static int __init xor_neon_init(void) 3212c54b423SArd Biesheuvel { 3222c54b423SArd Biesheuvel if (IS_ENABLED(CONFIG_AS_HAS_SHA3) && cpu_have_named_feature(SHA3)) { 3232c54b423SArd Biesheuvel xor_block_inner_neon.do_3 = xor_arm64_eor3_3; 3242c54b423SArd Biesheuvel xor_block_inner_neon.do_4 = xor_arm64_eor3_4; 3252c54b423SArd Biesheuvel xor_block_inner_neon.do_5 = xor_arm64_eor3_5; 3262c54b423SArd Biesheuvel } 3272c54b423SArd Biesheuvel return 0; 3282c54b423SArd Biesheuvel } 3292c54b423SArd Biesheuvel module_init(xor_neon_init); 3302c54b423SArd Biesheuvel 3312c54b423SArd Biesheuvel static void __exit xor_neon_exit(void) 3322c54b423SArd Biesheuvel { 3332c54b423SArd Biesheuvel } 3342c54b423SArd Biesheuvel module_exit(xor_neon_exit); 3352c54b423SArd Biesheuvel 336cc9f8349SJackie Liu MODULE_AUTHOR("Jackie Liu <liuyun01@kylinos.cn>"); 337cc9f8349SJackie Liu MODULE_DESCRIPTION("ARMv8 XOR Extensions"); 338cc9f8349SJackie Liu MODULE_LICENSE("GPL"); 339