1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 /* 3 * arch/arm/include/asm/xor.h 4 * 5 * Copyright (C) 2001 Russell King 6 */ 7 #include <linux/hardirq.h> 8 #include <asm-generic/xor.h> 9 #include <asm/hwcap.h> 10 #include <asm/neon.h> 11 12 #define __XOR(a1, a2) a1 ^= a2 13 14 #define GET_BLOCK_2(dst) \ 15 __asm__("ldmia %0, {%1, %2}" \ 16 : "=r" (dst), "=r" (a1), "=r" (a2) \ 17 : "0" (dst)) 18 19 #define GET_BLOCK_4(dst) \ 20 __asm__("ldmia %0, {%1, %2, %3, %4}" \ 21 : "=r" (dst), "=r" (a1), "=r" (a2), "=r" (a3), "=r" (a4) \ 22 : "0" (dst)) 23 24 #define XOR_BLOCK_2(src) \ 25 __asm__("ldmia %0!, {%1, %2}" \ 26 : "=r" (src), "=r" (b1), "=r" (b2) \ 27 : "0" (src)); \ 28 __XOR(a1, b1); __XOR(a2, b2); 29 30 #define XOR_BLOCK_4(src) \ 31 __asm__("ldmia %0!, {%1, %2, %3, %4}" \ 32 : "=r" (src), "=r" (b1), "=r" (b2), "=r" (b3), "=r" (b4) \ 33 : "0" (src)); \ 34 __XOR(a1, b1); __XOR(a2, b2); __XOR(a3, b3); __XOR(a4, b4) 35 36 #define PUT_BLOCK_2(dst) \ 37 __asm__ __volatile__("stmia %0!, {%2, %3}" \ 38 : "=r" (dst) \ 39 : "0" (dst), "r" (a1), "r" (a2)) 40 41 #define PUT_BLOCK_4(dst) \ 42 __asm__ __volatile__("stmia %0!, {%2, %3, %4, %5}" \ 43 : "=r" (dst) \ 44 : "0" (dst), "r" (a1), "r" (a2), "r" (a3), "r" (a4)) 45 46 static void 47 xor_arm4regs_2(unsigned long bytes, unsigned long * __restrict p1, 48 const unsigned long * __restrict p2) 49 { 50 unsigned int lines = bytes / sizeof(unsigned long) / 4; 51 register unsigned int a1 __asm__("r4"); 52 register unsigned int a2 __asm__("r5"); 53 register unsigned int a3 __asm__("r6"); 54 register unsigned int a4 __asm__("r7"); 55 register unsigned int b1 __asm__("r8"); 56 register unsigned int b2 __asm__("r9"); 57 register unsigned int b3 __asm__("ip"); 58 register unsigned int b4 __asm__("lr"); 59 60 do { 61 GET_BLOCK_4(p1); 62 XOR_BLOCK_4(p2); 63 PUT_BLOCK_4(p1); 64 } while (--lines); 65 } 66 67 static void 68 xor_arm4regs_3(unsigned long bytes, unsigned long * __restrict p1, 69 const unsigned long * __restrict p2, 70 const unsigned long * __restrict p3) 71 { 72 unsigned int lines = bytes / sizeof(unsigned long) / 4; 73 register unsigned int a1 __asm__("r4"); 74 register unsigned int a2 __asm__("r5"); 75 register unsigned int a3 __asm__("r6"); 76 register unsigned int a4 __asm__("r7"); 77 register unsigned int b1 __asm__("r8"); 78 register unsigned int b2 __asm__("r9"); 79 register unsigned int b3 __asm__("ip"); 80 register unsigned int b4 __asm__("lr"); 81 82 do { 83 GET_BLOCK_4(p1); 84 XOR_BLOCK_4(p2); 85 XOR_BLOCK_4(p3); 86 PUT_BLOCK_4(p1); 87 } while (--lines); 88 } 89 90 static void 91 xor_arm4regs_4(unsigned long bytes, unsigned long * __restrict p1, 92 const unsigned long * __restrict p2, 93 const unsigned long * __restrict p3, 94 const unsigned long * __restrict p4) 95 { 96 unsigned int lines = bytes / sizeof(unsigned long) / 2; 97 register unsigned int a1 __asm__("r8"); 98 register unsigned int a2 __asm__("r9"); 99 register unsigned int b1 __asm__("ip"); 100 register unsigned int b2 __asm__("lr"); 101 102 do { 103 GET_BLOCK_2(p1); 104 XOR_BLOCK_2(p2); 105 XOR_BLOCK_2(p3); 106 XOR_BLOCK_2(p4); 107 PUT_BLOCK_2(p1); 108 } while (--lines); 109 } 110 111 static void 112 xor_arm4regs_5(unsigned long bytes, unsigned long * __restrict p1, 113 const unsigned long * __restrict p2, 114 const unsigned long * __restrict p3, 115 const unsigned long * __restrict p4, 116 const unsigned long * __restrict p5) 117 { 118 unsigned int lines = bytes / sizeof(unsigned long) / 2; 119 register unsigned int a1 __asm__("r8"); 120 register unsigned int a2 __asm__("r9"); 121 register unsigned int b1 __asm__("ip"); 122 register unsigned int b2 __asm__("lr"); 123 124 do { 125 GET_BLOCK_2(p1); 126 XOR_BLOCK_2(p2); 127 XOR_BLOCK_2(p3); 128 XOR_BLOCK_2(p4); 129 XOR_BLOCK_2(p5); 130 PUT_BLOCK_2(p1); 131 } while (--lines); 132 } 133 134 static struct xor_block_template xor_block_arm4regs = { 135 .name = "arm4regs", 136 .do_2 = xor_arm4regs_2, 137 .do_3 = xor_arm4regs_3, 138 .do_4 = xor_arm4regs_4, 139 .do_5 = xor_arm4regs_5, 140 }; 141 142 #undef XOR_TRY_TEMPLATES 143 #define XOR_TRY_TEMPLATES \ 144 do { \ 145 xor_speed(&xor_block_arm4regs); \ 146 xor_speed(&xor_block_8regs); \ 147 xor_speed(&xor_block_32regs); \ 148 NEON_TEMPLATES; \ 149 } while (0) 150 151 #ifdef CONFIG_KERNEL_MODE_NEON 152 153 extern struct xor_block_template const xor_block_neon_inner; 154 155 static void 156 xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, 157 const unsigned long * __restrict p2) 158 { 159 if (in_interrupt()) { 160 xor_arm4regs_2(bytes, p1, p2); 161 } else { 162 kernel_neon_begin(); 163 xor_block_neon_inner.do_2(bytes, p1, p2); 164 kernel_neon_end(); 165 } 166 } 167 168 static void 169 xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, 170 const unsigned long * __restrict p2, 171 const unsigned long * __restrict p3) 172 { 173 if (in_interrupt()) { 174 xor_arm4regs_3(bytes, p1, p2, p3); 175 } else { 176 kernel_neon_begin(); 177 xor_block_neon_inner.do_3(bytes, p1, p2, p3); 178 kernel_neon_end(); 179 } 180 } 181 182 static void 183 xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, 184 const unsigned long * __restrict p2, 185 const unsigned long * __restrict p3, 186 const unsigned long * __restrict p4) 187 { 188 if (in_interrupt()) { 189 xor_arm4regs_4(bytes, p1, p2, p3, p4); 190 } else { 191 kernel_neon_begin(); 192 xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4); 193 kernel_neon_end(); 194 } 195 } 196 197 static void 198 xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, 199 const unsigned long * __restrict p2, 200 const unsigned long * __restrict p3, 201 const unsigned long * __restrict p4, 202 const unsigned long * __restrict p5) 203 { 204 if (in_interrupt()) { 205 xor_arm4regs_5(bytes, p1, p2, p3, p4, p5); 206 } else { 207 kernel_neon_begin(); 208 xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5); 209 kernel_neon_end(); 210 } 211 } 212 213 static struct xor_block_template xor_block_neon = { 214 .name = "neon", 215 .do_2 = xor_neon_2, 216 .do_3 = xor_neon_3, 217 .do_4 = xor_neon_4, 218 .do_5 = xor_neon_5 219 }; 220 221 #define NEON_TEMPLATES \ 222 do { if (cpu_has_neon()) xor_speed(&xor_block_neon); } while (0) 223 #else 224 #define NEON_TEMPLATES 225 #endif 226