1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 /* 3 * arch/arm/include/asm/xor.h 4 * 5 * Copyright (C) 2001 Russell King 6 */ 7 #include <linux/hardirq.h> 8 #include <asm-generic/xor.h> 9 #include <asm/hwcap.h> 10 #include <asm/neon.h> 11 12 #define __XOR(a1, a2) a1 ^= a2 13 14 #define GET_BLOCK_2(dst) \ 15 __asm__("ldmia %0, {%1, %2}" \ 16 : "=r" (dst), "=r" (a1), "=r" (a2) \ 17 : "0" (dst)) 18 19 #define GET_BLOCK_4(dst) \ 20 __asm__("ldmia %0, {%1, %2, %3, %4}" \ 21 : "=r" (dst), "=r" (a1), "=r" (a2), "=r" (a3), "=r" (a4) \ 22 : "0" (dst)) 23 24 #define XOR_BLOCK_2(src) \ 25 __asm__("ldmia %0!, {%1, %2}" \ 26 : "=r" (src), "=r" (b1), "=r" (b2) \ 27 : "0" (src)); \ 28 __XOR(a1, b1); __XOR(a2, b2); 29 30 #define XOR_BLOCK_4(src) \ 31 __asm__("ldmia %0!, {%1, %2, %3, %4}" \ 32 : "=r" (src), "=r" (b1), "=r" (b2), "=r" (b3), "=r" (b4) \ 33 : "0" (src)); \ 34 __XOR(a1, b1); __XOR(a2, b2); __XOR(a3, b3); __XOR(a4, b4) 35 36 #define PUT_BLOCK_2(dst) \ 37 __asm__ __volatile__("stmia %0!, {%2, %3}" \ 38 : "=r" (dst) \ 39 : "0" (dst), "r" (a1), "r" (a2)) 40 41 #define PUT_BLOCK_4(dst) \ 42 __asm__ __volatile__("stmia %0!, {%2, %3, %4, %5}" \ 43 : "=r" (dst) \ 44 : "0" (dst), "r" (a1), "r" (a2), "r" (a3), "r" (a4)) 45 46 static void 47 xor_arm4regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) 48 { 49 unsigned int lines = bytes / sizeof(unsigned long) / 4; 50 register unsigned int a1 __asm__("r4"); 51 register unsigned int a2 __asm__("r5"); 52 register unsigned int a3 __asm__("r6"); 53 register unsigned int a4 __asm__("r7"); 54 register unsigned int b1 __asm__("r8"); 55 register unsigned int b2 __asm__("r9"); 56 register unsigned int b3 __asm__("ip"); 57 register unsigned int b4 __asm__("lr"); 58 59 do { 60 GET_BLOCK_4(p1); 61 XOR_BLOCK_4(p2); 62 PUT_BLOCK_4(p1); 63 } while (--lines); 64 } 65 66 static void 67 xor_arm4regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, 68 unsigned long *p3) 69 { 70 unsigned int lines = bytes / sizeof(unsigned 
long) / 4; 71 register unsigned int a1 __asm__("r4"); 72 register unsigned int a2 __asm__("r5"); 73 register unsigned int a3 __asm__("r6"); 74 register unsigned int a4 __asm__("r7"); 75 register unsigned int b1 __asm__("r8"); 76 register unsigned int b2 __asm__("r9"); 77 register unsigned int b3 __asm__("ip"); 78 register unsigned int b4 __asm__("lr"); 79 80 do { 81 GET_BLOCK_4(p1); 82 XOR_BLOCK_4(p2); 83 XOR_BLOCK_4(p3); 84 PUT_BLOCK_4(p1); 85 } while (--lines); 86 } 87 88 static void 89 xor_arm4regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, 90 unsigned long *p3, unsigned long *p4) 91 { 92 unsigned int lines = bytes / sizeof(unsigned long) / 2; 93 register unsigned int a1 __asm__("r8"); 94 register unsigned int a2 __asm__("r9"); 95 register unsigned int b1 __asm__("ip"); 96 register unsigned int b2 __asm__("lr"); 97 98 do { 99 GET_BLOCK_2(p1); 100 XOR_BLOCK_2(p2); 101 XOR_BLOCK_2(p3); 102 XOR_BLOCK_2(p4); 103 PUT_BLOCK_2(p1); 104 } while (--lines); 105 } 106 107 static void 108 xor_arm4regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, 109 unsigned long *p3, unsigned long *p4, unsigned long *p5) 110 { 111 unsigned int lines = bytes / sizeof(unsigned long) / 2; 112 register unsigned int a1 __asm__("r8"); 113 register unsigned int a2 __asm__("r9"); 114 register unsigned int b1 __asm__("ip"); 115 register unsigned int b2 __asm__("lr"); 116 117 do { 118 GET_BLOCK_2(p1); 119 XOR_BLOCK_2(p2); 120 XOR_BLOCK_2(p3); 121 XOR_BLOCK_2(p4); 122 XOR_BLOCK_2(p5); 123 PUT_BLOCK_2(p1); 124 } while (--lines); 125 } 126 127 static struct xor_block_template xor_block_arm4regs = { 128 .name = "arm4regs", 129 .do_2 = xor_arm4regs_2, 130 .do_3 = xor_arm4regs_3, 131 .do_4 = xor_arm4regs_4, 132 .do_5 = xor_arm4regs_5, 133 }; 134 135 #undef XOR_TRY_TEMPLATES 136 #define XOR_TRY_TEMPLATES \ 137 do { \ 138 xor_speed(&xor_block_arm4regs); \ 139 xor_speed(&xor_block_8regs); \ 140 xor_speed(&xor_block_32regs); \ 141 NEON_TEMPLATES; \ 142 } while (0) 143 144 
#ifdef CONFIG_KERNEL_MODE_NEON

/* NEON implementations of the XOR routines; defined outside this header. */
extern const struct xor_block_template xor_block_neon_inner;

/*
 * Each xor_neon_N() wrapper below follows the same pattern:
 *   - when in_interrupt() is true, delegate to the scalar xor_arm4regs_N();
 *   - otherwise call xor_block_neon_inner.do_N() bracketed by
 *     kernel_neon_begin() / kernel_neon_end().
 * NOTE(review): presumably the fallback exists because kernel-mode NEON may
 * not be used from interrupt context -- confirm against <asm/neon.h>.
 */

/* Two-buffer variant: XOR p2 into p1 over @bytes bytes. */
static void
xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	if (in_interrupt()) {
		xor_arm4regs_2(bytes, p1, p2);
		return;
	}

	kernel_neon_begin();
	xor_block_neon_inner.do_2(bytes, p1, p2);
	kernel_neon_end();
}

/* Three-buffer variant: XOR p2 and p3 into p1 over @bytes bytes. */
static void
xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	   unsigned long *p3)
{
	if (in_interrupt()) {
		xor_arm4regs_3(bytes, p1, p2, p3);
		return;
	}

	kernel_neon_begin();
	xor_block_neon_inner.do_3(bytes, p1, p2, p3);
	kernel_neon_end();
}

/* Four-buffer variant: XOR p2..p4 into p1 over @bytes bytes. */
static void
xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	   unsigned long *p3, unsigned long *p4)
{
	if (in_interrupt()) {
		xor_arm4regs_4(bytes, p1, p2, p3, p4);
		return;
	}

	kernel_neon_begin();
	xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4);
	kernel_neon_end();
}

/* Five-buffer variant: XOR p2..p5 into p1 over @bytes bytes. */
static void
xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	   unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	if (in_interrupt()) {
		xor_arm4regs_5(bytes, p1, p2, p3, p4, p5);
		return;
	}

	kernel_neon_begin();
	xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5);
	kernel_neon_end();
}

/* Template exposing the context-checking NEON wrappers. */
static struct xor_block_template xor_block_neon = {
	.name	= "neon",
	.do_2	= xor_neon_2,
	.do_3	= xor_neon_3,
	.do_4	= xor_neon_4,
	.do_5	= xor_neon_5,
};

/* Pass the NEON template to xor_speed() only when cpu_has_neon() is true. */
#define NEON_TEMPLATES					\
	do {						\
		if (cpu_has_neon())			\
			xor_speed(&xor_block_neon);	\
	} while (0)
#else
/* Without kernel-mode NEON support there is no NEON template to try. */
#define NEON_TEMPLATES
#endif