1 /* 2 * arch/arm/include/asm/xor.h 3 * 4 * Copyright (C) 2001 Russell King 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 as 8 * published by the Free Software Foundation. 9 */ 10 #include <linux/hardirq.h> 11 #include <asm-generic/xor.h> 12 #include <asm/hwcap.h> 13 #include <asm/neon.h> 14 15 #define __XOR(a1, a2) a1 ^= a2 16 17 #define GET_BLOCK_2(dst) \ 18 __asm__("ldmia %0, {%1, %2}" \ 19 : "=r" (dst), "=r" (a1), "=r" (a2) \ 20 : "0" (dst)) 21 22 #define GET_BLOCK_4(dst) \ 23 __asm__("ldmia %0, {%1, %2, %3, %4}" \ 24 : "=r" (dst), "=r" (a1), "=r" (a2), "=r" (a3), "=r" (a4) \ 25 : "0" (dst)) 26 27 #define XOR_BLOCK_2(src) \ 28 __asm__("ldmia %0!, {%1, %2}" \ 29 : "=r" (src), "=r" (b1), "=r" (b2) \ 30 : "0" (src)); \ 31 __XOR(a1, b1); __XOR(a2, b2); 32 33 #define XOR_BLOCK_4(src) \ 34 __asm__("ldmia %0!, {%1, %2, %3, %4}" \ 35 : "=r" (src), "=r" (b1), "=r" (b2), "=r" (b3), "=r" (b4) \ 36 : "0" (src)); \ 37 __XOR(a1, b1); __XOR(a2, b2); __XOR(a3, b3); __XOR(a4, b4) 38 39 #define PUT_BLOCK_2(dst) \ 40 __asm__ __volatile__("stmia %0!, {%2, %3}" \ 41 : "=r" (dst) \ 42 : "0" (dst), "r" (a1), "r" (a2)) 43 44 #define PUT_BLOCK_4(dst) \ 45 __asm__ __volatile__("stmia %0!, {%2, %3, %4, %5}" \ 46 : "=r" (dst) \ 47 : "0" (dst), "r" (a1), "r" (a2), "r" (a3), "r" (a4)) 48 49 static void 50 xor_arm4regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) 51 { 52 unsigned int lines = bytes / sizeof(unsigned long) / 4; 53 register unsigned int a1 __asm__("r4"); 54 register unsigned int a2 __asm__("r5"); 55 register unsigned int a3 __asm__("r6"); 56 register unsigned int a4 __asm__("r7"); 57 register unsigned int b1 __asm__("r8"); 58 register unsigned int b2 __asm__("r9"); 59 register unsigned int b3 __asm__("ip"); 60 register unsigned int b4 __asm__("lr"); 61 62 do { 63 GET_BLOCK_4(p1); 64 XOR_BLOCK_4(p2); 65 PUT_BLOCK_4(p1); 66 } while (--lines); 67 } 68 69 static void 70 xor_arm4regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, 71 unsigned long *p3) 72 { 73 unsigned int lines = bytes / sizeof(unsigned long) / 4; 74 register unsigned int a1 __asm__("r4"); 75 register unsigned int a2 __asm__("r5"); 76 register unsigned int a3 __asm__("r6"); 77 register unsigned int a4 __asm__("r7"); 78 register unsigned int b1 __asm__("r8"); 79 register unsigned int b2 __asm__("r9"); 80 register unsigned int b3 __asm__("ip"); 81 register unsigned int b4 __asm__("lr"); 82 83 do { 84 GET_BLOCK_4(p1); 85 XOR_BLOCK_4(p2); 86 XOR_BLOCK_4(p3); 87 PUT_BLOCK_4(p1); 88 } while (--lines); 89 } 90 91 static void 92 xor_arm4regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, 93 unsigned long *p3, unsigned long *p4) 94 { 95 unsigned int lines = bytes / sizeof(unsigned long) / 2; 96 register unsigned int a1 __asm__("r8"); 97 register unsigned int a2 __asm__("r9"); 98 register unsigned int b1 __asm__("ip"); 99 register unsigned int b2 __asm__("lr"); 100 101 do { 102 GET_BLOCK_2(p1); 103 XOR_BLOCK_2(p2); 104 XOR_BLOCK_2(p3); 105 XOR_BLOCK_2(p4); 106 PUT_BLOCK_2(p1); 107 } while (--lines); 108 } 109 110 static void 111 xor_arm4regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, 112 unsigned long *p3, unsigned long *p4, unsigned long *p5) 113 { 114 unsigned int lines = bytes / sizeof(unsigned long) / 2; 115 register unsigned int a1 __asm__("r8"); 116 register unsigned int a2 __asm__("r9"); 117 register unsigned int b1 __asm__("ip"); 118 register unsigned int b2 __asm__("lr"); 119 120 do { 121 GET_BLOCK_2(p1); 122 XOR_BLOCK_2(p2); 123 XOR_BLOCK_2(p3); 124 XOR_BLOCK_2(p4); 125 XOR_BLOCK_2(p5); 126 PUT_BLOCK_2(p1); 127 } while (--lines); 128 } 129 130 static struct xor_block_template xor_block_arm4regs = { 131 .name = "arm4regs", 132 .do_2 = xor_arm4regs_2, 133 .do_3 = xor_arm4regs_3, 134 .do_4 = xor_arm4regs_4, 135 .do_5 = xor_arm4regs_5, 136 }; 137 138 #undef XOR_TRY_TEMPLATES 139 #define XOR_TRY_TEMPLATES \ 140 do { \ 141 xor_speed(&xor_block_arm4regs); \ 142 xor_speed(&xor_block_8regs); \ 143 xor_speed(&xor_block_32regs); \ 144 NEON_TEMPLATES; \ 145 } while (0) 146 147 #ifdef CONFIG_KERNEL_MODE_NEON 148 149 extern struct xor_block_template const xor_block_neon_inner; 150 151 static void 152 xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) 153 { 154 if (in_interrupt()) { 155 xor_arm4regs_2(bytes, p1, p2); 156 } else { 157 kernel_neon_begin(); 158 xor_block_neon_inner.do_2(bytes, p1, p2); 159 kernel_neon_end(); 160 } 161 } 162 163 static void 164 xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, 165 unsigned long *p3) 166 { 167 if (in_interrupt()) { 168 xor_arm4regs_3(bytes, p1, p2, p3); 169 } else { 170 kernel_neon_begin(); 171 xor_block_neon_inner.do_3(bytes, p1, p2, p3); 172 kernel_neon_end(); 173 } 174 } 175 176 static void 177 xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, 178 unsigned long *p3, unsigned long *p4) 179 { 180 if (in_interrupt()) { 181 xor_arm4regs_4(bytes, p1, p2, p3, p4); 182 } else { 183 kernel_neon_begin(); 184 xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4); 185 kernel_neon_end(); 186 } 187 } 188 189 static void 190 xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, 191 unsigned long *p3, unsigned long *p4, unsigned long *p5) 192 { 193 if (in_interrupt()) { 194 xor_arm4regs_5(bytes, p1, p2, p3, p4, p5); 195 } else { 196 kernel_neon_begin(); 197 xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5); 198 kernel_neon_end(); 199 } 200 } 201 202 static struct xor_block_template xor_block_neon = { 203 .name = "neon", 204 .do_2 = xor_neon_2, 205 .do_3 = xor_neon_3, 206 .do_4 = xor_neon_4, 207 .do_5 = xor_neon_5 208 }; 209 210 #define NEON_TEMPLATES \ 211 do { if (cpu_has_neon()) xor_speed(&xor_block_neon); } while (0) 212 #else 213 #define NEON_TEMPLATES 214 #endif 215