/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  arch/arm/include/asm/xor.h
 *
 *  Copyright (C) 2001 Russell King
 */
#include <linux/hardirq.h>
#include <asm-generic/xor.h>
#include <asm/hwcap.h>
#include <asm/neon.h>

/* XOR a2 into a1 in place; a1 must be a modifiable lvalue. */
#define __XOR(a1, a2) ((a1) ^= (a2))

/*
 * Load two consecutive words from the address in dst into a1 and a2.
 * The ldmia has no writeback ("!"), so dst keeps its value.  a1 and a2
 * are not parameters: they must be declared at the expansion site.
 */
#define GET_BLOCK_2(dst) \
	__asm__("ldmia %0, {%1, %2}" \
		: "=r" (dst), "=r" (a1), "=r" (a2) \
		: "0" (dst))

/*
 * Load four consecutive words from the address in dst into a1..a4.
 * The ldmia has no writeback ("!"), so dst keeps its value.  a1..a4
 * are not parameters: they must be declared at the expansion site.
 */
#define GET_BLOCK_4(dst) \
	__asm__("ldmia %0, {%1, %2, %3, %4}" \
		: "=r" (dst), "=r" (a1), "=r" (a2), "=r" (a3), "=r" (a4) \
		: "0" (dst))

/*
 * Load two words from src into b1/b2, advancing src past them (ldmia
 * with writeback), then XOR them into the accumulators a1/a2.
 * a1, a2, b1 and b2 must be declared at the expansion site.  The body
 * is wrapped in do { } while (0) so the macro acts as one statement.
 */
#define XOR_BLOCK_2(src) \
	do { \
		__asm__("ldmia %0!, {%1, %2}" \
			: "=r" (src), "=r" (b1), "=r" (b2) \
			: "0" (src)); \
		__XOR(a1, b1); __XOR(a2, b2); \
	} while (0)

/*
 * Load four words from src into b1..b4, advancing src past them (ldmia
 * with writeback), then XOR them into the accumulators a1..a4.
 * a1..a4 and b1..b4 must be declared at the expansion site.  The body
 * is wrapped in do { } while (0) so the macro acts as one statement.
 */
#define XOR_BLOCK_4(src) \
	do { \
		__asm__("ldmia %0!, {%1, %2, %3, %4}" \
			: "=r" (src), "=r" (b1), "=r" (b2), "=r" (b3), "=r" (b4) \
			: "0" (src)); \
		__XOR(a1, b1); __XOR(a2, b2); __XOR(a3, b3); __XOR(a4, b4); \
	} while (0)

/*
 * Store a1 and a2 to the address in dst and advance dst past them
 * (stmia with writeback).  The asm is volatile so the store is kept
 * even when the updated dst is not used afterwards.  a1 and a2 must be
 * declared at the expansion site.
 */
#define PUT_BLOCK_2(dst) \
	__asm__ __volatile__("stmia %0!, {%2, %3}" \
		: "=r" (dst) \
		: "0" (dst), "r" (a1), "r" (a2))

/*
 * Store a1..a4 to the address in dst and advance dst past them (stmia
 * with writeback).  The asm is volatile so the store is kept even when
 * the updated dst is not used afterwards.  a1..a4 must be declared at
 * the expansion site.
 */
#define PUT_BLOCK_4(dst) \
	__asm__ __volatile__("stmia %0!, {%2, %3, %4, %5}" \
		: "=r" (dst) \
		: "0" (dst), "r" (a1), "r" (a2), "r" (a3), "r" (a4))

/*
 * xor_arm4regs_2 - XOR the buffer at p2 into the buffer at p1.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first source operand
 * @p2:    second source operand
 *
 * Works in groups of four words: each pass loads four words from p1,
 * XORs in four words from p2 and stores the result back through p1.
 * A tail of @bytes that does not fill a whole group is not processed.
 */
static void
xor_arm4regs_2(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2)
{
	unsigned int lines = bytes / sizeof(unsigned long) / 4;
	/*
	 * ldmia/stmia transfer registers in ascending register-number
	 * order, so a1..a4 and b1..b4 are each pinned to ascending
	 * registers to make the words land in the matching variables.
	 */
	register unsigned int a1 __asm__("r4");
	register unsigned int a2 __asm__("r5");
	register unsigned int a3 __asm__("r6");
	register unsigned int a4 __asm__("r10");
	register unsigned int b1 __asm__("r8");
	register unsigned int b2 __asm__("r9");
	register unsigned int b3 __asm__("ip");
	register unsigned int b4 __asm__("lr");

	/*
	 * The loop below runs its body before testing the count, so a
	 * zero count would wrap around; reject it up front.
	 */
	if (!lines)
		return;

	do {
		GET_BLOCK_4(p1);
		XOR_BLOCK_4(p2);
		PUT_BLOCK_4(p1);
	} while (--lines);
}

/*
 * xor_arm4regs_3 - XOR the buffers at p2 and p3 into the buffer at p1.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first source operand
 * @p2:    second source operand
 * @p3:    third source operand
 *
 * Works in groups of four words: each pass loads four words from p1,
 * XORs in four words from each of p2 and p3 and stores the result back
 * through p1.  A tail of @bytes that does not fill a whole group is not
 * processed.
 */
static void
xor_arm4regs_3(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2,
	       const unsigned long * __restrict p3)
{
	unsigned int lines = bytes / sizeof(unsigned long) / 4;
	/*
	 * ldmia/stmia transfer registers in ascending register-number
	 * order, so a1..a4 and b1..b4 are each pinned to ascending
	 * registers to make the words land in the matching variables.
	 */
	register unsigned int a1 __asm__("r4");
	register unsigned int a2 __asm__("r5");
	register unsigned int a3 __asm__("r6");
	register unsigned int a4 __asm__("r10");
	register unsigned int b1 __asm__("r8");
	register unsigned int b2 __asm__("r9");
	register unsigned int b3 __asm__("ip");
	register unsigned int b4 __asm__("lr");

	/*
	 * The loop below runs its body before testing the count, so a
	 * zero count would wrap around; reject it up front.
	 */
	if (!lines)
		return;

	do {
		GET_BLOCK_4(p1);
		XOR_BLOCK_4(p2);
		XOR_BLOCK_4(p3);
		PUT_BLOCK_4(p1);
	} while (--lines);
}

/*
 * xor_arm4regs_4 - XOR the buffers at p2, p3 and p4 into the buffer at p1.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first source operand
 * @p2:    second source operand
 * @p3:    third source operand
 * @p4:    fourth source operand
 *
 * Works in groups of two words: each pass loads two words from p1,
 * XORs in two words from each of p2..p4 and stores the result back
 * through p1.  A tail of @bytes that does not fill a whole group is not
 * processed.
 */
static void
xor_arm4regs_4(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2,
	       const unsigned long * __restrict p3,
	       const unsigned long * __restrict p4)
{
	unsigned int lines = bytes / sizeof(unsigned long) / 2;
	/*
	 * ldmia/stmia transfer registers in ascending register-number
	 * order, so a1/a2 and b1/b2 are each pinned to ascending
	 * registers to make the words land in the matching variables.
	 */
	register unsigned int a1 __asm__("r8");
	register unsigned int a2 __asm__("r9");
	register unsigned int b1 __asm__("ip");
	register unsigned int b2 __asm__("lr");

	/*
	 * The loop below runs its body before testing the count, so a
	 * zero count would wrap around; reject it up front.
	 */
	if (!lines)
		return;

	do {
		GET_BLOCK_2(p1);
		XOR_BLOCK_2(p2);
		XOR_BLOCK_2(p3);
		XOR_BLOCK_2(p4);
		PUT_BLOCK_2(p1);
	} while (--lines);
}

/*
 * xor_arm4regs_5 - XOR the buffers at p2..p5 into the buffer at p1.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first source operand
 * @p2:    second source operand
 * @p3:    third source operand
 * @p4:    fourth source operand
 * @p5:    fifth source operand
 *
 * Works in groups of two words: each pass loads two words from p1,
 * XORs in two words from each of p2..p5 and stores the result back
 * through p1.  A tail of @bytes that does not fill a whole group is not
 * processed.
 */
static void
xor_arm4regs_5(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2,
	       const unsigned long * __restrict p3,
	       const unsigned long * __restrict p4,
	       const unsigned long * __restrict p5)
{
	unsigned int lines = bytes / sizeof(unsigned long) / 2;
	/*
	 * ldmia/stmia transfer registers in ascending register-number
	 * order, so a1/a2 and b1/b2 are each pinned to ascending
	 * registers to make the words land in the matching variables.
	 */
	register unsigned int a1 __asm__("r8");
	register unsigned int a2 __asm__("r9");
	register unsigned int b1 __asm__("ip");
	register unsigned int b2 __asm__("lr");

	/*
	 * The loop below runs its body before testing the count, so a
	 * zero count would wrap around; reject it up front.
	 */
	if (!lines)
		return;

	do {
		GET_BLOCK_2(p1);
		XOR_BLOCK_2(p2);
		XOR_BLOCK_2(p3);
		XOR_BLOCK_2(p4);
		XOR_BLOCK_2(p5);
		PUT_BLOCK_2(p1);
	} while (--lines);
}

1344baa9922SRussell King static struct xor_block_template xor_block_arm4regs = {
1354baa9922SRussell King .name = "arm4regs",
1364baa9922SRussell King .do_2 = xor_arm4regs_2,
1374baa9922SRussell King .do_3 = xor_arm4regs_3,
1384baa9922SRussell King .do_4 = xor_arm4regs_4,
1394baa9922SRussell King .do_5 = xor_arm4regs_5,
1404baa9922SRussell King };
1414baa9922SRussell King
/*
 * Drop any earlier definition of XOR_TRY_TEMPLATES and replace it with
 * one that hands the arm4regs, 8regs and 32regs templates to
 * xor_speed(), followed by whatever NEON_TEMPLATES expands to.
 * NOTE(review): xor_speed() is not defined here; presumably it
 * benchmarks each template so the fastest can be chosen - confirm.
 */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES			\
	do {					\
		xor_speed(&xor_block_arm4regs);	\
		xor_speed(&xor_block_8regs);	\
		xor_speed(&xor_block_32regs);	\
		NEON_TEMPLATES;			\
	} while (0)

15101956597SArd Biesheuvel #ifdef CONFIG_KERNEL_MODE_NEON
15201956597SArd Biesheuvel
15301956597SArd Biesheuvel extern struct xor_block_template const xor_block_neon_inner;
15401956597SArd Biesheuvel
15501956597SArd Biesheuvel static void
xor_neon_2(unsigned long bytes,unsigned long * __restrict p1,const unsigned long * __restrict p2)156297565aaSArd Biesheuvel xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
157297565aaSArd Biesheuvel const unsigned long * __restrict p2)
15801956597SArd Biesheuvel {
15901956597SArd Biesheuvel if (in_interrupt()) {
16001956597SArd Biesheuvel xor_arm4regs_2(bytes, p1, p2);
16101956597SArd Biesheuvel } else {
16201956597SArd Biesheuvel kernel_neon_begin();
16301956597SArd Biesheuvel xor_block_neon_inner.do_2(bytes, p1, p2);
16401956597SArd Biesheuvel kernel_neon_end();
16501956597SArd Biesheuvel }
16601956597SArd Biesheuvel }
16701956597SArd Biesheuvel
16801956597SArd Biesheuvel static void
xor_neon_3(unsigned long bytes,unsigned long * __restrict p1,const unsigned long * __restrict p2,const unsigned long * __restrict p3)169297565aaSArd Biesheuvel xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
170297565aaSArd Biesheuvel const unsigned long * __restrict p2,
171297565aaSArd Biesheuvel const unsigned long * __restrict p3)
17201956597SArd Biesheuvel {
17301956597SArd Biesheuvel if (in_interrupt()) {
17401956597SArd Biesheuvel xor_arm4regs_3(bytes, p1, p2, p3);
17501956597SArd Biesheuvel } else {
17601956597SArd Biesheuvel kernel_neon_begin();
17701956597SArd Biesheuvel xor_block_neon_inner.do_3(bytes, p1, p2, p3);
17801956597SArd Biesheuvel kernel_neon_end();
17901956597SArd Biesheuvel }
18001956597SArd Biesheuvel }
18101956597SArd Biesheuvel
18201956597SArd Biesheuvel static void
xor_neon_4(unsigned long bytes,unsigned long * __restrict p1,const unsigned long * __restrict p2,const unsigned long * __restrict p3,const unsigned long * __restrict p4)183297565aaSArd Biesheuvel xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
184297565aaSArd Biesheuvel const unsigned long * __restrict p2,
185297565aaSArd Biesheuvel const unsigned long * __restrict p3,
186297565aaSArd Biesheuvel const unsigned long * __restrict p4)
18701956597SArd Biesheuvel {
18801956597SArd Biesheuvel if (in_interrupt()) {
18901956597SArd Biesheuvel xor_arm4regs_4(bytes, p1, p2, p3, p4);
19001956597SArd Biesheuvel } else {
19101956597SArd Biesheuvel kernel_neon_begin();
19201956597SArd Biesheuvel xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4);
19301956597SArd Biesheuvel kernel_neon_end();
19401956597SArd Biesheuvel }
19501956597SArd Biesheuvel }
19601956597SArd Biesheuvel
19701956597SArd Biesheuvel static void
xor_neon_5(unsigned long bytes,unsigned long * __restrict p1,const unsigned long * __restrict p2,const unsigned long * __restrict p3,const unsigned long * __restrict p4,const unsigned long * __restrict p5)198297565aaSArd Biesheuvel xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
199297565aaSArd Biesheuvel const unsigned long * __restrict p2,
200297565aaSArd Biesheuvel const unsigned long * __restrict p3,
201297565aaSArd Biesheuvel const unsigned long * __restrict p4,
202297565aaSArd Biesheuvel const unsigned long * __restrict p5)
20301956597SArd Biesheuvel {
20401956597SArd Biesheuvel if (in_interrupt()) {
20501956597SArd Biesheuvel xor_arm4regs_5(bytes, p1, p2, p3, p4, p5);
20601956597SArd Biesheuvel } else {
20701956597SArd Biesheuvel kernel_neon_begin();
20801956597SArd Biesheuvel xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5);
20901956597SArd Biesheuvel kernel_neon_end();
21001956597SArd Biesheuvel }
21101956597SArd Biesheuvel }
21201956597SArd Biesheuvel
21301956597SArd Biesheuvel static struct xor_block_template xor_block_neon = {
21401956597SArd Biesheuvel .name = "neon",
21501956597SArd Biesheuvel .do_2 = xor_neon_2,
21601956597SArd Biesheuvel .do_3 = xor_neon_3,
21701956597SArd Biesheuvel .do_4 = xor_neon_4,
21801956597SArd Biesheuvel .do_5 = xor_neon_5
21901956597SArd Biesheuvel };
22001956597SArd Biesheuvel
22101956597SArd Biesheuvel #define NEON_TEMPLATES \
22201956597SArd Biesheuvel do { if (cpu_has_neon()) xor_speed(&xor_block_neon); } while (0)
22301956597SArd Biesheuvel #else
22401956597SArd Biesheuvel #define NEON_TEMPLATES
22501956597SArd Biesheuvel #endif
226