1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
4  *
5  * Template for XOR operations, instantiated in xor_simd.c.
6  *
7  * Expected preprocessor definitions:
8  *
9  * - LINE_WIDTH
10  * - XOR_FUNC_NAME(nr)
11  * - LD_INOUT_LINE(buf)
12  * - LD_AND_XOR_LINE(buf)
13  * - ST_LINE(buf)
14  */
15 
16 void XOR_FUNC_NAME(2)(unsigned long bytes,
17 		      unsigned long * __restrict v1,
18 		      const unsigned long * __restrict v2)
19 {
20 	unsigned long lines = bytes / LINE_WIDTH;
21 
22 	do {
23 		__asm__ __volatile__ (
24 			LD_INOUT_LINE(v1)
25 			LD_AND_XOR_LINE(v2)
26 			ST_LINE(v1)
27 		: : [v1] "r"(v1), [v2] "r"(v2) : "memory"
28 		);
29 
30 		v1 += LINE_WIDTH / sizeof(unsigned long);
31 		v2 += LINE_WIDTH / sizeof(unsigned long);
32 	} while (--lines > 0);
33 }
34 
35 void XOR_FUNC_NAME(3)(unsigned long bytes,
36 		      unsigned long * __restrict v1,
37 		      const unsigned long * __restrict v2,
38 		      const unsigned long * __restrict v3)
39 {
40 	unsigned long lines = bytes / LINE_WIDTH;
41 
42 	do {
43 		__asm__ __volatile__ (
44 			LD_INOUT_LINE(v1)
45 			LD_AND_XOR_LINE(v2)
46 			LD_AND_XOR_LINE(v3)
47 			ST_LINE(v1)
48 		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory"
49 		);
50 
51 		v1 += LINE_WIDTH / sizeof(unsigned long);
52 		v2 += LINE_WIDTH / sizeof(unsigned long);
53 		v3 += LINE_WIDTH / sizeof(unsigned long);
54 	} while (--lines > 0);
55 }
56 
57 void XOR_FUNC_NAME(4)(unsigned long bytes,
58 		      unsigned long * __restrict v1,
59 		      const unsigned long * __restrict v2,
60 		      const unsigned long * __restrict v3,
61 		      const unsigned long * __restrict v4)
62 {
63 	unsigned long lines = bytes / LINE_WIDTH;
64 
65 	do {
66 		__asm__ __volatile__ (
67 			LD_INOUT_LINE(v1)
68 			LD_AND_XOR_LINE(v2)
69 			LD_AND_XOR_LINE(v3)
70 			LD_AND_XOR_LINE(v4)
71 			ST_LINE(v1)
72 		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4)
73 		: "memory"
74 		);
75 
76 		v1 += LINE_WIDTH / sizeof(unsigned long);
77 		v2 += LINE_WIDTH / sizeof(unsigned long);
78 		v3 += LINE_WIDTH / sizeof(unsigned long);
79 		v4 += LINE_WIDTH / sizeof(unsigned long);
80 	} while (--lines > 0);
81 }
82 
83 void XOR_FUNC_NAME(5)(unsigned long bytes,
84 		      unsigned long * __restrict v1,
85 		      const unsigned long * __restrict v2,
86 		      const unsigned long * __restrict v3,
87 		      const unsigned long * __restrict v4,
88 		      const unsigned long * __restrict v5)
89 {
90 	unsigned long lines = bytes / LINE_WIDTH;
91 
92 	do {
93 		__asm__ __volatile__ (
94 			LD_INOUT_LINE(v1)
95 			LD_AND_XOR_LINE(v2)
96 			LD_AND_XOR_LINE(v3)
97 			LD_AND_XOR_LINE(v4)
98 			LD_AND_XOR_LINE(v5)
99 			ST_LINE(v1)
100 		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4),
101 		    [v5] "r"(v5) : "memory"
102 		);
103 
104 		v1 += LINE_WIDTH / sizeof(unsigned long);
105 		v2 += LINE_WIDTH / sizeof(unsigned long);
106 		v3 += LINE_WIDTH / sizeof(unsigned long);
107 		v4 += LINE_WIDTH / sizeof(unsigned long);
108 		v5 += LINE_WIDTH / sizeof(unsigned long);
109 	} while (--lines > 0);
110 }
111