// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */

/*
 * Sparse (as at v0.5.0) gets very, very confused by this file.
 * Make it a bit simpler for it.
 */
#if !defined(__CHECKER__)
#include <altivec.h>
#else
#define vec_xor(a, b) a ^ b
#define vector __attribute__((vector_size(16)))
#endif

#include "xor_vmx.h"

typedef vector signed char unative_t;

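/*
 * Every routine below works on four 16-byte vectors (64 bytes) per loop
 * iteration.  DEFINE() reinterprets a *_in buffer pointer as a vector
 * pointer and declares the four working vector variables for it.
 */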
#define DEFINE(V)				\
	unative_t *V = (unative_t *)V##_in;	\
	unative_t V##_0, V##_1, V##_2, V##_3

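/* Load four consecutive vectors from buffer V into its working variables. */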
#define LOAD(V)			\
	do {			\
		V##_0 = V[0];	\
		V##_1 = V[1];	\
		V##_2 = V[2];	\
		V##_3 = V[3];	\
	} while (0)

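/* Store the four working variables back to buffer V. */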
#define STORE(V)		\
	do {			\
		V[0] = V##_0;	\
		V[1] = V##_1;	\
		V[2] = V##_2;	\
		V[3] = V##_3;	\
	} while (0)

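/* XOR each of V2's four working vectors into the corresponding V1 vector. */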
#define XOR(V1, V2)					\
	do {						\
		V1##_0 = vec_xor(V1##_0, V2##_0);	\
		V1##_1 = vec_xor(V1##_1, V2##_1);	\
		V1##_2 = vec_xor(V1##_2, V2##_2);	\
		V1##_3 = vec_xor(V1##_3, V2##_3);	\
	} while (0)

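/*
 * XOR the contents of v2_in into v1_in, 64 bytes per loop iteration.
 * The do/while structure assumes bytes is a non-zero multiple of 64.
 */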
void __xor_altivec_2(unsigned long bytes,
		     unsigned long * __restrict v1_in,
		     const unsigned long * __restrict v2_in)
{
	DEFINE(v1);
	DEFINE(v2);
	unsigned long lines = bytes / (sizeof(unative_t)) / 4;

	do {
		LOAD(v1);
		LOAD(v2);
		XOR(v1, v2);
		STORE(v1);

		v1 += 4;
		v2 += 4;
	} while (--lines > 0);
}

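/* As above, but XOR two source buffers (v2_in and v3_in) into v1_in. */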
void __xor_altivec_3(unsigned long bytes,
		     unsigned long * __restrict v1_in,
		     const unsigned long * __restrict v2_in,
		     const unsigned long * __restrict v3_in)
{
	DEFINE(v1);
	DEFINE(v2);
	DEFINE(v3);
	unsigned long lines = bytes / (sizeof(unative_t)) / 4;

	do {
		LOAD(v1);
		LOAD(v2);
		LOAD(v3);
		XOR(v1, v2);
		XOR(v1, v3);
		STORE(v1);

		v1 += 4;
		v2 += 4;
		v3 += 4;
	} while (--lines > 0);
}

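/*
 * XOR three source buffers into v1_in.  The first two XORs (v1 ^= v2,
 * v3 ^= v4) are independent of each other before the final combine.
 */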
void __xor_altivec_4(unsigned long bytes,
		     unsigned long * __restrict v1_in,
		     const unsigned long * __restrict v2_in,
		     const unsigned long * __restrict v3_in,
		     const unsigned long * __restrict v4_in)
{
	DEFINE(v1);
	DEFINE(v2);
	DEFINE(v3);
	DEFINE(v4);
	unsigned long lines = bytes / (sizeof(unative_t)) / 4;

	do {
		LOAD(v1);
		LOAD(v2);
		LOAD(v3);
		LOAD(v4);
		XOR(v1, v2);
		XOR(v3, v4);
		XOR(v1, v3);
		STORE(v1);

		v1 += 4;
		v2 += 4;
		v3 += 4;
		v4 += 4;
	} while (--lines > 0);
}

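/* XOR four source buffers into v1_in, again pairing independent XORs. */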
void __xor_altivec_5(unsigned long bytes,
		     unsigned long * __restrict v1_in,
		     const unsigned long * __restrict v2_in,
		     const unsigned long * __restrict v3_in,
		     const unsigned long * __restrict v4_in,
		     const unsigned long * __restrict v5_in)
{
	DEFINE(v1);
	DEFINE(v2);
	DEFINE(v3);
	DEFINE(v4);
	DEFINE(v5);
	unsigned long lines = bytes / (sizeof(unative_t)) / 4;

	do {
		LOAD(v1);
		LOAD(v2);
		LOAD(v3);
		LOAD(v4);
		LOAD(v5);
		XOR(v1, v2);
		XOR(v3, v4);
		XOR(v1, v5);
		XOR(v1, v3);
		STORE(v1);

		v1 += 4;
		v2 += 4;
		v3 += 4;
		v4 += 4;
		v5 += 4;
	} while (--lines > 0);
}
157