xref: /openbmc/linux/arch/arm64/crypto/sm4-ce-asm.h (revision 1a931707ad4a46e79d4ecfee56d8f6e8cc8d4f28)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * SM4 helper macros for Crypto Extensions
 * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 */
6*ce41fefdSTianjia Zhang 
/*
 * SM4_PREPARE(ptr) - load the SM4 round keys into v24-v31.
 *
 * Reads 128 bytes starting at ptr: the first 64 bytes go to v24-v27 and
 * the next 64 bytes to v28-v31.  The SM4_CRYPT_BLK* macros in this file
 * pass these eight registers, in this order, as the key operand of sm4e.
 *
 * Clobbers: v24-v31, and ptr, which the post-indexed first load leaves
 * advanced by 64 bytes.
 */
#define SM4_PREPARE(ptr)					\
	ld1		{v24.16b, v25.16b, v26.16b, v27.16b}, [ptr], #64; \
	ld1		{v28.16b, v29.16b, v30.16b, v31.16b}, [ptr];
10*ce41fefdSTianjia Zhang 
/*
 * SM4_CRYPT_BLK_BE(b0) - run one 128-bit block through SM4, in place.
 *
 * b0 is a vector register name (e.g. v0).  Unlike SM4_CRYPT_BLK(), no
 * byte swap is applied to the input, so the caller must supply b0 with
 * each 32-bit word already in the byte order sm4e operates on.
 *
 * Requires v24-v31 to hold the round keys (see SM4_PREPARE).  Each sm4e
 * consumes one vector of four round keys, so the eight instructions
 * cover all 32 rounds.  The three trailing permutes reverse the order of
 * the four 32-bit words and byte-swap each word before the result is
 * left in b0.
 */
#define SM4_CRYPT_BLK_BE(b0)					\
	sm4e		b0.4s, v24.4s;	/* rounds  0-3  */	\
	sm4e		b0.4s, v25.4s;	/* rounds  4-7  */	\
	sm4e		b0.4s, v26.4s;	/* rounds  8-11 */	\
	sm4e		b0.4s, v27.4s;	/* rounds 12-15 */	\
	sm4e		b0.4s, v28.4s;	/* rounds 16-19 */	\
	sm4e		b0.4s, v29.4s;	/* rounds 20-23 */	\
	sm4e		b0.4s, v30.4s;	/* rounds 24-27 */	\
	sm4e		b0.4s, v31.4s;	/* rounds 28-31 */	\
	rev64		b0.4s, b0.4s;	/* swap words in each 64-bit half */ \
	ext		b0.16b, b0.16b, b0.16b, #8; /* swap the halves */ \
	rev32		b0.16b, b0.16b;	/* byte-swap each 32-bit word */
23*ce41fefdSTianjia Zhang 
/*
 * SM4_CRYPT_BLK(b0) - SM4-process one block in place.
 *
 * Byte-swaps every 32-bit word of b0 and then hands the register to
 * SM4_CRYPT_BLK_BE(), which performs the rounds and the output permute.
 * Requires v24-v31 to hold the round keys (see SM4_PREPARE).
 */
#define SM4_CRYPT_BLK(b0)					\
	rev32		b0.16b, b0.16b;	/* input byte swap */	\
	SM4_CRYPT_BLK_BE(b0);
27*ce41fefdSTianjia Zhang 
/*
 * SM4_CRYPT_BLK2_BE(b0, b1) - run two independent 128-bit blocks through
 * SM4, in place.
 *
 * b0 and b1 are vector register names.  No byte swap is applied to the
 * inputs (compare SM4_CRYPT_BLK2()), so each 32-bit word must already be
 * in the byte order sm4e operates on.  Requires v24-v31 to hold the
 * round keys (see SM4_PREPARE).
 *
 * The instructions for the two blocks are interleaved; the blocks do not
 * depend on each other.  After the eight sm4e steps per block, the
 * trailing permutes reverse the order of the four 32-bit words and
 * byte-swap each word.
 *
 * The final statement deliberately carries no line continuation, so the
 * definition ends here and cannot absorb whatever line follows it.
 */
#define SM4_CRYPT_BLK2_BE(b0, b1)				\
	/* 8 x sm4e per block, four round keys per vector */	\
	sm4e		b0.4s, v24.4s;				\
	sm4e		b1.4s, v24.4s;				\
	sm4e		b0.4s, v25.4s;				\
	sm4e		b1.4s, v25.4s;				\
	sm4e		b0.4s, v26.4s;				\
	sm4e		b1.4s, v26.4s;				\
	sm4e		b0.4s, v27.4s;				\
	sm4e		b1.4s, v27.4s;				\
	sm4e		b0.4s, v28.4s;				\
	sm4e		b1.4s, v28.4s;				\
	sm4e		b0.4s, v29.4s;				\
	sm4e		b1.4s, v29.4s;				\
	sm4e		b0.4s, v30.4s;				\
	sm4e		b1.4s, v30.4s;				\
	sm4e		b0.4s, v31.4s;				\
	sm4e		b1.4s, v31.4s;				\
	/* swap the 32-bit words inside each 64-bit half */	\
	rev64		b0.4s, b0.4s;				\
	rev64		b1.4s, b1.4s;				\
	/* swap the two 64-bit halves */			\
	ext		b0.16b, b0.16b, b0.16b, #8;		\
	ext		b1.16b, b1.16b, b1.16b, #8;		\
	/* byte-swap each 32-bit word */			\
	rev32		b0.16b, b0.16b;				\
	rev32		b1.16b, b1.16b;
51*ce41fefdSTianjia Zhang 
/*
 * SM4_CRYPT_BLK2(b0, b1) - SM4-process two blocks in place.
 *
 * Byte-swaps every 32-bit word of both registers, then delegates to
 * SM4_CRYPT_BLK2_BE() for the rounds and the output permute.
 * Requires v24-v31 to hold the round keys (see SM4_PREPARE).
 */
#define SM4_CRYPT_BLK2(b0, b1)					\
	/* input byte swap */					\
	rev32		b0.16b, b0.16b;				\
	rev32		b1.16b, b1.16b;				\
	SM4_CRYPT_BLK2_BE(b0, b1);
56*ce41fefdSTianjia Zhang 
/*
 * Private helper for SM4_CRYPT_BLK4_BE: apply one sm4e step, keyed by
 * the round-key vector rk, to each of four blocks in turn.
 */
#define __SM4_CE_SM4E_X4(rk, b0, b1, b2, b3)			\
	sm4e		b0.4s, rk.4s;				\
	sm4e		b1.4s, rk.4s;				\
	sm4e		b2.4s, rk.4s;				\
	sm4e		b3.4s, rk.4s;

/*
 * SM4_CRYPT_BLK4_BE(b0, b1, b2, b3) - run four independent 128-bit
 * blocks through SM4, in place.
 *
 * The arguments are vector register names.  No byte swap is applied to
 * the inputs (compare SM4_CRYPT_BLK4()), so each 32-bit word must
 * already be in the byte order sm4e operates on.  Requires v24-v31 to
 * hold the round keys (see SM4_PREPARE).
 *
 * Each round-key vector is applied to all four blocks before moving on
 * to the next one, so the emitted instruction stream interleaves the
 * blocks.  The trailing permutes reverse the order of the four 32-bit
 * words of every block and byte-swap each word.
 */
#define SM4_CRYPT_BLK4_BE(b0, b1, b2, b3)			\
	__SM4_CE_SM4E_X4(v24, b0, b1, b2, b3)			\
	__SM4_CE_SM4E_X4(v25, b0, b1, b2, b3)			\
	__SM4_CE_SM4E_X4(v26, b0, b1, b2, b3)			\
	__SM4_CE_SM4E_X4(v27, b0, b1, b2, b3)			\
	__SM4_CE_SM4E_X4(v28, b0, b1, b2, b3)			\
	__SM4_CE_SM4E_X4(v29, b0, b1, b2, b3)			\
	__SM4_CE_SM4E_X4(v30, b0, b1, b2, b3)			\
	__SM4_CE_SM4E_X4(v31, b0, b1, b2, b3)			\
	/* swap the 32-bit words inside each 64-bit half */	\
	rev64		b0.4s, b0.4s;				\
	rev64		b1.4s, b1.4s;				\
	rev64		b2.4s, b2.4s;				\
	rev64		b3.4s, b3.4s;				\
	/* swap the two 64-bit halves */			\
	ext		b0.16b, b0.16b, b0.16b, #8;		\
	ext		b1.16b, b1.16b, b1.16b, #8;		\
	ext		b2.16b, b2.16b, b2.16b, #8;		\
	ext		b3.16b, b3.16b, b3.16b, #8;		\
	/* byte-swap each 32-bit word */			\
	rev32		b0.16b, b0.16b;				\
	rev32		b1.16b, b1.16b;				\
	rev32		b2.16b, b2.16b;				\
	rev32		b3.16b, b3.16b;
102*ce41fefdSTianjia Zhang 
/*
 * SM4_CRYPT_BLK4(b0, b1, b2, b3) - SM4-process four blocks in place.
 *
 * Byte-swaps every 32-bit word of the four registers, then delegates to
 * SM4_CRYPT_BLK4_BE() for the rounds and the output permute.
 * Requires v24-v31 to hold the round keys (see SM4_PREPARE).
 */
#define SM4_CRYPT_BLK4(b0, b1, b2, b3)				\
	/* input byte swap */					\
	rev32		b0.16b, b0.16b;				\
	rev32		b1.16b, b1.16b;				\
	rev32		b2.16b, b2.16b;				\
	rev32		b3.16b, b3.16b;				\
	SM4_CRYPT_BLK4_BE(b0, b1, b2, b3);
109*ce41fefdSTianjia Zhang 
/*
 * Private helper for SM4_CRYPT_BLK8_BE: apply one sm4e step, keyed by
 * the round-key vector rk, to each of eight blocks in turn.
 */
#define __SM4_CE_SM4E_X8(rk, b0, b1, b2, b3, b4, b5, b6, b7)	\
	sm4e		b0.4s, rk.4s;				\
	sm4e		b1.4s, rk.4s;				\
	sm4e		b2.4s, rk.4s;				\
	sm4e		b3.4s, rk.4s;				\
	sm4e		b4.4s, rk.4s;				\
	sm4e		b5.4s, rk.4s;				\
	sm4e		b6.4s, rk.4s;				\
	sm4e		b7.4s, rk.4s;

/*
 * SM4_CRYPT_BLK8_BE(b0, ..., b7) - run eight independent 128-bit blocks
 * through SM4, in place.
 *
 * The arguments are vector register names.  No byte swap is applied to
 * the inputs (compare SM4_CRYPT_BLK8()), so each 32-bit word must
 * already be in the byte order sm4e operates on.  Requires v24-v31 to
 * hold the round keys (see SM4_PREPARE).
 *
 * Each round-key vector is applied to all eight blocks before moving on
 * to the next one, so the emitted instruction stream interleaves the
 * blocks.  The trailing permutes reverse the order of the four 32-bit
 * words of every block and byte-swap each word.
 */
#define SM4_CRYPT_BLK8_BE(b0, b1, b2, b3, b4, b5, b6, b7)	\
	__SM4_CE_SM4E_X8(v24, b0, b1, b2, b3, b4, b5, b6, b7)	\
	__SM4_CE_SM4E_X8(v25, b0, b1, b2, b3, b4, b5, b6, b7)	\
	__SM4_CE_SM4E_X8(v26, b0, b1, b2, b3, b4, b5, b6, b7)	\
	__SM4_CE_SM4E_X8(v27, b0, b1, b2, b3, b4, b5, b6, b7)	\
	__SM4_CE_SM4E_X8(v28, b0, b1, b2, b3, b4, b5, b6, b7)	\
	__SM4_CE_SM4E_X8(v29, b0, b1, b2, b3, b4, b5, b6, b7)	\
	__SM4_CE_SM4E_X8(v30, b0, b1, b2, b3, b4, b5, b6, b7)	\
	__SM4_CE_SM4E_X8(v31, b0, b1, b2, b3, b4, b5, b6, b7)	\
	/* swap the 32-bit words inside each 64-bit half */	\
	rev64		b0.4s, b0.4s;				\
	rev64		b1.4s, b1.4s;				\
	rev64		b2.4s, b2.4s;				\
	rev64		b3.4s, b3.4s;				\
	rev64		b4.4s, b4.4s;				\
	rev64		b5.4s, b5.4s;				\
	rev64		b6.4s, b6.4s;				\
	rev64		b7.4s, b7.4s;				\
	/* swap the two 64-bit halves */			\
	ext		b0.16b, b0.16b, b0.16b, #8;		\
	ext		b1.16b, b1.16b, b1.16b, #8;		\
	ext		b2.16b, b2.16b, b2.16b, #8;		\
	ext		b3.16b, b3.16b, b3.16b, #8;		\
	ext		b4.16b, b4.16b, b4.16b, #8;		\
	ext		b5.16b, b5.16b, b5.16b, #8;		\
	ext		b6.16b, b6.16b, b6.16b, #8;		\
	ext		b7.16b, b7.16b, b7.16b, #8;		\
	/* byte-swap each 32-bit word */			\
	rev32		b0.16b, b0.16b;				\
	rev32		b1.16b, b1.16b;				\
	rev32		b2.16b, b2.16b;				\
	rev32		b3.16b, b3.16b;				\
	rev32		b4.16b, b4.16b;				\
	rev32		b5.16b, b5.16b;				\
	rev32		b6.16b, b6.16b;				\
	rev32		b7.16b, b7.16b;
199*ce41fefdSTianjia Zhang 
/*
 * SM4_CRYPT_BLK8(b0, ..., b7) - SM4-process eight blocks in place.
 *
 * Byte-swaps every 32-bit word of the eight registers, then delegates to
 * SM4_CRYPT_BLK8_BE() for the rounds and the output permute.
 * Requires v24-v31 to hold the round keys (see SM4_PREPARE).
 */
#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7)		\
	/* input byte swap */					\
	rev32		b0.16b, b0.16b;				\
	rev32		b1.16b, b1.16b;				\
	rev32		b2.16b, b2.16b;				\
	rev32		b3.16b, b3.16b;				\
	rev32		b4.16b, b4.16b;				\
	rev32		b5.16b, b5.16b;				\
	rev32		b6.16b, b6.16b;				\
	rev32		b7.16b, b7.16b;				\
	SM4_CRYPT_BLK8_BE(b0, b1, b2, b3, b4, b5, b6, b7);
210