/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

#define AES_ENTRY(func)		ENTRY(neon_ ## func)
#define AES_ENDPROC(func)	ENDPROC(neon_ ## func)

	xtsmask		.req	v7
	cbciv		.req	v7
	vctr		.req	v4
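
	/*
	 * aes-modes.S refers to the CBC/CTR/XTS IV registers by the
	 * names above; v4 and v7 are safe homes for them here since,
	 * apart from the state registers passed in, the block macros
	 * below only clobber v8-v31.
	 */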

	.macro		xts_reload_mask, tmp
	xts_load_mask	\tmp
	.endm

	/* multiply by polynomial 'x' in GF(2^8) */
	.macro		mul_by_x, out, in, temp, const
	sshr		\temp, \in, #7
	shl		\out, \in, #1
	and		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm
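
	/*
	 * A rough C model of the byte-lane math above (illustrative
	 * sketch, not part of the kernel sources): the arithmetic shift
	 * smears the top bit into an all-ones mask, which selects the
	 * reduction constant 0x1b (x^4 + x^3 + x + 1).
	 *
	 *	uint8_t mul_by_x(uint8_t b)
	 *	{
	 *		uint8_t mask = (uint8_t)((int8_t)b >> 7);
	 *		return (uint8_t)(b << 1) ^ (mask & 0x1b);
	 *	}
	 */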

	/* multiply by polynomial 'x^2' in GF(2^8) */
	.macro		mul_by_x2, out, in, temp, const
	ushr		\temp, \in, #6
	shl		\out, \in, #2
	pmul		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm
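
	/*
	 * Equivalent byte-lane sketch (illustrative only): pmul is a
	 * carryless multiply, so the two bits shifted out at the top
	 * contribute x^8 = 0x1b (bit 6) and x^9 = 0x36 (bit 7).
	 *
	 *	uint8_t mul_by_x2(uint8_t b)
	 *	{
	 *		uint8_t hi = b >> 6;			// 0..3
	 *		uint8_t red = ((hi & 1) ? 0x1b : 0) ^
	 *			      ((hi & 2) ? 0x36 : 0);	// pmul hi, 0x1b
	 *		return (uint8_t)(b << 2) ^ red;
	 *	}
	 */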

	/* preload the entire Sbox */
	.macro		prepare, sbox, shiftrows, temp
	movi		v12.16b, #0x1b
	ldr_l		q13, \shiftrows, \temp
	ldr_l		q14, .Lror32by8, \temp
	adr_l		\temp, \sbox
	ld1		{v16.16b-v19.16b}, [\temp], #64
	ld1		{v20.16b-v23.16b}, [\temp], #64
	ld1		{v24.16b-v27.16b}, [\temp], #64
	ld1		{v28.16b-v31.16b}, [\temp]
	.endm
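
	/*
	 * Fixed register allocation assumed by the block macros below:
	 *
	 *	v8-v11		scratch
	 *	v12		GF(2^8) reduction constant 0x1b
	 *	v13		ShiftRows permutation (forward or reverse)
	 *	v14		.Lror32by8 permutation for mix_columns
	 *	v15		round key, then the 0x40 Sbox index step
	 *	v16-v31		the full 256 byte Sbox, 16 bytes each
	 */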

	/* do preload for encryption */
	.macro		enc_prepare, ignore0, ignore1, temp
	prepare		.LForward_Sbox, .LForward_ShiftRows, \temp
	.endm

	.macro		enc_switch_key, ignore0, ignore1, temp
	/* do nothing */
	.endm

	/* do preload for decryption */
	.macro		dec_prepare, ignore0, ignore1, temp
	prepare		.LReverse_Sbox, .LReverse_ShiftRows, \temp
	.endm

	/* apply SubBytes transformation using the preloaded Sbox */
	.macro		sub_bytes, in
	sub		v9.16b, \in\().16b, v15.16b
	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
	sub		v10.16b, v9.16b, v15.16b
	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v11.16b, v10.16b, v15.16b
	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
	.endm
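
	/*
	 * tbl/tbx can index at most 64 table bytes (four registers) per
	 * instruction, so the 256 byte lookup is split in four: the tbl
	 * covers indices 0x00-0x3f and zeroes every other lane, and each
	 * tbx handles the next 0x40 indices with a rebased index
	 * (v15 holds 0x40 here), leaving already-translated lanes
	 * untouched. Net effect per byte lane: out = Sbox[in].
	 */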

	/* apply MixColumns transformation */
	.macro		mix_columns, in, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2	v8.16b, \in\().16b, v9.16b, v12.16b
	eor		\in\().16b, \in\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in\().16b, \in\().16b, v8.16b
	.endif

	mul_by_x	v9.16b, \in\().16b, v8.16b, v12.16b
	rev32		v8.8h, \in\().8h
	eor		v8.16b, v8.16b, v9.16b
	eor		\in\().16b, \in\().16b, v8.16b
	tbl		\in\().16b, {\in\().16b}, v14.16b
	eor		\in\().16b, \in\().16b, v8.16b
	.endm
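
	/*
	 * The sequence above uses a standard SIMD factorization of
	 * MixColumns (rot16/ror8 denote byte rotations within each
	 * 32-bit column word):
	 *
	 *	t   = mul_by_x(a) ^ rot16(a)
	 *	out = ror8(a ^ t) ^ t
	 *
	 * Expanding byte i of a column (indices mod 4) gives
	 * 2*a[i] ^ 3*a[i+1] ^ a[i+2] ^ a[i+3], i.e. the {2, 3, 1, 1}
	 * MixColumns coefficients. The decryption-only prologue
	 * multiplies each column by the circulant {5, 0, 4, 0}
	 * (out[i] = 5*a[i] ^ 4*a[i+2]), and {2, 3, 1, 1} composed with
	 * {5, 0, 4, 0} yields {14, 11, 13, 9}, the Inverse MixColumns
	 * coefficients.
	 */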

	.macro		do_block, enc, in, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
	sub_bytes	\in
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	beq		2222f
	mix_columns	\in, \enc
	b		1111b
2222:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	.endm
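
	/*
	 * Rough C model of do_block (illustrative only; helper names
	 * are made up). SubBytes and ShiftRows commute, so doing
	 * ShiftRows first is equivalent to the textbook order:
	 *
	 *	state ^= rk[0];
	 *	for (i = 1; i <= rounds; i++) {
	 *		state = sub_bytes(shift_rows(state));
	 *		if (i < rounds)
	 *			state = mix_columns(state);
	 *		state ^= rk[i];
	 *	}
	 *
	 * v15 does double duty: it carries the round key into each eor
	 * and is then reloaded with the 0x40 index step for sub_bytes.
	 */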

	.macro		encrypt_block, in, rounds, rk, rkp, i
	do_block	1, \in, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block, in, rounds, rk, rkp, i
	do_block	0, \in, \rounds, \rk, \rkp, \i
	.endm

	/*
	 * Interleaved versions: functionally equivalent to the ones
	 * above, but process four AES states in parallel.
	 */

	.macro		sub_bytes_4x, in0, in1, in2, in3
	sub		v8.16b, \in0\().16b, v15.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v15.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, \in2\().16b, v15.16b
	tbl		\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
	sub		v11.16b, \in3\().16b, v15.16b
	tbl		\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v20.16b-v23.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v20.16b-v23.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v24.16b-v27.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	tbx		\in2\().16b, {v28.16b-v31.16b}, v10.16b
	tbx		\in3\().16b, {v28.16b-v31.16b}, v11.16b
	.endm
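
	/*
	 * Same lookup as sub_bytes, with the tbl/tbx and index-rebasing
	 * steps of four states interleaved so that no instruction
	 * depends on the immediately preceding table lookup, which
	 * helps hide the tbl/tbx latency.
	 */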

	.macro		mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
	sshr		\tmp0\().16b, \in0\().16b, #7
	shl		\out0\().16b, \in0\().16b, #1
	sshr		\tmp1\().16b, \in1\().16b, #7
	and		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #1
	and		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	.macro		mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
	ushr		\tmp0\().16b, \in0\().16b, #6
	shl		\out0\().16b, \in0\().16b, #2
	ushr		\tmp1\().16b, \in1\().16b, #6
	pmul		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #2
	pmul		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	.macro		mix_columns_2x, in0, in1, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2_2x	v8, v9, \in0, \in1, v10, v11, v12
	eor		\in0\().16b, \in0\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in1\().16b, \in1\().16b, v9.16b
	rev32		v9.8h, v9.8h
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	.endif

	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v12
	rev32		v10.8h, \in0\().8h
	rev32		v11.8h, \in1\().8h
	eor		v10.16b, v10.16b, v8.16b
	eor		v11.16b, v11.16b, v9.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	tbl		\in0\().16b, {\in0\().16b}, v14.16b
	tbl		\in1\().16b, {\in1\().16b}, v14.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	.endm

	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
	sub_bytes_4x	\in0, \in1, \in2, \in3
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	beq		2222f
	mix_columns_2x	\in0, \in1, \enc
	mix_columns_2x	\in2, \in3, \enc
	b		1111b
2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	.endm
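
	/*
	 * Same round structure as do_block, scheduled across four AES
	 * states; MixColumns is done as two mix_columns_2x calls since
	 * the v8-v11 scratch registers only have room for two states'
	 * worth of temporaries at a time.
	 */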

	.macro		encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

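	/*
	 * aes-modes.S supplies the mode routines (ECB, CBC, CTR, XTS)
	 * in terms of the macros above; the AES_ENTRY/AES_ENDPROC
	 * definitions at the top of this file give the resulting
	 * symbols a neon_ prefix, e.g. neon_aes_ecb_encrypt.
	 */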
#include "aes-modes.S"

	.section	".rodata", "a"
	.align		6
.LForward_Sbox:
	.byte		0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
	.byte		0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
	.byte		0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
	.byte		0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
	.byte		0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
	.byte		0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
	.byte		0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
	.byte		0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
	.byte		0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
	.byte		0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
	.byte		0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
	.byte		0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
	.byte		0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
	.byte		0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
	.byte		0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
	.byte		0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
	.byte		0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
	.byte		0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
	.byte		0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
	.byte		0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
	.byte		0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
	.byte		0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
	.byte		0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
	.byte		0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
	.byte		0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
	.byte		0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
	.byte		0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
	.byte		0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
	.byte		0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
	.byte		0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
	.byte		0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
	.byte		0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16

.LReverse_Sbox:
	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d

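	/*
	 * tbl indices implementing ShiftRows on the column-major state:
	 * read byte by byte, the forward table is 0, 5, 10, 15, 4, 9,
	 * ..., i.e. out[i] = in[5 * i % 16], which rotates row r of the
	 * state left by r bytes; the reverse table is the inverse
	 * permutation, out[i] = in[13 * i % 16].
	 */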
.LForward_ShiftRows:
	.octa		0x0b06010c07020d08030e09040f0a0500

.LReverse_ShiftRows:
	.octa		0x0306090c0f0205080b0e0104070a0d00

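	/*
	 * tbl indices rotating each 32-bit word right by one byte
	 * (out[i] = in[i + 1] within each word), used by mix_columns
	 * for the ror8() step.
	 */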
.Lror32by8:
	.octa		0x0c0f0e0d080b0a090407060500030201
314