xref: /openbmc/linux/arch/arm64/crypto/aes-modes.S (revision 5ef12cb4a3a78ffb331c03a795a15eea4ae35155)
1/*
2 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
3 *
4 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11/* included by aes-ce.S and aes-neon.S */
12
13	.text
14	.align		4
15
/*
 * Internal helper: encrypt four AES blocks (v0..v3) in parallel.
 *
 * In:   v0..v3 = plaintext blocks, x2 = round key array, w3 = # of rounds
 * Out:  v0..v3 = ciphertext blocks
 * Uses: x8, w7 as scratch (consumed by the encrypt_block4x macro supplied
 *       by the including file, aes-ce.S or aes-neon.S)
 *
 * Reached via bl, so every caller below saves x29/x30 in its prologue.
 */
16aes_encrypt_block4x:
17	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
18	ret
19ENDPROC(aes_encrypt_block4x)
20
/*
 * Internal helper: decrypt four AES blocks (v0..v3) in parallel.
 *
 * In:   v0..v3 = ciphertext blocks, x2 = round key array, w3 = # of rounds
 * Out:  v0..v3 = plaintext blocks
 * Uses: x8, w7 as scratch (consumed by the decrypt_block4x macro supplied
 *       by the including file)
 *
 * Reached via bl, so every caller below saves x29/x30 in its prologue.
 */
21aes_decrypt_block4x:
22	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
23	ret
24ENDPROC(aes_decrypt_block4x)
25
26	/*
27	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
28	 *		   int blocks)
29	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
30	 *		   int blocks)
31	 */
32
/*
 * ECB encryption: x0 = out, x1 = in, x2 = round keys, w3 = rounds,
 * w4 = number of 16-byte blocks. Handles 4 blocks per iteration while
 * possible, then falls back to one block at a time for the remainder.
 */
33AES_ENTRY(aes_ecb_encrypt)
34	stp		x29, x30, [sp, #-16]!	/* frame needed: we bl to the 4x helper */
35	mov		x29, sp

37	enc_prepare	w3, x2, x5		/* set up round keys (x5 = scratch) */

39.LecbencloopNx:
40	subs		w4, w4, #4		/* at least 4 blocks left? */
41	bmi		.Lecbenc1x
42	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
43	bl		aes_encrypt_block4x
44	st1		{v0.16b-v3.16b}, [x0], #64
45	b		.LecbencloopNx
46.Lecbenc1x:
47	adds		w4, w4, #4		/* undo bias; w4 = 0..3 blocks left */
48	beq		.Lecbencout
49.Lecbencloop:
50	ld1		{v0.16b}, [x1], #16		/* get next pt block */
51	encrypt_block	v0, w3, x2, x5, w6
52	st1		{v0.16b}, [x0], #16
53	subs		w4, w4, #1
54	bne		.Lecbencloop
55.Lecbencout:
56	ldp		x29, x30, [sp], #16
57	ret
58AES_ENDPROC(aes_ecb_encrypt)
59
60
/*
 * ECB decryption: x0 = out, x1 = in, x2 = round keys, w3 = rounds,
 * w4 = number of 16-byte blocks. Mirror image of aes_ecb_encrypt using
 * the decryption key schedule (dec_prepare / decrypt_block*).
 */
61AES_ENTRY(aes_ecb_decrypt)
62	stp		x29, x30, [sp, #-16]!	/* frame needed: we bl to the 4x helper */
63	mov		x29, sp

65	dec_prepare	w3, x2, x5		/* set up decryption round keys */

67.LecbdecloopNx:
68	subs		w4, w4, #4		/* at least 4 blocks left? */
69	bmi		.Lecbdec1x
70	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
71	bl		aes_decrypt_block4x
72	st1		{v0.16b-v3.16b}, [x0], #64
73	b		.LecbdecloopNx
74.Lecbdec1x:
75	adds		w4, w4, #4		/* undo bias; w4 = 0..3 blocks left */
76	beq		.Lecbdecout
77.Lecbdecloop:
78	ld1		{v0.16b}, [x1], #16		/* get next ct block */
79	decrypt_block	v0, w3, x2, x5, w6
80	st1		{v0.16b}, [x0], #16
81	subs		w4, w4, #1
82	bne		.Lecbdecloop
83.Lecbdecout:
84	ldp		x29, x30, [sp], #16
85	ret
86AES_ENDPROC(aes_ecb_decrypt)
87
88
89	/*
90	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
91	 *		   int blocks, u8 iv[])
92	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
93	 *		   int blocks, u8 iv[])
94	 */
95
/*
 * CBC encryption: x0 = out, x1 = in, x2 = round keys, w3 = rounds,
 * w4 = blocks, x5 = iv (read on entry, updated on exit).
 *
 * CBC encryption is inherently serial (each block is xor'ed with the
 * previous ciphertext before encryption), so the 4x helper cannot be
 * used; the 4-block loop below only amortizes the loads and stores.
 * Leaf function: no bl, hence no stack frame. v4 carries the chaining
 * value (iv, then previous ciphertext) throughout.
 */
96AES_ENTRY(aes_cbc_encrypt)
97	ld1		{v4.16b}, [x5]			/* get iv */
98	enc_prepare	w3, x2, x6

100.Lcbcencloop4x:
101	subs		w4, w4, #4		/* at least 4 blocks left? */
102	bmi		.Lcbcenc1x
103	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
104	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
105	encrypt_block	v0, w3, x2, x6, w7
106	eor		v1.16b, v1.16b, v0.16b		/* chain ct into next block */
107	encrypt_block	v1, w3, x2, x6, w7
108	eor		v2.16b, v2.16b, v1.16b
109	encrypt_block	v2, w3, x2, x6, w7
110	eor		v3.16b, v3.16b, v2.16b
111	encrypt_block	v3, w3, x2, x6, w7
112	st1		{v0.16b-v3.16b}, [x0], #64
113	mov		v4.16b, v3.16b		/* last ct becomes the next iv */
114	b		.Lcbcencloop4x
115.Lcbcenc1x:
116	adds		w4, w4, #4		/* undo bias; w4 = 0..3 blocks left */
117	beq		.Lcbcencout
118.Lcbcencloop:
119	ld1		{v0.16b}, [x1], #16		/* get next pt block */
120	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
121	encrypt_block	v4, w3, x2, x6, w7
122	st1		{v4.16b}, [x0], #16
123	subs		w4, w4, #1
124	bne		.Lcbcencloop
125.Lcbcencout:
126	st1		{v4.16b}, [x5]			/* return iv */
127	ret
128AES_ENDPROC(aes_cbc_encrypt)
129
130
/*
 * CBC decryption: x0 = out, x1 = in, x2 = round keys, w3 = rounds,
 * w4 = blocks, x5 = iv (read on entry, updated on exit).
 *
 * Unlike encryption, CBC decryption parallelizes: all four blocks are
 * decrypted at once, then each result is xor'ed with the *previous*
 * ciphertext. v7 holds the chaining value (iv / previous ct); v4..v6
 * stash copies of ct blocks 0..2 before the 4x helper clobbers v0..v3.
 */
131AES_ENTRY(aes_cbc_decrypt)
132	stp		x29, x30, [sp, #-16]!	/* frame needed: we bl to the 4x helper */
133	mov		x29, sp

135	ld1		{v7.16b}, [x5]			/* get iv */
136	dec_prepare	w3, x2, x6

138.LcbcdecloopNx:
139	subs		w4, w4, #4		/* at least 4 blocks left? */
140	bmi		.Lcbcdec1x
141	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
142	mov		v4.16b, v0.16b		/* keep ct copies for chaining */
143	mov		v5.16b, v1.16b
144	mov		v6.16b, v2.16b
145	bl		aes_decrypt_block4x
146	sub		x1, x1, #16		/* rewind to ct block 3 ... */
147	eor		v0.16b, v0.16b, v7.16b
148	eor		v1.16b, v1.16b, v4.16b
149	ld1		{v7.16b}, [x1], #16		/* reload 1 ct block */
150	eor		v2.16b, v2.16b, v5.16b	/* (v7 = ct block 3 = next iv) */
151	eor		v3.16b, v3.16b, v6.16b
152	st1		{v0.16b-v3.16b}, [x0], #64
153	b		.LcbcdecloopNx
154.Lcbcdec1x:
155	adds		w4, w4, #4		/* undo bias; w4 = 0..3 blocks left */
156	beq		.Lcbcdecout
157.Lcbcdecloop:
158	ld1		{v1.16b}, [x1], #16		/* get next ct block */
159	mov		v0.16b, v1.16b			/* ...and copy to v0 */
160	decrypt_block	v0, w3, x2, x6, w7
161	eor		v0.16b, v0.16b, v7.16b		/* xor with iv => pt */
162	mov		v7.16b, v1.16b			/* ct is next iv */
163	st1		{v0.16b}, [x0], #16
164	subs		w4, w4, #1
165	bne		.Lcbcdecloop
166.Lcbcdecout:
167	st1		{v7.16b}, [x5]			/* return iv */
168	ldp		x29, x30, [sp], #16
169	ret
170AES_ENDPROC(aes_cbc_decrypt)
171
172
173	/*
174	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
175	 *		   int blocks, u8 ctr[])
176	 */
177
/*
 * CTR encryption: x0 = out, x1 = in, x2 = round keys, w3 = rounds,
 * w4 = blocks, x5 = big-endian 128-bit counter (read and written back).
 *
 * v4 holds the current BE counter block; x6 mirrors its low 64 bits in
 * CPU (swabbed) order so it can be incremented with ordinary arithmetic.
 * The 4x fast path derives counters +1/+2/+3 by patching only the low
 * 32-bit lane of v1..v3, so it must be avoided when those 32 bits could
 * wrap during this call — that is what the cmn check below tests.
 */
178AES_ENTRY(aes_ctr_encrypt)
179	stp		x29, x30, [sp, #-16]!	/* frame needed: we bl to the 4x helper */
180	mov		x29, sp

182	enc_prepare	w3, x2, x6
183	ld1		{v4.16b}, [x5]		/* v4 = BE counter block */

185	umov		x6, v4.d[1]		/* keep swabbed ctr in reg */
186	rev		x6, x6
187	cmn		w6, w4			/* 32 bit overflow? */
188	bcs		.Lctrloop		/* yes: use the safe 1x loop only */
189.LctrloopNx:
190	subs		w4, w4, #4		/* at least 4 blocks left? */
191	bmi		.Lctr1x
192	ldr		q8, =0x30000000200000001	/* addends 1,2,3[,0] */
193	dup		v7.4s, w6		/* broadcast low 32 ctr bits */
194	mov		v0.16b, v4.16b
195	add		v7.4s, v7.4s, v8.4s	/* lanes = ctr+1, ctr+2, ctr+3 */
196	mov		v1.16b, v4.16b
197	rev32		v8.16b, v7.16b		/* back to big-endian lanes */
198	mov		v2.16b, v4.16b
199	mov		v3.16b, v4.16b
200	mov		v1.s[3], v8.s[0]	/* patch low word of each copy */
201	mov		v2.s[3], v8.s[1]
202	mov		v3.s[3], v8.s[2]
203	ld1		{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
204	bl		aes_encrypt_block4x
205	eor		v0.16b, v5.16b, v0.16b	/* xor keystream with input */
206	ld1		{v5.16b}, [x1], #16		/* get 1 input block  */
207	eor		v1.16b, v6.16b, v1.16b
208	eor		v2.16b, v7.16b, v2.16b
209	eor		v3.16b, v5.16b, v3.16b
210	st1		{v0.16b-v3.16b}, [x0], #64
211	add		x6, x6, #4		/* advance counter by 4 blocks */
212	rev		x7, x6
213	ins		v4.d[1], x7		/* write back into BE ctr block */
214	cbz		w4, .Lctrout
215	b		.LctrloopNx
216.Lctr1x:
217	adds		w4, w4, #4		/* undo bias; w4 = 0..3 blocks left */
218	beq		.Lctrout
219.Lctrloop:
220	mov		v0.16b, v4.16b		/* v0 = keystream block input */
221	encrypt_block	v0, w3, x2, x8, w7

223	adds		x6, x6, #1		/* increment BE ctr */
224	rev		x7, x6
225	ins		v4.d[1], x7
226	bcs		.Lctrcarry		/* overflow? */

228.Lctrcarrydone:
229	subs		w4, w4, #1
230	bmi		.Lctrtailblock		/* blocks <0 means tail block */
231	ld1		{v3.16b}, [x1], #16
232	eor		v3.16b, v0.16b, v3.16b
233	st1		{v3.16b}, [x0], #16
234	bne		.Lctrloop

236.Lctrout:
237	st1		{v4.16b}, [x5]		/* return next CTR value */
238	ldp		x29, x30, [sp], #16
239	ret

241.Lctrtailblock:
	/* partial final block: emit the raw keystream block; presumably the
	 * C caller xors it with the tail itself — NOTE(review): confirm against
	 * the glue code in aes-glue.c */
242	st1		{v0.16b}, [x0]
243	ldp		x29, x30, [sp], #16
244	ret

246.Lctrcarry:
	/* low 64 counter bits wrapped: propagate carry into the upper half */
247	umov		x7, v4.d[0]		/* load upper word of ctr  */
248	rev		x7, x7			/* ... to handle the carry */
249	add		x7, x7, #1
250	rev		x7, x7
251	ins		v4.d[0], x7
252	b		.Lctrcarrydone
253AES_ENDPROC(aes_ctr_encrypt)
254	.ltorg			/* literal pool for the ldr q8, =... above */
255
256
257	/*
258	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
259	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
260	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
261	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
262	 */
263
	/*
	 * next_tweak - advance the XTS tweak: \out = \in * x in GF(2^128),
	 * reduced modulo the polynomial whose low coefficients are 0x87
	 * (held in \const, loaded from .Lxts_mul_x below).
	 *
	 * sshr/and build a per-64-bit-half carry mask from the top bits,
	 * add doubles both halves, ext swaps the mask halves so each carry
	 * lands in the other half, and eor folds the carries in.
	 * \tmp is clobbered.
	 */
264	.macro		next_tweak, out, in, const, tmp
265	sshr		\tmp\().2d,  \in\().2d,   #63
266	and		\tmp\().16b, \tmp\().16b, \const\().16b
267	add		\out\().2d,  \in\().2d,   \in\().2d
268	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
269	eor		\out\().16b, \out\().16b, \tmp\().16b
270	.endm

	/* XTS reduction constant, laid out for the host endianness */
272.Lxts_mul_x:
273CPU_LE(	.quad		1, 0x87		)
274CPU_BE(	.quad		0x87, 1		)
275
/*
 * XTS encryption: x0 = out, x1 = in, x2 = rk1 (data key), w3 = rounds,
 * w4 = blocks, x5 = rk2 (tweak key), x6 = iv, w7 = first.
 *
 * On the first call (w7 != 0) the iv is encrypted with rk2 to produce
 * the initial tweak; later calls resume from the tweak saved at x6.
 * v4..v7 hold the tweaks for the 4 blocks in flight. Note v7 does
 * double duty: it carries the .Lxts_mul_x constant into next_tweak,
 * then is overwritten with tweak #4 — which is why the constant is
 * reloaded at the top of every Nx iteration.
 */
276AES_ENTRY(aes_xts_encrypt)
277	stp		x29, x30, [sp, #-16]!	/* frame needed: we bl to the 4x helper */
278	mov		x29, sp

280	ld1		{v4.16b}, [x6]
281	cbz		w7, .Lxtsencnotfirst

283	enc_prepare	w3, x5, x8		/* use the tweak key rk2 ... */
284	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
285	enc_switch_key	w3, x2, x8		/* ... then switch to rk1 */
286	ldr		q7, .Lxts_mul_x
287	b		.LxtsencNx

289.Lxtsencnotfirst:
290	enc_prepare	w3, x2, x8
291.LxtsencloopNx:
292	ldr		q7, .Lxts_mul_x		/* reload constant (v7 clobbered) */
293	next_tweak	v4, v4, v7, v8
294.LxtsencNx:
295	subs		w4, w4, #4		/* at least 4 blocks left? */
296	bmi		.Lxtsenc1x
297	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
298	next_tweak	v5, v4, v7, v8
299	eor		v0.16b, v0.16b, v4.16b	/* pre-whiten with tweaks */
300	next_tweak	v6, v5, v7, v8
301	eor		v1.16b, v1.16b, v5.16b
302	eor		v2.16b, v2.16b, v6.16b
303	next_tweak	v7, v6, v7, v8		/* v7 now = tweak #4 */
304	eor		v3.16b, v3.16b, v7.16b
305	bl		aes_encrypt_block4x
306	eor		v3.16b, v3.16b, v7.16b	/* post-whiten with same tweaks */
307	eor		v0.16b, v0.16b, v4.16b
308	eor		v1.16b, v1.16b, v5.16b
309	eor		v2.16b, v2.16b, v6.16b
310	st1		{v0.16b-v3.16b}, [x0], #64
311	mov		v4.16b, v7.16b		/* carry last tweak forward */
312	cbz		w4, .Lxtsencout
313	b		.LxtsencloopNx
314.Lxtsenc1x:
315	adds		w4, w4, #4		/* undo bias; w4 = 0..3 blocks left */
316	beq		.Lxtsencout
317.Lxtsencloop:
318	ld1		{v1.16b}, [x1], #16
319	eor		v0.16b, v1.16b, v4.16b	/* pre-whiten */
320	encrypt_block	v0, w3, x2, x8, w7
321	eor		v0.16b, v0.16b, v4.16b	/* post-whiten */
322	st1		{v0.16b}, [x0], #16
323	subs		w4, w4, #1
324	beq		.Lxtsencout
325	next_tweak	v4, v4, v7, v8
326	b		.Lxtsencloop
327.Lxtsencout:
328	st1		{v4.16b}, [x6]		/* return tweak for next call */
329	ldp		x29, x30, [sp], #16
330	ret
331AES_ENDPROC(aes_xts_encrypt)
332
333
/*
 * XTS decryption: x0 = out, x1 = in, x2 = rk1 (data key), w3 = rounds,
 * w4 = blocks, x5 = rk2 (tweak key), x6 = iv, w7 = first.
 *
 * Mirror of aes_xts_encrypt: the tweak is always produced by *encrypting*
 * the iv with rk2, while the data blocks use the decryption schedule of
 * rk1. Same v7 dual-use caveat: it holds the .Lxts_mul_x constant, then
 * tweak #4, so the constant is reloaded each Nx iteration.
 */
334AES_ENTRY(aes_xts_decrypt)
335	stp		x29, x30, [sp, #-16]!	/* frame needed: we bl to the 4x helper */
336	mov		x29, sp

338	ld1		{v4.16b}, [x6]
339	cbz		w7, .Lxtsdecnotfirst

341	enc_prepare	w3, x5, x8		/* tweak is computed by encryption */
342	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
343	dec_prepare	w3, x2, x8		/* data uses the decryption keys */
344	ldr		q7, .Lxts_mul_x
345	b		.LxtsdecNx

347.Lxtsdecnotfirst:
348	dec_prepare	w3, x2, x8
349.LxtsdecloopNx:
350	ldr		q7, .Lxts_mul_x		/* reload constant (v7 clobbered) */
351	next_tweak	v4, v4, v7, v8
352.LxtsdecNx:
353	subs		w4, w4, #4		/* at least 4 blocks left? */
354	bmi		.Lxtsdec1x
355	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
356	next_tweak	v5, v4, v7, v8
357	eor		v0.16b, v0.16b, v4.16b	/* pre-whiten with tweaks */
358	next_tweak	v6, v5, v7, v8
359	eor		v1.16b, v1.16b, v5.16b
360	eor		v2.16b, v2.16b, v6.16b
361	next_tweak	v7, v6, v7, v8		/* v7 now = tweak #4 */
362	eor		v3.16b, v3.16b, v7.16b
363	bl		aes_decrypt_block4x
364	eor		v3.16b, v3.16b, v7.16b	/* post-whiten with same tweaks */
365	eor		v0.16b, v0.16b, v4.16b
366	eor		v1.16b, v1.16b, v5.16b
367	eor		v2.16b, v2.16b, v6.16b
368	st1		{v0.16b-v3.16b}, [x0], #64
369	mov		v4.16b, v7.16b		/* carry last tweak forward */
370	cbz		w4, .Lxtsdecout
371	b		.LxtsdecloopNx
372.Lxtsdec1x:
373	adds		w4, w4, #4		/* undo bias; w4 = 0..3 blocks left */
374	beq		.Lxtsdecout
375.Lxtsdecloop:
376	ld1		{v1.16b}, [x1], #16
377	eor		v0.16b, v1.16b, v4.16b	/* pre-whiten */
378	decrypt_block	v0, w3, x2, x8, w7
379	eor		v0.16b, v0.16b, v4.16b	/* post-whiten */
380	st1		{v0.16b}, [x0], #16
381	subs		w4, w4, #1
382	beq		.Lxtsdecout
383	next_tweak	v4, v4, v7, v8
384	b		.Lxtsdecloop
385.Lxtsdecout:
386	st1		{v4.16b}, [x6]		/* return tweak for next call */
387	ldp		x29, x30, [sp], #16
388	ret
389AES_ENDPROC(aes_xts_decrypt)
390
391	/*
392	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
393	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
394	 */
/*
 * CBC-MAC style digest update: x0 = in, x1 = round keys, w2 = rounds,
 * w3 = blocks, x4 = dg (digest, read and written back), w5 = enc_before,
 * w6 = enc_after.
 *
 * v0 carries the running digest. Each block is xor'ed into v0 and the
 * result encrypted — except possibly the very last xor, which is left
 * unencrypted when enc_after == 0 (the csinv picks enc_after once
 * w3 reaches 0, and a nonzero pick means "keep encrypting").
 * Leaf function: uses encrypt_block inline (the serial dependency on v0
 * rules out the 4x helper; the 4x loop only amortizes loads).
 */
395AES_ENTRY(aes_mac_update)
396	ld1		{v0.16b}, [x4]			/* get dg */
397	enc_prepare	w2, x1, x7
398	cbz		w5, .Lmacloop4x		/* enc_before == 0: skip */

400	encrypt_block	v0, w2, x1, x7, w8	/* encrypt dg before absorbing */

402.Lmacloop4x:
403	subs		w3, w3, #4		/* at least 4 blocks left? */
404	bmi		.Lmac1x
405	ld1		{v1.16b-v4.16b}, [x0], #64	/* get next pt block */
406	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
407	encrypt_block	v0, w2, x1, x7, w8
408	eor		v0.16b, v0.16b, v2.16b
409	encrypt_block	v0, w2, x1, x7, w8
410	eor		v0.16b, v0.16b, v3.16b
411	encrypt_block	v0, w2, x1, x7, w8
412	eor		v0.16b, v0.16b, v4.16b
413	cmp		w3, wzr			/* done with all blocks? */
414	csinv		x5, x6, xzr, eq		/* x5 = done ? enc_after : -1 */
415	cbz		w5, .Lmacout		/* done && !enc_after: leave xor'ed */
416	encrypt_block	v0, w2, x1, x7, w8
417	b		.Lmacloop4x
418.Lmac1x:
419	add		w3, w3, #4		/* undo bias; w3 = 0..3 blocks left */
420.Lmacloop:
421	cbz		w3, .Lmacout
422	ld1		{v1.16b}, [x0], #16		/* get next pt block */
423	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

425	subs		w3, w3, #1
426	csinv		x5, x6, xzr, eq		/* x5 = last block ? enc_after : -1 */
427	cbz		w5, .Lmacout		/* last && !enc_after: leave xor'ed */

429	encrypt_block	v0, w2, x1, x7, w8
430	b		.Lmacloop

432.Lmacout:
433	st1		{v0.16b}, [x4]			/* return dg */
434	ret
435AES_ENDPROC(aes_mac_update)
436