xref: /openbmc/linux/arch/arm64/crypto/aes-modes.S (revision 4b4f3acc)
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
4 *
5 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
6 */
7
8/* included by aes-ce.S and aes-neon.S */
9
10	.text
11	.align		4
12
/*
 * Internal helper: encrypt the four AES blocks held in v0-v3 in place.
 *
 * Uses the caller's register layout: w3 = number of rounds, x2 = round
 * key pointer (matching the rk/rounds arguments of the AES_ENTRY
 * routines below); x8/w7 are passed as scratch to the encrypt_block4x
 * macro, which is supplied by the including file (aes-ce.S or
 * aes-neon.S).  Called via bl, so the caller must preserve x30.
 */
aes_encrypt_block4x:
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
ENDPROC(aes_encrypt_block4x)
17
/*
 * Internal helper: decrypt the four AES blocks held in v0-v3 in place.
 *
 * Register layout mirrors aes_encrypt_block4x: w3 = rounds, x2 = round
 * key pointer, x8/w7 scratch for the decrypt_block4x macro provided by
 * the including file.  Called via bl, so the caller must preserve x30.
 */
aes_decrypt_block4x:
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
ENDPROC(aes_decrypt_block4x)
22
23	/*
24	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
25	 *		   int blocks)
26	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
27	 *		   int blocks)
28	 */
29
/*
 * ECB encryption: x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks.
 * Blocks are independent, so four are processed per iteration where
 * possible, with a single-block loop for the remainder.
 */
AES_ENTRY(aes_ecb_encrypt)
	stp		x29, x30, [sp, #-16]!	/* frame record: bl below clobbers x30 */
	mov		x29, sp

	enc_prepare	w3, x2, x5		/* set up round keys (x5 = scratch) */

.LecbencloopNx:
	subs		w4, w4, #4		/* at least 4 blocks left? */
	bmi		.Lecbenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	bl		aes_encrypt_block4x
	st1		{v0.16b-v3.16b}, [x0], #64
	b		.LecbencloopNx
.Lecbenc1x:
	adds		w4, w4, #4		/* undo bias: w4 = 0..3 blocks left */
	beq		.Lecbencout
.Lecbencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	encrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbencloop
.Lecbencout:
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_ecb_encrypt)
56
57
/*
 * ECB decryption: x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks.
 * Same structure as aes_ecb_encrypt, using the decryption key schedule.
 */
AES_ENTRY(aes_ecb_decrypt)
	stp		x29, x30, [sp, #-16]!	/* frame record: bl below clobbers x30 */
	mov		x29, sp

	dec_prepare	w3, x2, x5		/* set up decryption round keys */

.LecbdecloopNx:
	subs		w4, w4, #4		/* at least 4 blocks left? */
	bmi		.Lecbdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	bl		aes_decrypt_block4x
	st1		{v0.16b-v3.16b}, [x0], #64
	b		.LecbdecloopNx
.Lecbdec1x:
	adds		w4, w4, #4		/* undo bias: w4 = 0..3 blocks left */
	beq		.Lecbdecout
.Lecbdecloop:
	ld1		{v0.16b}, [x1], #16		/* get next ct block */
	decrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_ecb_decrypt)
84
85
86	/*
87	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
88	 *		   int blocks, u8 iv[])
89	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
90	 *		   int blocks, u8 iv[])
91	 */
92
/*
 * CBC encryption: x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks,
 * x5 = iv (updated on return).
 *
 * CBC encryption is inherently serial (each block is xor'ed with the
 * previous ciphertext before encryption), so the 4x path cannot
 * interleave the cipher itself -- it only amortizes the loads/stores
 * over four blocks.  v4 carries the running IV / previous ciphertext.
 * Leaf function: no bl, so no frame record is needed.
 */
AES_ENTRY(aes_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

.Lcbcencloop4x:
	subs		w4, w4, #4
	bmi		.Lcbcenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w3, x2, x6, w7
	eor		v1.16b, v1.16b, v0.16b		/* chain: ct0 into pt1 */
	encrypt_block	v1, w3, x2, x6, w7
	eor		v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w3, x2, x6, w7
	eor		v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w3, x2, x6, w7
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v3.16b			/* last ct is next iv */
	b		.Lcbcencloop4x
.Lcbcenc1x:
	adds		w4, w4, #4			/* undo bias: 0..3 blocks left */
	beq		.Lcbcencout
.Lcbcencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w3, x2, x6, w7
	st1		{v4.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcencloop
.Lcbcencout:
	st1		{v4.16b}, [x5]			/* return iv */
	ret
AES_ENDPROC(aes_cbc_encrypt)
126
127
/*
 * CBC decryption: x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks,
 * x5 = iv (updated on return).
 *
 * Unlike encryption, CBC decryption parallelises: all four blocks are
 * deciphered at once, then each result is xor'ed with the previous
 * ciphertext block.  v7 holds the IV / previous ct; v4-v6 preserve
 * copies of ct blocks 0-2 across the 4x call (which clobbers v0-v3).
 * The fourth ct block (needed as the next IV) is recovered by
 * rewinding x1 by 16 and reloading, rather than burning another
 * register on a copy.
 */
AES_ENTRY(aes_cbc_decrypt)
	stp		x29, x30, [sp, #-16]!	/* frame record: bl below clobbers x30 */
	mov		x29, sp

	ld1		{v7.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

.LcbcdecloopNx:
	subs		w4, w4, #4
	bmi		.Lcbcdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	mov		v4.16b, v0.16b			/* save ct0..ct2 before */
	mov		v5.16b, v1.16b			/* the 4x call clobbers */
	mov		v6.16b, v2.16b			/* v0-v3 */
	bl		aes_decrypt_block4x
	sub		x1, x1, #16			/* rewind to last ct block */
	eor		v0.16b, v0.16b, v7.16b		/* xor with prev iv */
	eor		v1.16b, v1.16b, v4.16b		/* xor with ct0 */
	ld1		{v7.16b}, [x1], #16		/* reload 1 ct block: next iv */
	eor		v2.16b, v2.16b, v5.16b
	eor		v3.16b, v3.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	b		.LcbcdecloopNx
.Lcbcdec1x:
	adds		w4, w4, #4			/* undo bias: 0..3 blocks left */
	beq		.Lcbcdecout
.Lcbcdecloop:
	ld1		{v1.16b}, [x1], #16		/* get next ct block */
	mov		v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v7.16b		/* xor with iv => pt */
	mov		v7.16b, v1.16b			/* ct is next iv */
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	st1		{v7.16b}, [x5]			/* return iv */
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_cbc_decrypt)
168
169
170	/*
171	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
172	 *		       int rounds, int bytes, u8 const iv[])
173	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
174	 *		       int rounds, int bytes, u8 const iv[])
175	 */
176
/*
 * CBC ciphertext-stealing encryption of the final, partial pair of
 * blocks: x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = bytes,
 * x5 = iv.
 *
 * x4 becomes bytes - 16, the length of the short tail.  v3/v4 are
 * tbl permute vectors from .Lcts_permute_table, offset by the tail
 * length, used to shift/truncate the final blocks; out-of-range table
 * indices (0xff) produce zero lanes.  Loads and stores of the two
 * blocks deliberately overlap by 16 - tail bytes.
 * Leaf function: no bl, so no frame record is needed.
 */
AES_ENTRY(aes_cbc_cts_encrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16		/* x4 = tail length */
	add		x9, x8, #32
	add		x8, x8, x4		/* permute for final store */
	sub		x9, x9, x4		/* permute for short tail */
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
	tbl		v1.16b, {v1.16b}, v4.16b	/* align tail, zero-pad */
	encrypt_block	v0, w3, x2, x6, w7

	eor		v1.16b, v1.16b, v0.16b		/* CBC-chain into tail */
	tbl		v0.16b, {v0.16b}, v3.16b	/* keep stolen ct bytes */
	encrypt_block	v1, w3, x2, x6, w7

	add		x4, x0, x4
	st1		{v0.16b}, [x4]			/* overlapping stores */
	st1		{v1.16b}, [x0]
	ret
AES_ENDPROC(aes_cbc_cts_encrypt)
205
/*
 * CBC ciphertext-stealing decryption of the final, partial pair of
 * blocks: x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = bytes,
 * x5 = iv.
 *
 * Mirror image of aes_cbc_cts_encrypt: v3/v4 are the same permute
 * vectors, tbx (as opposed to tbl) merges the stolen ciphertext bytes
 * back into the penultimate block before its final decryption.
 * Leaf function: no bl, so no frame record is needed.
 */
AES_ENTRY(aes_cbc_cts_decrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16		/* x4 = tail length */
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

	tbl		v2.16b, {v1.16b}, v4.16b	/* align tail ct, zero-pad */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v2.16b, v2.16b, v0.16b		/* recover tail pt */

	tbx		v0.16b, {v1.16b}, v4.16b	/* merge stolen ct bytes */
	tbl		v2.16b, {v2.16b}, v3.16b	/* position tail for store */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */

	add		x4, x0, x4
	st1		{v2.16b}, [x4]			/* overlapping stores */
	st1		{v0.16b}, [x0]
	ret
AES_ENDPROC(aes_cbc_cts_decrypt)
235
	.section	".rodata", "a"
	.align		6
	/*
	 * tbl/tbx permute table for the CTS routines: a 16-byte identity
	 * permutation (0x0..0xf) padded on both sides with 16 bytes of
	 * 0xff.  Indexing it at table + n or table + 32 - n (n = tail
	 * length) yields a vector that shifts a block by n bytes, with
	 * the 0xff entries selecting zero (tbl) or leaving the
	 * destination untouched (tbx).
	 */
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.previous
246
247
248	/*
249	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
250	 *		   int blocks, u8 ctr[])
251	 */
252
/*
 * CTR encryption: x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks,
 * x5 = big-endian counter (updated on return).
 *
 * v4 holds the BE counter block; x6 mirrors its low 64 bits in host
 * byte order so it can be incremented with integer adds.  The 4x fast
 * path only increments the low 32 bits of the counter, so it is
 * bypassed entirely (bcs .Lctrloop) if w6 + blocks would carry out of
 * 32 bits; the single-block loop handles full 64-bit carry via
 * .Lctrcarry.
 */
AES_ENTRY(aes_ctr_encrypt)
	stp		x29, x30, [sp, #-16]!	/* frame record: bl below clobbers x30 */
	mov		x29, sp

	enc_prepare	w3, x2, x6
	ld1		{v4.16b}, [x5]

	umov		x6, v4.d[1]		/* keep swabbed ctr in reg */
	rev		x6, x6
	cmn		w6, w4			/* 32 bit overflow? */
	bcs		.Lctrloop		/* yes: take the safe 1x path */
.LctrloopNx:
	subs		w4, w4, #4
	bmi		.Lctr1x
	add		w7, w6, #1		/* build ctr+1..ctr+3 for v1-v3 */
	mov		v0.16b, v4.16b
	add		w8, w6, #2
	mov		v1.16b, v4.16b
	add		w9, w6, #3
	mov		v2.16b, v4.16b
	rev		w7, w7			/* back to big-endian lane order */
	mov		v3.16b, v4.16b
	rev		w8, w8
	mov		v1.s[3], w7		/* patch low 32 bits of ctr */
	rev		w9, w9
	mov		v2.s[3], w8
	mov		v3.s[3], w9
	ld1		{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
	bl		aes_encrypt_block4x
	eor		v0.16b, v5.16b, v0.16b		/* xor pt with keystream */
	ld1		{v5.16b}, [x1], #16		/* get 1 input block  */
	eor		v1.16b, v6.16b, v1.16b
	eor		v2.16b, v7.16b, v2.16b
	eor		v3.16b, v5.16b, v3.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	add		x6, x6, #4		/* advance counter by 4 blocks */
	rev		x7, x6
	ins		v4.d[1], x7		/* write back into BE ctr block */
	cbz		w4, .Lctrout
	b		.LctrloopNx
.Lctr1x:
	adds		w4, w4, #4		/* undo bias: 0..3 blocks left */
	beq		.Lctrout
.Lctrloop:
	mov		v0.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7	/* v0 = keystream block */

	adds		x6, x6, #1		/* increment BE ctr */
	rev		x7, x6
	ins		v4.d[1], x7
	bcs		.Lctrcarry		/* overflow? */

.Lctrcarrydone:
	subs		w4, w4, #1
	bmi		.Lctrtailblock		/* blocks <0 means tail block */
	ld1		{v3.16b}, [x1], #16
	eor		v3.16b, v0.16b, v3.16b
	st1		{v3.16b}, [x0], #16
	bne		.Lctrloop

.Lctrout:
	st1		{v4.16b}, [x5]		/* return next CTR value */
	ldp		x29, x30, [sp], #16
	ret

.Lctrtailblock:
	st1		{v0.16b}, [x0]		/* emit raw keystream for partial block */
	b		.Lctrout

.Lctrcarry:
	umov		x7, v4.d[0]		/* load upper word of ctr  */
	rev		x7, x7			/* ... to handle the carry */
	add		x7, x7, #1
	rev		x7, x7
	ins		v4.d[0], x7
	b		.Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)
330
331
	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
	 */
338
	/*
	 * next_tweak - derive the next XTS tweak from the current one
	 *
	 * Multiplies the 128-bit tweak by x in GF(2^128): doubles both
	 * 64-bit halves, then uses the sign-extended top bits, masked by
	 * xtsmask ({0x1, 0x87}) and swapped across halves by ext, to
	 * propagate the inter-half carry (0x1) and fold in the reduction
	 * polynomial (0x87).  \out may equal \in; \tmp is clobbered.
	 */
	.macro		next_tweak, out, in, tmp
	sshr		\tmp\().2d,  \in\().2d,   #63
	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
	add		\out\().2d,  \in\().2d,   \in\().2d
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm

	/*
	 * xts_load_mask - materialise the XTS carry/reduction mask
	 *
	 * Builds the 64-bit lane pair {0x1, 0x87} in xtsmask (a register
	 * alias defined by the including file): movi fills the low two
	 * 32-bit lanes of each source and uzp1 interleaves their even
	 * lanes.  \tmp is clobbered.
	 */
	.macro		xts_load_mask, tmp
	movi		xtsmask.2s, #0x1
	movi		\tmp\().2s, #0x87
	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
	.endm
352
/*
 * XTS encryption: x0 = out, x1 = in, x2 = rk1 (data key), w3 = rounds,
 * w4 = blocks, x5 = rk2 (tweak key), x6 = iv, w7 = first.
 *
 * On the first call (w7 != 0) the IV is encrypted with rk2 to produce
 * the initial tweak, which is saved back through x6 at the end so
 * follow-on calls (w7 == 0) can resume.  v4 is the current tweak,
 * v5-v7 the three look-ahead tweaks for the 4x path, v8 the xtsmask
 * constant (reloaded after the 4x call via xts_reload_mask, defined by
 * the including file, in case the call clobbered it).
 */
AES_ENTRY(aes_xts_encrypt)
	stp		x29, x30, [sp, #-16]!	/* frame record: bl below clobbers x30 */
	mov		x29, sp

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	cbz		w7, .Lxtsencnotfirst

	enc_prepare	w3, x5, x8		/* tweak key schedule */
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	enc_switch_key	w3, x2, x8		/* switch to data key */
	b		.LxtsencNx

.Lxtsencnotfirst:
	enc_prepare	w3, x2, x8
.LxtsencloopNx:
	next_tweak	v4, v4, v8
.LxtsencNx:
	subs		w4, w4, #4
	bmi		.Lxtsenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b		/* pre-whiten with tweaks */
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_encrypt_block4x
	eor		v3.16b, v3.16b, v7.16b		/* post-whiten with tweaks */
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b			/* carry last tweak forward */
	cbz		w4, .Lxtsencout
	xts_reload_mask	v8
	b		.LxtsencloopNx
.Lxtsenc1x:
	adds		w4, w4, #4			/* undo bias: 0..3 blocks left */
	beq		.Lxtsencout
.Lxtsencloop:
	ld1		{v1.16b}, [x1], #16
	eor		v0.16b, v1.16b, v4.16b		/* pre-whiten */
	encrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b		/* post-whiten */
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	beq		.Lxtsencout
	next_tweak	v4, v4, v8
	b		.Lxtsencloop
.Lxtsencout:
	st1		{v4.16b}, [x6]			/* return tweak */
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_xts_encrypt)
409
410
/*
 * XTS decryption: x0 = out, x1 = in, x2 = rk1 (data key), w3 = rounds,
 * w4 = blocks, x5 = rk2 (tweak key), x6 = iv, w7 = first.
 *
 * Same structure as aes_xts_encrypt; note the tweak is always computed
 * with the ENcryption schedule of rk2 (encrypt_block), only the data
 * blocks use the decryption schedule of rk1.
 */
AES_ENTRY(aes_xts_decrypt)
	stp		x29, x30, [sp, #-16]!	/* frame record: bl below clobbers x30 */
	mov		x29, sp

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	cbz		w7, .Lxtsdecnotfirst

	enc_prepare	w3, x5, x8		/* tweak key schedule */
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	dec_prepare	w3, x2, x8		/* data decryption key schedule */
	b		.LxtsdecNx

.Lxtsdecnotfirst:
	dec_prepare	w3, x2, x8
.LxtsdecloopNx:
	next_tweak	v4, v4, v8
.LxtsdecNx:
	subs		w4, w4, #4
	bmi		.Lxtsdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b		/* pre-whiten with tweaks */
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_decrypt_block4x
	eor		v3.16b, v3.16b, v7.16b		/* post-whiten with tweaks */
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b			/* carry last tweak forward */
	cbz		w4, .Lxtsdecout
	xts_reload_mask	v8
	b		.LxtsdecloopNx
.Lxtsdec1x:
	adds		w4, w4, #4			/* undo bias: 0..3 blocks left */
	beq		.Lxtsdecout
.Lxtsdecloop:
	ld1		{v1.16b}, [x1], #16
	eor		v0.16b, v1.16b, v4.16b		/* pre-whiten */
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b		/* post-whiten */
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	beq		.Lxtsdecout
	next_tweak	v4, v4, v8
	b		.Lxtsdecloop
.Lxtsdecout:
	st1		{v4.16b}, [x6]			/* return tweak */
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_xts_decrypt)
467
468	/*
469	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
470	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
471	 */
/*
 * CBC-MAC style digest update: x0 = in, x1 = rk, w2 = rounds,
 * w3 = blocks, x4 = dg (digest, updated in place), w5 = enc_before,
 * w6 = enc_after.
 *
 * Arguments are copied into callee-saved x19-x24 (via the kernel's
 * frame_push macro) so they survive cond_yield_neon, which may yield
 * the NEON unit mid-loop and resume at .Lmacrestart.  The digest is
 * only encrypted after absorbing the final block if enc_after is set:
 * "csinv x5, x24, xzr, eq" yields enc_after when the block count just
 * hit zero and all-ones (always encrypt) otherwise.
 */
AES_ENTRY(aes_mac_update)
	frame_push	6			/* save x19-x24, set up frame */

	mov		x19, x0			/* x19 = in */
	mov		x20, x1			/* x20 = rk */
	mov		x21, x2			/* x21 = rounds */
	mov		x22, x3			/* x22 = blocks remaining */
	mov		x23, x4			/* x23 = dg */
	mov		x24, x6			/* x24 = enc_after */

	ld1		{v0.16b}, [x23]			/* get dg */
	enc_prepare	w2, x1, x7
	cbz		w5, .Lmacloop4x		/* skip if !enc_before */

	encrypt_block	v0, w2, x1, x7, w8

.Lmacloop4x:
	subs		w22, w22, #4
	bmi		.Lmac1x
	ld1		{v1.16b-v4.16b}, [x19], #64	/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v4.16b
	cmp		w22, wzr		/* done with all blocks? */
	csinv		x5, x24, xzr, eq	/* x5 = done ? enc_after : ~0 */
	cbz		w5, .Lmacout
	encrypt_block	v0, w21, x20, x7, w8
	st1		{v0.16b}, [x23]			/* return dg */
	cond_yield_neon	.Lmacrestart		/* maybe yield NEON; resume below */
	b		.Lmacloop4x
.Lmac1x:
	add		w22, w22, #4		/* undo bias: 0..3 blocks left */
.Lmacloop:
	cbz		w22, .Lmacout
	ld1		{v1.16b}, [x19], #16		/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

	subs		w22, w22, #1
	csinv		x5, x24, xzr, eq	/* x5 = last ? enc_after : ~0 */
	cbz		w5, .Lmacout

.Lmacenc:
	encrypt_block	v0, w21, x20, x7, w8
	b		.Lmacloop

.Lmacout:
	st1		{v0.16b}, [x23]			/* return dg */
	frame_pop
	ret

.Lmacrestart:
	/* resumed after a NEON yield: state in v0/keys must be reloaded */
	ld1		{v0.16b}, [x23]			/* get dg */
	enc_prepare	w21, x20, x0
	b		.Lmacloop4x
AES_ENDPROC(aes_mac_update)
531