xref: /openbmc/linux/arch/arm64/crypto/aes-modes.S (revision ba61bb17)
1/*
2 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
3 *
4 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11/* included by aes-ce.S and aes-neon.S */
12
13	.text
14	.align		4
15
/*
 * aes_encrypt_block4x - out-of-line wrapper around the encrypt_block4x macro.
 *
 * Hands v0-v3 to the macro together with w22 and x21, plus x8 and w7 as its
 * last two operands.  The mode routines in this file copy their 'rounds'
 * argument to x22 and their round-key pointer to x21 before reaching this
 * helper with 'bl' (so x30 is overwritten by the call).
 *
 * NOTE(review): encrypt_block4x is defined by the including file; which
 * other registers it clobbers cannot be told from here - confirm there.
 */
16aes_encrypt_block4x:
17	encrypt_block4x	v0, v1, v2, v3, w22, x21, x8, w7
18	ret
19ENDPROC(aes_encrypt_block4x)
20
/*
 * aes_decrypt_block4x - out-of-line wrapper around the decrypt_block4x macro.
 *
 * Hands v0-v3 to the macro together with w22 and x21, plus x8 and w7 as its
 * last two operands.  The mode routines in this file copy their 'rounds'
 * argument to x22 and their round-key pointer to x21 before reaching this
 * helper with 'bl' (so x30 is overwritten by the call).
 *
 * NOTE(review): decrypt_block4x is defined by the including file; which
 * other registers it clobbers cannot be told from here - confirm there.
 */
21aes_decrypt_block4x:
22	decrypt_block4x	v0, v1, v2, v3, w22, x21, x8, w7
23	ret
24ENDPROC(aes_decrypt_block4x)
25
26	/*
27	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
28	 *		   int blocks)
29	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
30	 *		   int blocks)
31	 */
32
/*
 * aes_ecb_encrypt - ECB-encrypt 'blocks' 16-byte blocks from in[] to out[].
 *
 * Incoming arguments: x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks.
 * They are copied to x19-x23 so they are still available after the calls to
 * aes_encrypt_block4x and after control is handed to cond_yield_neon.
 * Blocks are handled four at a time while at least four remain, then singly.
 */
AES_ENTRY(aes_ecb_encrypt)
	frame_push	5

	mov		x23, x4				/* remaining block count */
	mov		x22, x3				/* number of rounds */
	mov		x21, x2				/* round key pointer */
	mov		x20, x1				/* source pointer */
	mov		x19, x0				/* destination pointer */

.Lecb_enc_resume:
	enc_prepare	w22, x21, x5			/* key setup; redone via cond_yield_neon */

.Lecb_enc_quad:
	subs		w23, w23, #4			/* fewer than four blocks left? */
	bmi		.Lecb_enc_rest
	ld1		{v0.16b-v3.16b}, [x20], #64	/* fetch four plaintext blocks */
	bl		aes_encrypt_block4x
	st1		{v0.16b-v3.16b}, [x19], #64	/* emit four ciphertext blocks */
	cond_yield_neon	.Lecb_enc_resume
	b		.Lecb_enc_quad

.Lecb_enc_rest:
	add		w23, w23, #4			/* undo the trial subtraction */
	cbz		w23, .Lecb_enc_done

.Lecb_enc_single:
	ld1		{v0.16b}, [x20], #16		/* fetch one plaintext block */
	encrypt_block	v0, w22, x21, x5, w6
	st1		{v0.16b}, [x19], #16
	sub		w23, w23, #1
	cbnz		w23, .Lecb_enc_single

.Lecb_enc_done:
	frame_pop
	ret
AES_ENDPROC(aes_ecb_encrypt)
66
67
/*
 * aes_ecb_decrypt - ECB-decrypt 'blocks' 16-byte blocks from in[] to out[].
 *
 * Incoming arguments: x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks.
 * They are copied to x19-x23 so they are still available after the calls to
 * aes_decrypt_block4x and after control is handed to cond_yield_neon.
 * Blocks are handled four at a time while at least four remain, then singly.
 */
AES_ENTRY(aes_ecb_decrypt)
	frame_push	5

	mov		x23, x4				/* remaining block count */
	mov		x22, x3				/* number of rounds */
	mov		x21, x2				/* round key pointer */
	mov		x20, x1				/* source pointer */
	mov		x19, x0				/* destination pointer */

.Lecb_dec_resume:
	dec_prepare	w22, x21, x5			/* key setup; redone via cond_yield_neon */

.Lecb_dec_quad:
	subs		w23, w23, #4			/* fewer than four blocks left? */
	bmi		.Lecb_dec_rest
	ld1		{v0.16b-v3.16b}, [x20], #64	/* fetch four ciphertext blocks */
	bl		aes_decrypt_block4x
	st1		{v0.16b-v3.16b}, [x19], #64	/* emit four plaintext blocks */
	cond_yield_neon	.Lecb_dec_resume
	b		.Lecb_dec_quad

.Lecb_dec_rest:
	add		w23, w23, #4			/* undo the trial subtraction */
	cbz		w23, .Lecb_dec_done

.Lecb_dec_single:
	ld1		{v0.16b}, [x20], #16		/* fetch one ciphertext block */
	decrypt_block	v0, w22, x21, x5, w6
	st1		{v0.16b}, [x19], #16
	sub		w23, w23, #1
	cbnz		w23, .Lecb_dec_single

.Lecb_dec_done:
	frame_pop
	ret
AES_ENDPROC(aes_ecb_decrypt)
101
102
103	/*
104	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
105	 *		   int blocks, u8 iv[])
106	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
107	 *		   int blocks, u8 iv[])
108	 */
109
/*
 * aes_cbc_encrypt - CBC-encrypt 'blocks' 16-byte blocks from in[] to out[].
 *
 * Incoming arguments: x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks,
 * x5 = iv.  They are copied to x19-x24.  v4 holds the chaining value: it is
 * loaded from iv[] and the most recent ciphertext block is written back to
 * iv[] after every group of four blocks and once more on exit.
 */
AES_ENTRY(aes_cbc_encrypt)
	frame_push	6

	mov		x24, x5				/* iv buffer */
	mov		x23, x4				/* remaining block count */
	mov		x22, x3				/* number of rounds */
	mov		x21, x2				/* round key pointer */
	mov		x20, x1				/* source pointer */
	mov		x19, x0				/* destination pointer */

.Lcbc_enc_resume:
	ld1		{v4.16b}, [x24]			/* chaining value <- iv[] */
	enc_prepare	w22, x21, x6

.Lcbc_enc_quad:
	subs		w23, w23, #4			/* fewer than four blocks left? */
	bmi		.Lcbc_enc_rest
	ld1		{v0.16b-v3.16b}, [x20], #64	/* fetch four plaintext blocks */
	/* each block is xor'ed with the previous ciphertext, then encrypted */
	eor		v0.16b, v0.16b, v4.16b
	encrypt_block	v0, w22, x21, x6, w7
	eor		v1.16b, v1.16b, v0.16b
	encrypt_block	v1, w22, x21, x6, w7
	eor		v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w22, x21, x6, w7
	eor		v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w22, x21, x6, w7
	st1		{v0.16b-v3.16b}, [x19], #64	/* emit four ciphertext blocks */
	st1		{v3.16b}, [x24]			/* last ciphertext is the new iv */
	mov		v4.16b, v3.16b			/* ...and the new chaining value */
	cond_yield_neon	.Lcbc_enc_resume
	b		.Lcbc_enc_quad

.Lcbc_enc_rest:
	add		w23, w23, #4			/* undo the trial subtraction */
	cbz		w23, .Lcbc_enc_done

.Lcbc_enc_single:
	ld1		{v0.16b}, [x20], #16		/* fetch one plaintext block */
	eor		v4.16b, v4.16b, v0.16b		/* fold it into the chaining value */
	encrypt_block	v4, w22, x21, x6, w7
	st1		{v4.16b}, [x19], #16
	sub		w23, w23, #1
	cbnz		w23, .Lcbc_enc_single

.Lcbc_enc_done:
	st1		{v4.16b}, [x24]			/* hand the chaining value back */
	frame_pop
	ret
AES_ENDPROC(aes_cbc_encrypt)
156
157
/*
 * aes_cbc_decrypt - CBC-decrypt 'blocks' 16-byte blocks from in[] to out[].
 *
 * Incoming arguments: x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks,
 * x5 = iv; they are copied to x19-x24 below.  v7 holds the chaining value
 * (initially iv[], afterwards the previous ciphertext block).  It is written
 * back to iv[] after every group of four blocks and once more on exit.
 */
158AES_ENTRY(aes_cbc_decrypt)
159	frame_push	6
160
161	mov		x19, x0
162	mov		x20, x1
163	mov		x21, x2
164	mov		x22, x3
165	mov		x23, x4
166	mov		x24, x5
167
	/* also the label handed to cond_yield_neon: reloads iv[] and redoes key setup */
168.Lcbcdecrestart:
169	ld1		{v7.16b}, [x24]			/* get iv */
170	dec_prepare	w22, x21, x6
171
172.LcbcdecloopNx:
173	subs		w23, w23, #4
174	bmi		.Lcbcdec1x
175	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 ct blocks */
	/* keep ct blocks 0-2: they are the xor inputs for pt blocks 1-3 */
176	mov		v4.16b, v0.16b
177	mov		v5.16b, v1.16b
178	mov		v6.16b, v2.16b
179	bl		aes_decrypt_block4x
	/*
	 * Step the input pointer back by one block so the 4th ct block can be
	 * re-read into v7 as the next chaining value.  The re-read happens
	 * before the st1 to [x19] below.
	 */
180	sub		x20, x20, #16
181	eor		v0.16b, v0.16b, v7.16b
182	eor		v1.16b, v1.16b, v4.16b
183	ld1		{v7.16b}, [x20], #16		/* reload 1 ct block */
184	eor		v2.16b, v2.16b, v5.16b
185	eor		v3.16b, v3.16b, v6.16b
186	st1		{v0.16b-v3.16b}, [x19], #64
187	st1		{v7.16b}, [x24]			/* return iv */
188	cond_yield_neon	.Lcbcdecrestart
189	b		.LcbcdecloopNx
190.Lcbcdec1x:
	/* undo the trial subtraction; nothing left means we are done */
191	adds		w23, w23, #4
192	beq		.Lcbcdecout
193.Lcbcdecloop:
194	ld1		{v1.16b}, [x20], #16		/* get next ct block */
195	mov		v0.16b, v1.16b			/* ...and copy to v0 */
196	decrypt_block	v0, w22, x21, x6, w7
197	eor		v0.16b, v0.16b, v7.16b		/* xor with iv => pt */
198	mov		v7.16b, v1.16b			/* ct is next iv */
199	st1		{v0.16b}, [x19], #16
200	subs		w23, w23, #1
201	bne		.Lcbcdecloop
202.Lcbcdecout:
203	st1		{v7.16b}, [x24]			/* return iv */
204	frame_pop
205	ret
206AES_ENDPROC(aes_cbc_decrypt)
207
208
209	/*
210	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
211	 *		   int blocks, u8 ctr[])
212	 */
213
/*
 * aes_ctr_encrypt - CTR mode encryption/decryption.
 *
 * Incoming arguments: x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks,
 * x5 = ctr; they are copied to x19-x24 below.
 *
 * v4 holds the current 16-byte counter block and x6 a byte-swapped copy of
 * its second 8 bytes, so the counter can be advanced with integer adds.
 * Four blocks are processed per iteration while at least four remain and the
 * low 32 bits of the counter cannot wrap within the group; otherwise blocks
 * are processed one at a time, with a carry into the first 8 bytes of the
 * counter when x6 wraps.
 *
 * A negative block count requests a single keystream block: it is stored to
 * out[] without reading in[].  The counter has already been advanced at
 * that point, so ctr[] is updated on this path as well and always holds the
 * next unused counter value when the function returns.
 */
AES_ENTRY(aes_ctr_encrypt)
	frame_push	6

	mov		x19, x0
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4
	mov		x24, x5

	/* also the label handed to cond_yield_neon: redo key setup, reload ctr[] */
.Lctrrestart:
	enc_prepare	w22, x21, x6
	ld1		{v4.16b}, [x24]

	umov		x6, v4.d[1]		/* keep swabbed ctr in reg */
	rev		x6, x6
.LctrloopNx:
	subs		w23, w23, #4
	bmi		.Lctr1x
	cmn		w6, #4			/* 32 bit overflow? */
	bcs		.Lctr1x
	/*
	 * Build counter blocks n, n+1, n+2, n+3 in v0-v3.  Only the last 32
	 * bits differ (no wrap is possible here), so compute them in w7-w9
	 * and insert them byte-swapped.  This avoids a 128-bit literal pool
	 * load, which not every assembler accepts.
	 */
	add		w7, w6, #1
	mov		v0.16b, v4.16b
	add		w8, w6, #2
	mov		v1.16b, v4.16b
	add		w9, w6, #3
	mov		v2.16b, v4.16b
	rev		w7, w7
	mov		v3.16b, v4.16b
	rev		w8, w8
	mov		v1.s[3], w7
	rev		w9, w9
	mov		v2.s[3], w8
	mov		v3.s[3], w9
	ld1		{v5.16b-v7.16b}, [x20], #48	/* get 3 input blocks */
	bl		aes_encrypt_block4x
	eor		v0.16b, v5.16b, v0.16b
	ld1		{v5.16b}, [x20], #16		/* get 1 input block  */
	eor		v1.16b, v6.16b, v1.16b
	eor		v2.16b, v7.16b, v2.16b
	eor		v3.16b, v5.16b, v3.16b
	st1		{v0.16b-v3.16b}, [x19], #64
	add		x6, x6, #4			/* advance ctr past this group */
	rev		x7, x6
	ins		v4.d[1], x7
	cbz		w23, .Lctrout
	st1		{v4.16b}, [x24]		/* return next CTR value */
	cond_yield_neon	.Lctrrestart
	b		.LctrloopNx
.Lctr1x:
	adds		w23, w23, #4		/* undo the trial subtraction */
	beq		.Lctrout
.Lctrloop:
	mov		v0.16b, v4.16b
	encrypt_block	v0, w22, x21, x8, w7	/* v0 = keystream block */

	adds		x6, x6, #1		/* increment BE ctr */
	rev		x7, x6
	ins		v4.d[1], x7
	bcs		.Lctrcarry		/* overflow? */

.Lctrcarrydone:
	subs		w23, w23, #1
	bmi		.Lctrtailblock		/* blocks <0 means tail block */
	ld1		{v3.16b}, [x20], #16
	eor		v3.16b, v0.16b, v3.16b
	st1		{v3.16b}, [x19], #16
	bne		.Lctrloop

.Lctrout:
	st1		{v4.16b}, [x24]		/* return next CTR value */
	frame_pop
	ret

.Lctrtailblock:
	st1		{v0.16b}, [x19]		/* raw keystream for the caller */
	b		.Lctrout		/* ctr[] must be updated here too */

.Lctrcarry:
	umov		x7, v4.d[0]		/* load upper word of ctr  */
	rev		x7, x7			/* ... to handle the carry */
	add		x7, x7, #1
	rev		x7, x7
	ins		v4.d[0], x7
	b		.Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)
	.ltorg					/* kept; harmless with no pending literals */
300
301
302	/*
303	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
304	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
305	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
306	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
307	 */
308
/*
 * next_tweak out, in, const, tmp
 *
 * Treats \in as two 64-bit lanes and produces in \out:
 *   - each lane shifted left by one bit (add of the lane to itself),
 *   - xor'ed with a correction taken from the *other* lane: \tmp is
 *     all-ones in every lane of \in whose top bit is set, is masked with
 *     \const, and has its two 64-bit halves swapped by the ext #8.
 * With the .Lxts_mul_x constant that callers in this file pass as \const,
 * a top bit shifted out of one lane contributes 1 to the other lane and a
 * top bit shifted out of the other contributes 0x87.
 *
 * \in is fully read (sshr, add) and \const is fully read (and) before \out
 * is written, so \out may be the same register as \in or as \const; callers
 * in this file use both forms.  \tmp is clobbered.
 */
309	.macro		next_tweak, out, in, const, tmp
310	sshr		\tmp\().2d,  \in\().2d,   #63
311	and		\tmp\().16b, \tmp\().16b, \const\().16b
312	add		\out\().2d,  \in\().2d,   \in\().2d
313	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
314	eor		\out\().16b, \out\().16b, \tmp\().16b
315	.endm
316
/*
 * 128-bit constant loaded into q7 by the XTS routines and passed to
 * next_tweak as its \const operand.  It holds the two 64-bit values 1 and
 * 0x87, emitted in opposite order by the CPU_LE and CPU_BE variants.
 * NOTE(review): CPU_LE()/CPU_BE() are defined elsewhere; presumably exactly
 * one of them emits its argument depending on kernel endianness - confirm.
 */
317.Lxts_mul_x:
318CPU_LE(	.quad		1, 0x87		)
319CPU_BE(	.quad		0x87, 1		)
320
/*
 * aes_xts_encrypt - XTS-encrypt 'blocks' 16-byte blocks from in[] to out[].
 *
 * Incoming arguments: x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = blocks,
 * x5 = rk2, x6 = iv, w7 = first.  x0-x4 and x6 are copied to x19-x24; x5 and
 * w7 are only used before the main loop.
 *
 * v4 holds the current tweak and v7 the .Lxts_mul_x constant.  If 'first'
 * is non-zero, iv[] is encrypted with rk2 to form the first tweak.
 * Otherwise iv[] is taken to be the tweak of the last block processed and
 * is advanced once before use.  On return (and before control is handed to
 * cond_yield_neon) iv[] holds the tweak of the last block processed.
 */
321AES_ENTRY(aes_xts_encrypt)
322	frame_push	6
323
324	mov		x19, x0
325	mov		x20, x1
326	mov		x21, x2
327	mov		x22, x3
328	mov		x23, x4
329	mov		x24, x6
330
331	ld1		{v4.16b}, [x24]
332	cbz		w7, .Lxtsencnotfirst
333
	/* first call: derive the initial tweak with rk2, then switch to rk1 */
334	enc_prepare	w3, x5, x8
335	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
336	enc_switch_key	w3, x2, x8
337	ldr		q7, .Lxts_mul_x
	/* enter the loop past the next_tweak: v4 is already the tweak to use */
338	b		.LxtsencNx
339
	/* label handed to cond_yield_neon: reload the saved tweak, redo key setup */
340.Lxtsencrestart:
341	ld1		{v4.16b}, [x24]
342.Lxtsencnotfirst:
343	enc_prepare	w22, x21, x8
344.LxtsencloopNx:
	/* v7 was overwritten by the 4th tweak of the previous group: reload it */
345	ldr		q7, .Lxts_mul_x
346	next_tweak	v4, v4, v7, v8
347.LxtsencNx:
348	subs		w23, w23, #4
349	bmi		.Lxtsenc1x
350	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 pt blocks */
	/* tweaks for blocks 0-3 end up in v4, v5, v6, v7 */
351	next_tweak	v5, v4, v7, v8
352	eor		v0.16b, v0.16b, v4.16b
353	next_tweak	v6, v5, v7, v8
354	eor		v1.16b, v1.16b, v5.16b
355	eor		v2.16b, v2.16b, v6.16b
	/* from here on v7 is the 4th tweak, no longer the constant */
356	next_tweak	v7, v6, v7, v8
357	eor		v3.16b, v3.16b, v7.16b
358	bl		aes_encrypt_block4x
	/* xor each result with the same tweak that was applied to its input */
359	eor		v3.16b, v3.16b, v7.16b
360	eor		v0.16b, v0.16b, v4.16b
361	eor		v1.16b, v1.16b, v5.16b
362	eor		v2.16b, v2.16b, v6.16b
363	st1		{v0.16b-v3.16b}, [x19], #64
	/* the last tweak used becomes the current tweak */
364	mov		v4.16b, v7.16b
365	cbz		w23, .Lxtsencout
366	st1		{v4.16b}, [x24]
367	cond_yield_neon	.Lxtsencrestart
368	b		.LxtsencloopNx
369.Lxtsenc1x:
	/* undo the trial subtraction; v7 holds the constant again on this path */
370	adds		w23, w23, #4
371	beq		.Lxtsencout
372.Lxtsencloop:
373	ld1		{v1.16b}, [x20], #16
374	eor		v0.16b, v1.16b, v4.16b
375	encrypt_block	v0, w22, x21, x8, w7
376	eor		v0.16b, v0.16b, v4.16b
377	st1		{v0.16b}, [x19], #16
378	subs		w23, w23, #1
	/* leave before advancing so v4 stays the tweak of the last block */
379	beq		.Lxtsencout
380	next_tweak	v4, v4, v7, v8
381	b		.Lxtsencloop
382.Lxtsencout:
383	st1		{v4.16b}, [x24]
384	frame_pop
385	ret
386AES_ENDPROC(aes_xts_encrypt)
387
388
/*
 * aes_xts_decrypt - XTS-decrypt 'blocks' 16-byte blocks from in[] to out[].
 *
 * Incoming arguments: x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = blocks,
 * x5 = rk2, x6 = iv, w7 = first.  x0-x4 and x6 are copied to x19-x24; x5 and
 * w7 are only used before the main loop.
 *
 * v4 holds the current tweak and v7 the .Lxts_mul_x constant.  If 'first'
 * is non-zero, iv[] is *encrypted* with rk2 to form the first tweak, after
 * which the decryption key rk1 is set up.  Otherwise iv[] is taken to be the
 * tweak of the last block processed and is advanced once before use.  On
 * return (and before control is handed to cond_yield_neon) iv[] holds the
 * tweak of the last block processed.
 */
389AES_ENTRY(aes_xts_decrypt)
390	frame_push	6
391
392	mov		x19, x0
393	mov		x20, x1
394	mov		x21, x2
395	mov		x22, x3
396	mov		x23, x4
397	mov		x24, x6
398
399	ld1		{v4.16b}, [x24]
400	cbz		w7, .Lxtsdecnotfirst
401
	/* first call: derive the initial tweak with rk2, then set up rk1 for decryption */
402	enc_prepare	w3, x5, x8
403	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
404	dec_prepare	w3, x2, x8
405	ldr		q7, .Lxts_mul_x
	/* enter the loop past the next_tweak: v4 is already the tweak to use */
406	b		.LxtsdecNx
407
	/* label handed to cond_yield_neon: reload the saved tweak, redo key setup */
408.Lxtsdecrestart:
409	ld1		{v4.16b}, [x24]
410.Lxtsdecnotfirst:
411	dec_prepare	w22, x21, x8
412.LxtsdecloopNx:
	/* v7 was overwritten by the 4th tweak of the previous group: reload it */
413	ldr		q7, .Lxts_mul_x
414	next_tweak	v4, v4, v7, v8
415.LxtsdecNx:
416	subs		w23, w23, #4
417	bmi		.Lxtsdec1x
418	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 ct blocks */
	/* tweaks for blocks 0-3 end up in v4, v5, v6, v7 */
419	next_tweak	v5, v4, v7, v8
420	eor		v0.16b, v0.16b, v4.16b
421	next_tweak	v6, v5, v7, v8
422	eor		v1.16b, v1.16b, v5.16b
423	eor		v2.16b, v2.16b, v6.16b
	/* from here on v7 is the 4th tweak, no longer the constant */
424	next_tweak	v7, v6, v7, v8
425	eor		v3.16b, v3.16b, v7.16b
426	bl		aes_decrypt_block4x
	/* xor each result with the same tweak that was applied to its input */
427	eor		v3.16b, v3.16b, v7.16b
428	eor		v0.16b, v0.16b, v4.16b
429	eor		v1.16b, v1.16b, v5.16b
430	eor		v2.16b, v2.16b, v6.16b
431	st1		{v0.16b-v3.16b}, [x19], #64
	/* the last tweak used becomes the current tweak */
432	mov		v4.16b, v7.16b
433	cbz		w23, .Lxtsdecout
434	st1		{v4.16b}, [x24]
435	cond_yield_neon	.Lxtsdecrestart
436	b		.LxtsdecloopNx
437.Lxtsdec1x:
	/* undo the trial subtraction; v7 holds the constant again on this path */
438	adds		w23, w23, #4
439	beq		.Lxtsdecout
440.Lxtsdecloop:
441	ld1		{v1.16b}, [x20], #16
442	eor		v0.16b, v1.16b, v4.16b
443	decrypt_block	v0, w22, x21, x8, w7
444	eor		v0.16b, v0.16b, v4.16b
445	st1		{v0.16b}, [x19], #16
446	subs		w23, w23, #1
	/* leave before advancing so v4 stays the tweak of the last block */
447	beq		.Lxtsdecout
448	next_tweak	v4, v4, v7, v8
449	b		.Lxtsdecloop
450.Lxtsdecout:
451	st1		{v4.16b}, [x24]
452	frame_pop
453	ret
454AES_ENDPROC(aes_xts_decrypt)
455
456	/*
457	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
458	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
459	 */
/*
 * aes_mac_update - fold 'blocks' 16-byte input blocks into the digest dg[].
 *
 * Incoming arguments: x0 = in, x1 = rk, w2 = rounds, w3 = blocks, x4 = dg,
 * w5 = enc_before, w6 = enc_after.  x0-x4 and x6 are copied to x19-x24.
 *
 * v0 holds the running digest.  If enc_before is non-zero the digest is
 * encrypted once before any input is consumed.  Each input block is xor'ed
 * into the digest, which is then encrypted - except after the very last
 * block, where the encryption is performed only if enc_after is non-zero.
 */
460AES_ENTRY(aes_mac_update)
461	frame_push	6
462
463	mov		x19, x0
464	mov		x20, x1
465	mov		x21, x2
466	mov		x22, x3
467	mov		x23, x4
468	mov		x24, x6
469
470	ld1		{v0.16b}, [x23]			/* get dg */
471	enc_prepare	w2, x1, x7
472	cbz		w5, .Lmacloop4x
473
474	encrypt_block	v0, w2, x1, x7, w8
475
476.Lmacloop4x:
477	subs		w22, w22, #4
478	bmi		.Lmac1x
479	ld1		{v1.16b-v4.16b}, [x19], #64	/* get next pt block */
480	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
481	encrypt_block	v0, w21, x20, x7, w8
482	eor		v0.16b, v0.16b, v2.16b
483	encrypt_block	v0, w21, x20, x7, w8
484	eor		v0.16b, v0.16b, v3.16b
485	encrypt_block	v0, w21, x20, x7, w8
486	eor		v0.16b, v0.16b, v4.16b
	/*
	 * x5 = enc_after if no blocks remain, all-ones otherwise, so the
	 * encryption below is skipped only for the final block when
	 * enc_after is zero.
	 */
487	cmp		w22, wzr
488	csinv		x5, x24, xzr, eq
489	cbz		w5, .Lmacout
490	encrypt_block	v0, w21, x20, x7, w8
491	st1		{v0.16b}, [x23]			/* return dg */
492	cond_yield_neon	.Lmacrestart
493	b		.Lmacloop4x
494.Lmac1x:
	/* undo the trial subtraction */
495	add		w22, w22, #4
496.Lmacloop:
497	cbz		w22, .Lmacout
498	ld1		{v1.16b}, [x19], #16		/* get next pt block */
499	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
500
	/* same last-block / enc_after test as in the 4x loop */
501	subs		w22, w22, #1
502	csinv		x5, x24, xzr, eq
503	cbz		w5, .Lmacout
504
505.Lmacenc:
506	encrypt_block	v0, w21, x20, x7, w8
507	b		.Lmacloop
508
509.Lmacout:
510	st1		{v0.16b}, [x23]			/* return dg */
511	frame_pop
512	ret
513
	/*
	 * Label handed to cond_yield_neon: reload the digest stored just
	 * before, redo key setup from the saved copies of the arguments, and
	 * continue with the 4x loop.  x0 is only a scratch operand here; the
	 * input pointer lives in x19.
	 */
514.Lmacrestart:
515	ld1		{v0.16b}, [x23]			/* get dg */
516	enc_prepare	w21, x20, x0
517	b		.Lmacloop4x
518AES_ENDPROC(aes_mac_update)
519