/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch	armv8-a+crypto		/* aese/aesmc below need +crypto */

14	/*
15	 * u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
16	 *			    u32 macp, u8 const rk[], u32 rounds);
17	 */
18SYM_FUNC_START(ce_aes_ccm_auth_data)
19	ld1	{v0.16b}, [x0]			/* load mac */
20	cbz	w3, 1f
21	sub	w3, w3, #16
22	eor	v1.16b, v1.16b, v1.16b
230:	ldrb	w7, [x1], #1			/* get 1 byte of input */
24	subs	w2, w2, #1
25	add	w3, w3, #1
26	ins	v1.b[0], w7
27	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
28	beq	8f				/* out of input? */
29	cbnz	w3, 0b
30	eor	v0.16b, v0.16b, v1.16b
311:	ld1	{v3.4s}, [x4]			/* load first round key */
32	prfm	pldl1strm, [x1]
33	cmp	w5, #12				/* which key size? */
34	add	x6, x4, #16
35	sub	w7, w5, #2			/* modified # of rounds */
36	bmi	2f
37	bne	5f
38	mov	v5.16b, v3.16b
39	b	4f
402:	mov	v4.16b, v3.16b
41	ld1	{v5.4s}, [x6], #16		/* load 2nd round key */
423:	aese	v0.16b, v4.16b
43	aesmc	v0.16b, v0.16b
444:	ld1	{v3.4s}, [x6], #16		/* load next round key */
45	aese	v0.16b, v5.16b
46	aesmc	v0.16b, v0.16b
475:	ld1	{v4.4s}, [x6], #16		/* load next round key */
48	subs	w7, w7, #3
49	aese	v0.16b, v3.16b
50	aesmc	v0.16b, v0.16b
51	ld1	{v5.4s}, [x6], #16		/* load next round key */
52	bpl	3b
53	aese	v0.16b, v4.16b
54	subs	w2, w2, #16			/* last data? */
55	eor	v0.16b, v0.16b, v5.16b		/* final round */
56	bmi	6f
57	ld1	{v1.16b}, [x1], #16		/* load next input block */
58	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
59	bne	1b
606:	st1	{v0.16b}, [x0]			/* store mac */
61	beq	10f
62	adds	w2, w2, #16
63	beq	10f
64	mov	w3, w2
657:	ldrb	w7, [x1], #1
66	umov	w6, v0.b[0]
67	eor	w6, w6, w7
68	strb	w6, [x0], #1
69	subs	w2, w2, #1
70	beq	10f
71	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
72	b	7b
738:	cbz	w3, 91f
74	mov	w7, w3
75	add	w3, w3, #16
769:	ext	v1.16b, v1.16b, v1.16b, #1
77	adds	w7, w7, #1
78	bne	9b
7991:	eor	v0.16b, v0.16b, v1.16b
80	st1	{v0.16b}, [x0]
8110:	mov	w0, w3
82	ret
83SYM_FUNC_END(ce_aes_ccm_auth_data)
84
85	/*
86	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
87	 * 			 u32 rounds);
88	 */
89SYM_FUNC_START(ce_aes_ccm_final)
90	ld1	{v3.4s}, [x2], #16		/* load first round key */
91	ld1	{v0.16b}, [x0]			/* load mac */
92	cmp	w3, #12				/* which key size? */
93	sub	w3, w3, #2			/* modified # of rounds */
94	ld1	{v1.16b}, [x1]			/* load 1st ctriv */
95	bmi	0f
96	bne	3f
97	mov	v5.16b, v3.16b
98	b	2f
990:	mov	v4.16b, v3.16b
1001:	ld1	{v5.4s}, [x2], #16		/* load next round key */
101	aese	v0.16b, v4.16b
102	aesmc	v0.16b, v0.16b
103	aese	v1.16b, v4.16b
104	aesmc	v1.16b, v1.16b
1052:	ld1	{v3.4s}, [x2], #16		/* load next round key */
106	aese	v0.16b, v5.16b
107	aesmc	v0.16b, v0.16b
108	aese	v1.16b, v5.16b
109	aesmc	v1.16b, v1.16b
1103:	ld1	{v4.4s}, [x2], #16		/* load next round key */
111	subs	w3, w3, #3
112	aese	v0.16b, v3.16b
113	aesmc	v0.16b, v0.16b
114	aese	v1.16b, v3.16b
115	aesmc	v1.16b, v1.16b
116	bpl	1b
117	aese	v0.16b, v4.16b
118	aese	v1.16b, v4.16b
119	/* final round key cancels out */
120	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
121	st1	{v0.16b}, [x0]			/* store result */
122	ret
123SYM_FUNC_END(ce_aes_ccm_final)
124
125	.macro	aes_ccm_do_crypt,enc
126	cbz	x2, 5f
127	ldr	x8, [x6, #8]			/* load lower ctr */
128	ld1	{v0.16b}, [x5]			/* load mac */
129CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
1300:	/* outer loop */
131	ld1	{v1.8b}, [x6]			/* load upper ctr */
132	prfm	pldl1strm, [x1]
133	add	x8, x8, #1
134	rev	x9, x8
135	cmp	w4, #12				/* which key size? */
136	sub	w7, w4, #2			/* get modified # of rounds */
137	ins	v1.d[1], x9			/* no carry in lower ctr */
138	ld1	{v3.4s}, [x3]			/* load first round key */
139	add	x10, x3, #16
140	bmi	1f
141	bne	4f
142	mov	v5.16b, v3.16b
143	b	3f
1441:	mov	v4.16b, v3.16b
145	ld1	{v5.4s}, [x10], #16		/* load 2nd round key */
1462:	/* inner loop: 3 rounds, 2x interleaved */
147	aese	v0.16b, v4.16b
148	aesmc	v0.16b, v0.16b
149	aese	v1.16b, v4.16b
150	aesmc	v1.16b, v1.16b
1513:	ld1	{v3.4s}, [x10], #16		/* load next round key */
152	aese	v0.16b, v5.16b
153	aesmc	v0.16b, v0.16b
154	aese	v1.16b, v5.16b
155	aesmc	v1.16b, v1.16b
1564:	ld1	{v4.4s}, [x10], #16		/* load next round key */
157	subs	w7, w7, #3
158	aese	v0.16b, v3.16b
159	aesmc	v0.16b, v0.16b
160	aese	v1.16b, v3.16b
161	aesmc	v1.16b, v1.16b
162	ld1	{v5.4s}, [x10], #16		/* load next round key */
163	bpl	2b
164	aese	v0.16b, v4.16b
165	aese	v1.16b, v4.16b
166	subs	w2, w2, #16
167	bmi	6f				/* partial block? */
168	ld1	{v2.16b}, [x1], #16		/* load next input block */
169	.if	\enc == 1
170	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
171	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
172	.else
173	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
174	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
175	.endif
176	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
177	st1	{v1.16b}, [x0], #16		/* write output block */
178	bne	0b
179CPU_LE(	rev	x8, x8			)
180	st1	{v0.16b}, [x5]			/* store mac */
181	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
1825:	ret
183
1846:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
185	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
186	st1	{v0.16b}, [x5]			/* store mac */
187	add	w2, w2, #16			/* process partial tail block */
1887:	ldrb	w9, [x1], #1			/* get 1 byte of input */
189	umov	w6, v1.b[0]			/* get top crypted ctr byte */
190	umov	w7, v0.b[0]			/* get top mac byte */
191	.if	\enc == 1
192	eor	w7, w7, w9
193	eor	w9, w9, w6
194	.else
195	eor	w9, w9, w6
196	eor	w7, w7, w9
197	.endif
198	strb	w9, [x0], #1			/* store out byte */
199	strb	w7, [x5], #1			/* store mac byte */
200	subs	w2, w2, #1
201	beq	5b
202	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
203	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
204	b	7b
205	.endm
206
207	/*
208	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
209	 * 			   u8 const rk[], u32 rounds, u8 mac[],
210	 * 			   u8 ctr[]);
211	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
212	 * 			   u8 const rk[], u32 rounds, u8 mac[],
213	 * 			   u8 ctr[]);
214	 */
215SYM_FUNC_START(ce_aes_ccm_encrypt)
216	aes_ccm_do_crypt	1
217SYM_FUNC_END(ce_aes_ccm_encrypt)
218
219SYM_FUNC_START(ce_aes_ccm_decrypt)
220	aes_ccm_do_crypt	0
221SYM_FUNC_END(ce_aes_ccm_decrypt)
222