/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch	armv8-a+crypto

14	/*
15	 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
16	 *			     u32 *macp, u8 const rk[], u32 rounds);
17	 */
18SYM_FUNC_START(ce_aes_ccm_auth_data)
19	ldr	w8, [x3]			/* leftover from prev round? */
20	ld1	{v0.16b}, [x0]			/* load mac */
21	cbz	w8, 1f
22	sub	w8, w8, #16
23	eor	v1.16b, v1.16b, v1.16b
240:	ldrb	w7, [x1], #1			/* get 1 byte of input */
25	subs	w2, w2, #1
26	add	w8, w8, #1
27	ins	v1.b[0], w7
28	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
29	beq	8f				/* out of input? */
30	cbnz	w8, 0b
31	eor	v0.16b, v0.16b, v1.16b
321:	ld1	{v3.4s}, [x4]			/* load first round key */
33	prfm	pldl1strm, [x1]
34	cmp	w5, #12				/* which key size? */
35	add	x6, x4, #16
36	sub	w7, w5, #2			/* modified # of rounds */
37	bmi	2f
38	bne	5f
39	mov	v5.16b, v3.16b
40	b	4f
412:	mov	v4.16b, v3.16b
42	ld1	{v5.4s}, [x6], #16		/* load 2nd round key */
433:	aese	v0.16b, v4.16b
44	aesmc	v0.16b, v0.16b
454:	ld1	{v3.4s}, [x6], #16		/* load next round key */
46	aese	v0.16b, v5.16b
47	aesmc	v0.16b, v0.16b
485:	ld1	{v4.4s}, [x6], #16		/* load next round key */
49	subs	w7, w7, #3
50	aese	v0.16b, v3.16b
51	aesmc	v0.16b, v0.16b
52	ld1	{v5.4s}, [x6], #16		/* load next round key */
53	bpl	3b
54	aese	v0.16b, v4.16b
55	subs	w2, w2, #16			/* last data? */
56	eor	v0.16b, v0.16b, v5.16b		/* final round */
57	bmi	6f
58	ld1	{v1.16b}, [x1], #16		/* load next input block */
59	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
60	bne	1b
616:	st1	{v0.16b}, [x0]			/* store mac */
62	beq	10f
63	adds	w2, w2, #16
64	beq	10f
65	mov	w8, w2
667:	ldrb	w7, [x1], #1
67	umov	w6, v0.b[0]
68	eor	w6, w6, w7
69	strb	w6, [x0], #1
70	subs	w2, w2, #1
71	beq	10f
72	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
73	b	7b
748:	cbz	w8, 91f
75	mov	w7, w8
76	add	w8, w8, #16
779:	ext	v1.16b, v1.16b, v1.16b, #1
78	adds	w7, w7, #1
79	bne	9b
8091:	eor	v0.16b, v0.16b, v1.16b
81	st1	{v0.16b}, [x0]
8210:	str	w8, [x3]
83	ret
84SYM_FUNC_END(ce_aes_ccm_auth_data)
85
86	/*
87	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
88	 * 			 u32 rounds);
89	 */
90SYM_FUNC_START(ce_aes_ccm_final)
91	ld1	{v3.4s}, [x2], #16		/* load first round key */
92	ld1	{v0.16b}, [x0]			/* load mac */
93	cmp	w3, #12				/* which key size? */
94	sub	w3, w3, #2			/* modified # of rounds */
95	ld1	{v1.16b}, [x1]			/* load 1st ctriv */
96	bmi	0f
97	bne	3f
98	mov	v5.16b, v3.16b
99	b	2f
1000:	mov	v4.16b, v3.16b
1011:	ld1	{v5.4s}, [x2], #16		/* load next round key */
102	aese	v0.16b, v4.16b
103	aesmc	v0.16b, v0.16b
104	aese	v1.16b, v4.16b
105	aesmc	v1.16b, v1.16b
1062:	ld1	{v3.4s}, [x2], #16		/* load next round key */
107	aese	v0.16b, v5.16b
108	aesmc	v0.16b, v0.16b
109	aese	v1.16b, v5.16b
110	aesmc	v1.16b, v1.16b
1113:	ld1	{v4.4s}, [x2], #16		/* load next round key */
112	subs	w3, w3, #3
113	aese	v0.16b, v3.16b
114	aesmc	v0.16b, v0.16b
115	aese	v1.16b, v3.16b
116	aesmc	v1.16b, v1.16b
117	bpl	1b
118	aese	v0.16b, v4.16b
119	aese	v1.16b, v4.16b
120	/* final round key cancels out */
121	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
122	st1	{v0.16b}, [x0]			/* store result */
123	ret
124SYM_FUNC_END(ce_aes_ccm_final)
125
126	.macro	aes_ccm_do_crypt,enc
127	ldr	x8, [x6, #8]			/* load lower ctr */
128	ld1	{v0.16b}, [x5]			/* load mac */
129CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
1300:	/* outer loop */
131	ld1	{v1.8b}, [x6]			/* load upper ctr */
132	prfm	pldl1strm, [x1]
133	add	x8, x8, #1
134	rev	x9, x8
135	cmp	w4, #12				/* which key size? */
136	sub	w7, w4, #2			/* get modified # of rounds */
137	ins	v1.d[1], x9			/* no carry in lower ctr */
138	ld1	{v3.4s}, [x3]			/* load first round key */
139	add	x10, x3, #16
140	bmi	1f
141	bne	4f
142	mov	v5.16b, v3.16b
143	b	3f
1441:	mov	v4.16b, v3.16b
145	ld1	{v5.4s}, [x10], #16		/* load 2nd round key */
1462:	/* inner loop: 3 rounds, 2x interleaved */
147	aese	v0.16b, v4.16b
148	aesmc	v0.16b, v0.16b
149	aese	v1.16b, v4.16b
150	aesmc	v1.16b, v1.16b
1513:	ld1	{v3.4s}, [x10], #16		/* load next round key */
152	aese	v0.16b, v5.16b
153	aesmc	v0.16b, v0.16b
154	aese	v1.16b, v5.16b
155	aesmc	v1.16b, v1.16b
1564:	ld1	{v4.4s}, [x10], #16		/* load next round key */
157	subs	w7, w7, #3
158	aese	v0.16b, v3.16b
159	aesmc	v0.16b, v0.16b
160	aese	v1.16b, v3.16b
161	aesmc	v1.16b, v1.16b
162	ld1	{v5.4s}, [x10], #16		/* load next round key */
163	bpl	2b
164	aese	v0.16b, v4.16b
165	aese	v1.16b, v4.16b
166	subs	w2, w2, #16
167	bmi	6f				/* partial block? */
168	ld1	{v2.16b}, [x1], #16		/* load next input block */
169	.if	\enc == 1
170	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
171	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
172	.else
173	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
174	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
175	.endif
176	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
177	st1	{v1.16b}, [x0], #16		/* write output block */
178	bne	0b
179CPU_LE(	rev	x8, x8			)
180	st1	{v0.16b}, [x5]			/* store mac */
181	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
1825:	ret
183
1846:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
185	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
186	st1	{v0.16b}, [x5]			/* store mac */
187	add	w2, w2, #16			/* process partial tail block */
1887:	ldrb	w9, [x1], #1			/* get 1 byte of input */
189	umov	w6, v1.b[0]			/* get top crypted ctr byte */
190	umov	w7, v0.b[0]			/* get top mac byte */
191	.if	\enc == 1
192	eor	w7, w7, w9
193	eor	w9, w9, w6
194	.else
195	eor	w9, w9, w6
196	eor	w7, w7, w9
197	.endif
198	strb	w9, [x0], #1			/* store out byte */
199	strb	w7, [x5], #1			/* store mac byte */
200	subs	w2, w2, #1
201	beq	5b
202	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
203	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
204	b	7b
205	.endm
206
207	/*
208	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
209	 * 			   u8 const rk[], u32 rounds, u8 mac[],
210	 * 			   u8 ctr[]);
211	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
212	 * 			   u8 const rk[], u32 rounds, u8 mac[],
213	 * 			   u8 ctr[]);
214	 */
215SYM_FUNC_START(ce_aes_ccm_encrypt)
216	aes_ccm_do_crypt	1
217SYM_FUNC_END(ce_aes_ccm_encrypt)
218
219SYM_FUNC_START(ce_aes_ccm_decrypt)
220	aes_ccm_do_crypt	0
221SYM_FUNC_END(ce_aes_ccm_decrypt)
222