/*
 * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>

	.text
	.arch	armv8-a+crypto		/* accept the aese/aesmc instructions used below */

16	/*
17	 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
18	 *			     u32 *macp, u8 const rk[], u32 rounds);
19	 */
20ENTRY(ce_aes_ccm_auth_data)
21	ldr	w8, [x3]			/* leftover from prev round? */
22	ld1	{v0.2d}, [x0]			/* load mac */
23	cbz	w8, 1f
24	sub	w8, w8, #16
25	eor	v1.16b, v1.16b, v1.16b
260:	ldrb	w7, [x1], #1			/* get 1 byte of input */
27	subs	w2, w2, #1
28	add	w8, w8, #1
29	ins	v1.b[0], w7
30	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
31	beq	8f				/* out of input? */
32	cbnz	w8, 0b
33	eor	v0.16b, v0.16b, v1.16b
341:	ld1	{v3.2d}, [x4]			/* load first round key */
35	prfm	pldl1strm, [x1]
36	cmp	w5, #12				/* which key size? */
37	add	x6, x4, #16
38	sub	w7, w5, #2			/* modified # of rounds */
39	bmi	2f
40	bne	5f
41	mov	v5.16b, v3.16b
42	b	4f
432:	mov	v4.16b, v3.16b
44	ld1	{v5.2d}, [x6], #16		/* load 2nd round key */
453:	aese	v0.16b, v4.16b
46	aesmc	v0.16b, v0.16b
474:	ld1	{v3.2d}, [x6], #16		/* load next round key */
48	aese	v0.16b, v5.16b
49	aesmc	v0.16b, v0.16b
505:	ld1	{v4.2d}, [x6], #16		/* load next round key */
51	subs	w7, w7, #3
52	aese	v0.16b, v3.16b
53	aesmc	v0.16b, v0.16b
54	ld1	{v5.2d}, [x6], #16		/* load next round key */
55	bpl	3b
56	aese	v0.16b, v4.16b
57	subs	w2, w2, #16			/* last data? */
58	eor	v0.16b, v0.16b, v5.16b		/* final round */
59	bmi	6f
60	ld1	{v1.16b}, [x1], #16		/* load next input block */
61	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
62	bne	1b
636:	st1	{v0.2d}, [x0]			/* store mac */
64	beq	10f
65	adds	w2, w2, #16
66	beq	10f
67	mov	w8, w2
687:	ldrb	w7, [x1], #1
69	umov	w6, v0.b[0]
70	eor	w6, w6, w7
71	strb	w6, [x0], #1
72	subs	w2, w2, #1
73	beq	10f
74	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
75	b	7b
768:	mov	w7, w8
77	add	w8, w8, #16
789:	ext	v1.16b, v1.16b, v1.16b, #1
79	adds	w7, w7, #1
80	bne	9b
81	eor	v0.16b, v0.16b, v1.16b
82	st1	{v0.2d}, [x0]
8310:	str	w8, [x3]
84	ret
85ENDPROC(ce_aes_ccm_auth_data)
86
87	/*
88	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
89	 * 			 u32 rounds);
90	 */
91ENTRY(ce_aes_ccm_final)
92	ld1	{v3.2d}, [x2], #16		/* load first round key */
93	ld1	{v0.2d}, [x0]			/* load mac */
94	cmp	w3, #12				/* which key size? */
95	sub	w3, w3, #2			/* modified # of rounds */
96	ld1	{v1.2d}, [x1]			/* load 1st ctriv */
97	bmi	0f
98	bne	3f
99	mov	v5.16b, v3.16b
100	b	2f
1010:	mov	v4.16b, v3.16b
1021:	ld1	{v5.2d}, [x2], #16		/* load next round key */
103	aese	v0.16b, v4.16b
104	aesmc	v0.16b, v0.16b
105	aese	v1.16b, v4.16b
106	aesmc	v1.16b, v1.16b
1072:	ld1	{v3.2d}, [x2], #16		/* load next round key */
108	aese	v0.16b, v5.16b
109	aesmc	v0.16b, v0.16b
110	aese	v1.16b, v5.16b
111	aesmc	v1.16b, v1.16b
1123:	ld1	{v4.2d}, [x2], #16		/* load next round key */
113	subs	w3, w3, #3
114	aese	v0.16b, v3.16b
115	aesmc	v0.16b, v0.16b
116	aese	v1.16b, v3.16b
117	aesmc	v1.16b, v1.16b
118	bpl	1b
119	aese	v0.16b, v4.16b
120	aese	v1.16b, v4.16b
121	/* final round key cancels out */
122	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
123	st1	{v0.2d}, [x0]			/* store result */
124	ret
125ENDPROC(ce_aes_ccm_final)
126
127	.macro	aes_ccm_do_crypt,enc
128	ldr	x8, [x6, #8]			/* load lower ctr */
129	ld1	{v0.2d}, [x5]			/* load mac */
130	rev	x8, x8				/* keep swabbed ctr in reg */
1310:	/* outer loop */
132	ld1	{v1.1d}, [x6]			/* load upper ctr */
133	prfm	pldl1strm, [x1]
134	add	x8, x8, #1
135	rev	x9, x8
136	cmp	w4, #12				/* which key size? */
137	sub	w7, w4, #2			/* get modified # of rounds */
138	ins	v1.d[1], x9			/* no carry in lower ctr */
139	ld1	{v3.2d}, [x3]			/* load first round key */
140	add	x10, x3, #16
141	bmi	1f
142	bne	4f
143	mov	v5.16b, v3.16b
144	b	3f
1451:	mov	v4.16b, v3.16b
146	ld1	{v5.2d}, [x10], #16		/* load 2nd round key */
1472:	/* inner loop: 3 rounds, 2x interleaved */
148	aese	v0.16b, v4.16b
149	aesmc	v0.16b, v0.16b
150	aese	v1.16b, v4.16b
151	aesmc	v1.16b, v1.16b
1523:	ld1	{v3.2d}, [x10], #16		/* load next round key */
153	aese	v0.16b, v5.16b
154	aesmc	v0.16b, v0.16b
155	aese	v1.16b, v5.16b
156	aesmc	v1.16b, v1.16b
1574:	ld1	{v4.2d}, [x10], #16		/* load next round key */
158	subs	w7, w7, #3
159	aese	v0.16b, v3.16b
160	aesmc	v0.16b, v0.16b
161	aese	v1.16b, v3.16b
162	aesmc	v1.16b, v1.16b
163	ld1	{v5.2d}, [x10], #16		/* load next round key */
164	bpl	2b
165	aese	v0.16b, v4.16b
166	aese	v1.16b, v4.16b
167	subs	w2, w2, #16
168	bmi	6f				/* partial block? */
169	ld1	{v2.16b}, [x1], #16		/* load next input block */
170	.if	\enc == 1
171	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
172	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
173	.else
174	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
175	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
176	.endif
177	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
178	st1	{v1.16b}, [x0], #16		/* write output block */
179	bne	0b
180	rev	x8, x8
181	st1	{v0.2d}, [x5]			/* store mac */
182	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
1835:	ret
184
1856:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
186	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
187	st1	{v0.2d}, [x5]			/* store mac */
188	add	w2, w2, #16			/* process partial tail block */
1897:	ldrb	w9, [x1], #1			/* get 1 byte of input */
190	umov	w6, v1.b[0]			/* get top crypted ctr byte */
191	umov	w7, v0.b[0]			/* get top mac byte */
192	.if	\enc == 1
193	eor	w7, w7, w9
194	eor	w9, w9, w6
195	.else
196	eor	w9, w9, w6
197	eor	w7, w7, w9
198	.endif
199	strb	w9, [x0], #1			/* store out byte */
200	strb	w7, [x5], #1			/* store mac byte */
201	subs	w2, w2, #1
202	beq	5b
203	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
204	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
205	b	7b
206	.endm
207
208	/*
209	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
210	 * 			   u8 const rk[], u32 rounds, u8 mac[],
211	 * 			   u8 ctr[]);
212	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
213	 * 			   u8 const rk[], u32 rounds, u8 mac[],
214	 * 			   u8 ctr[]);
215	 */
216ENTRY(ce_aes_ccm_encrypt)
217	aes_ccm_do_crypt	1
218ENDPROC(ce_aes_ccm_encrypt)
219
220ENTRY(ce_aes_ccm_decrypt)
221	aes_ccm_do_crypt	0
222ENDPROC(ce_aes_ccm_decrypt)
223