/*
 * Accelerated GHASH implementation with ARMv8 PMULL instructions.
 *
 * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	SHASH	.req	v0
	SHASH2	.req	v1
	T1	.req	v2
	T2	.req	v3
	MASK	.req	v4
	XL	.req	v5
	XM	.req	v6
	XH	.req	v7
	IN1	.req	v7

	.text
	.arch		armv8-a+crypto

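	/*
	 * The core operation below multiplies the accumulator by the hash
	 * key H (SHASH) in GF(2^128): the 128x128-bit carryless multiply is
	 * split Karatsuba-style into three 64x64-bit PMULL operations, and
	 * the 256-bit product is then reduced modulo the GHASH polynomial
	 * x^128 + x^7 + x^2 + x + 1, using the constant prepared in MASK.
	 */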
	/*
	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
	 *			   struct ghash_key const *k, const char *head)
	 */
ENTRY(pmull_ghash_update)
	ld1		{SHASH.2d}, [x3]
	ld1		{XL.2d}, [x1]
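	/*
	 * MASK <- GHASH reduction constant (0xe1 << 57 in each doubleword);
	 * SHASH2 <- H.hi ^ H.lo in both halves, precomputed for the
	 * Karatsuba middle product below.
	 */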
	movi		MASK.16b, #0xe1
	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
	shl		MASK.2d, MASK.2d, #57
	eor		SHASH2.16b, SHASH2.16b, SHASH.16b

	/* do the head block first, if supplied */
	cbz		x4, 0f
	ld1		{T1.2d}, [x4]
	b		1f

0:	ld1		{T1.2d}, [x2], #16
	sub		w0, w0, #1

1:	/* multiply XL by SHASH in GF(2^128) */
CPU_LE(	rev64		T1.16b, T1.16b	)

	ext		T2.16b, XL.16b, XL.16b, #8
	ext		IN1.16b, T1.16b, T1.16b, #8
	eor		T1.16b, T1.16b, T2.16b
	eor		XL.16b, XL.16b, IN1.16b

	pmull2		XH.1q, SHASH.2d, XL.2d		// a1 * b1
	eor		T1.16b, T1.16b, XL.16b
	pmull		XL.1q, SHASH.1d, XL.1d		// a0 * b0
	pmull		XM.1q, SHASH2.1d, T1.1d		// (a1 + a0)(b1 + b0)

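	/*
	 * Recombine the Karatsuba partial products into a 256-bit product,
	 * then reduce it modulo the GHASH polynomial: two PMULLs by the
	 * constant in MASK fold the upper 128 bits back into the lower ones.
	 */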
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		T2.16b, XL.16b, XH.16b
	eor		XM.16b, XM.16b, T1.16b
	eor		XM.16b, XM.16b, T2.16b
	pmull		T2.1q, XL.1d, MASK.1d

	mov		XH.d[0], XM.d[1]
	mov		XM.d[1], XL.d[0]

	eor		XL.16b, XM.16b, T2.16b
	ext		T2.16b, XL.16b, XL.16b, #8
	pmull		XL.1q, XL.1d, MASK.1d
	eor		T2.16b, T2.16b, XH.16b
	eor		XL.16b, XL.16b, T2.16b

	cbnz		w0, 0b

	st1		{XL.2d}, [x1]
	ret
ENDPROC(pmull_ghash_update)

	KS		.req	v8
	CTR		.req	v9
	INP		.req	v10

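	/*
	 * Load the AES round keys into v17-v31. Only the registers needed
	 * for the given key size are filled in: 128-bit keys use v21-v31,
	 * 192-bit keys additionally use v19-v20, and 256-bit keys use
	 * v17-v18 as well.
	 */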
	.macro		load_round_keys, rounds, rk
	cmp		\rounds, #12
	blo		2222f		/* 128 bits */
	beq		1111f		/* 192 bits */
	ld1		{v17.4s-v18.4s}, [\rk], #32
1111:	ld1		{v19.4s-v20.4s}, [\rk], #32
2222:	ld1		{v21.4s-v24.4s}, [\rk], #64
	ld1		{v25.4s-v28.4s}, [\rk], #64
	ld1		{v29.4s-v31.4s}, [\rk]
	.endm

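	/*
	 * enc_round performs a single AES round (AESE + AESMC) on \state;
	 * enc_block runs the full AES encryption of \state using the round
	 * keys previously loaded by load_round_keys, dispatching on the key
	 * size in the same way.
	 */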
	.macro		enc_round, state, key
	aese		\state\().16b, \key\().16b
	aesmc		\state\().16b, \state\().16b
	.endm

	.macro		enc_block, state, rounds
	cmp		\rounds, #12
	b.lo		2222f		/* 128 bits */
	b.eq		1111f		/* 192 bits */
	enc_round	\state, v17
	enc_round	\state, v18
1111:	enc_round	\state, v19
	enc_round	\state, v20
2222:	.irp		key, v21, v22, v23, v24, v25, v26, v27, v28, v29
	enc_round	\state, \key
	.endr
	aese		\state\().16b, v30.16b
	eor		\state\().16b, \state\().16b, v31.16b
	.endm

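	/*
	 * GCM en/decryption: for each block, the counter is encrypted with
	 * AES to produce a block of keystream (KS) while the GHASH
	 * accumulator is updated with the ciphertext. On encryption, the
	 * input block is XORed with the keystream produced in the previous
	 * iteration (seeded from ks[] by the caller) before being hashed,
	 * and the keystream computed here is carried over to the next block;
	 * on decryption, the input is hashed as is and decrypted with the
	 * keystream computed in the same iteration.
	 */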
	.macro		pmull_gcm_do_crypt, enc
	ld1		{SHASH.2d}, [x4]
	ld1		{XL.2d}, [x1]
	ldr		x8, [x5, #8]			// load lower counter

	movi		MASK.16b, #0xe1
	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
CPU_LE(	rev		x8, x8		)
	shl		MASK.2d, MASK.2d, #57
	eor		SHASH2.16b, SHASH2.16b, SHASH.16b

	.if		\enc == 1
	ld1		{KS.16b}, [x7]
	.endif

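	/*
	 * Per-block loop. The lower half of the counter is kept in x8 so it
	 * can be incremented with a plain add, and is byte swapped back into
	 * the counter block when inserted into CTR.d[1].
	 */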
0:	ld1		{CTR.8b}, [x5]			// load upper counter
	ld1		{INP.16b}, [x3], #16
	rev		x9, x8
	add		x8, x8, #1
	sub		w0, w0, #1
	ins		CTR.d[1], x9			// set lower counter

	.if		\enc == 1
	eor		INP.16b, INP.16b, KS.16b	// encrypt input
	st1		{INP.16b}, [x2], #16
	.endif

	rev64		T1.16b, INP.16b

	cmp		w6, #12
	b.ge		2f				// AES-192/256?

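	/*
	 * The AES rounds on the counter block are interleaved with the
	 * GHASH multiply, which helps hide the latency of both the
	 * AESE/AESMC and the PMULL instruction sequences.
	 */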
1:	enc_round	CTR, v21

	ext		T2.16b, XL.16b, XL.16b, #8
	ext		IN1.16b, T1.16b, T1.16b, #8

	enc_round	CTR, v22

	eor		T1.16b, T1.16b, T2.16b
	eor		XL.16b, XL.16b, IN1.16b

	enc_round	CTR, v23

	pmull2		XH.1q, SHASH.2d, XL.2d		// a1 * b1
	eor		T1.16b, T1.16b, XL.16b

	enc_round	CTR, v24

	pmull		XL.1q, SHASH.1d, XL.1d		// a0 * b0
	pmull		XM.1q, SHASH2.1d, T1.1d		// (a1 + a0)(b1 + b0)

	enc_round	CTR, v25

	ext		T1.16b, XL.16b, XH.16b, #8
	eor		T2.16b, XL.16b, XH.16b
	eor		XM.16b, XM.16b, T1.16b

	enc_round	CTR, v26

	eor		XM.16b, XM.16b, T2.16b
	pmull		T2.1q, XL.1d, MASK.1d

	enc_round	CTR, v27

	mov		XH.d[0], XM.d[1]
	mov		XM.d[1], XL.d[0]

	enc_round	CTR, v28

	eor		XL.16b, XM.16b, T2.16b

	enc_round	CTR, v29

	ext		T2.16b, XL.16b, XL.16b, #8

	aese		CTR.16b, v30.16b

	pmull		XL.1q, XL.1d, MASK.1d
	eor		T2.16b, T2.16b, XH.16b

	eor		KS.16b, CTR.16b, v31.16b

	eor		XL.16b, XL.16b, T2.16b

	.if		\enc == 0
	eor		INP.16b, INP.16b, KS.16b
	st1		{INP.16b}, [x2], #16
	.endif

	cbnz		w0, 0b

CPU_LE(	rev		x8, x8		)
	st1		{XL.2d}, [x1]
	str		x8, [x5, #8]			// store lower counter

	.if		\enc == 1
	st1		{KS.16b}, [x7]
	.endif

	ret

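	/*
	 * Extra initial rounds for the larger key sizes: AES-256 runs four
	 * more rounds (v17-v20), AES-192 two more (v19-v20), before
	 * branching back into the common sequence at 1: above.
	 */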
2:	b.eq		3f				// AES-192?
	enc_round	CTR, v17
	enc_round	CTR, v18
3:	enc_round	CTR, v19
	enc_round	CTR, v20
	b		1b
	.endm

	/*
	 * void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
	 *			  struct ghash_key const *k, u8 ctr[],
	 *			  int rounds, u8 ks[])
	 */
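	/*
	 * ks[] supplies the keystream for the first block and, on return,
	 * holds the next block of keystream, which the caller can use for
	 * a trailing partial block.
	 */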
ENTRY(pmull_gcm_encrypt)
	pmull_gcm_do_crypt	1
ENDPROC(pmull_gcm_encrypt)

	/*
	 * void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
	 *			  struct ghash_key const *k, u8 ctr[],
	 *			  int rounds)
	 */
ENTRY(pmull_gcm_decrypt)
	pmull_gcm_do_crypt	0
ENDPROC(pmull_gcm_decrypt)

	/*
	 * void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds)
	 */
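	/*
	 * If rk is NULL, the round keys are assumed to have been loaded into
	 * v17-v31 already, e.g. by an earlier call that passed them in.
	 */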
ENTRY(pmull_gcm_encrypt_block)
	cbz		x2, 0f
	load_round_keys	w3, x2
0:	ld1		{v0.16b}, [x1]
	enc_block	v0, w3
	st1		{v0.16b}, [x0]
	ret
ENDPROC(pmull_gcm_encrypt_block)