/* xref: /openbmc/linux/arch/arm/crypto/aes-neonbs-core.S (revision 4f2c0a4acffbec01079c28f839422e64ddeff004) */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Bit sliced AES using NEON instructions
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

/*
 * The algorithm implemented here is described in detail by the paper
 * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
 * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf)
 *
 * This implementation is based primarily on the OpenSSL implementation
 * for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.fpu		neon

	/*
	 * Named core registers used throughout this file:
	 *   rounds - loop counter for the AES rounds (decremented with subs)
	 *   bskey  - pointer into the bit sliced key schedule (used as the
	 *            base register of the round key loads)
	 */
	rounds		.req	ip
	bskey		.req	r4

	/*
	 * Aliases for the two 64-bit halves of every quad register: qNl is
	 * d(2N) and qNh is d(2N+1). They let macros that are handed a q
	 * register name reach its halves by pasting an 'l' or 'h' suffix
	 * onto that name (see __tbl and __ldr).
	 */
	q0l		.req	d0
	q0h		.req	d1
	q1l		.req	d2
	q1h		.req	d3
	q2l		.req	d4
	q2h		.req	d5
	q3l		.req	d6
	q3h		.req	d7
	q4l		.req	d8
	q4h		.req	d9
	q5l		.req	d10
	q5h		.req	d11
	q6l		.req	d12
	q6h		.req	d13
	q7l		.req	d14
	q7h		.req	d15
	q8l		.req	d16
	q8h		.req	d17
	q9l		.req	d18
	q9h		.req	d19
	q10l		.req	d20
	q10h		.req	d21
	q11l		.req	d22
	q11h		.req	d23
	q12l		.req	d24
	q12h		.req	d25
	q13l		.req	d26
	q13h		.req	d27
	q14l		.req	d28
	q14h		.req	d29
	q15l		.req	d30
	q15h		.req	d31

/*
 * __tbl - byte-permute a 128-bit value using a vector of byte indexes
 *
 *   \out  destination q register
 *   \tbl  q register holding the 16 table bytes
 *   \in   q register holding the 16 byte indexes
 *   \tmp  scratch q register; mandatory only when \out and \tbl name the
 *         same register (an assembly-time .error is raised otherwise)
 *
 * vtbl.8 writes a single d register, so one lookup is issued per half of
 * \out. When \out aliases \tbl, the first lookup destroys the low half of
 * the table; the table is therefore copied to \tmp beforehand and the
 * second lookup reads from that copy.
 */
	.macro		__tbl, out, tbl, in, tmp
	.ifc		\out, \tbl
	.ifb		\tmp
	.error		__tbl needs temp register if out == tbl
	.endif
	vmov		\tmp, \out
	.endif
	vtbl.8		\out\()l, {\tbl}, \in\()l
	.ifc		\out, \tbl
	vtbl.8		\out\()h, {\tmp}, \in\()h
	.else
	vtbl.8		\out\()h, {\tbl}, \in\()h
	.endif
	.endm

/*
 * __ldr - load the 16 bytes at label \sym into q register \out
 *
 * The load is split into two vldr instructions, one per 64-bit half:
 * the low half comes from \sym and the high half from \sym + 8.
 */
	.macro		__ldr, out, sym
	vldr		\out\()l, \sym
	vldr		\out\()h, \sym + 8
	.endm

/*
 * in_bs_ch - XOR network applied to the eight bit planes \b0-\b7 at the
 * start of the forward S-box (it is the first step of the sbox macro).
 *
 * Works in place using veor only, so it is a linear map over GF(2); no
 * scratch registers are needed. The name suggests an input change of
 * basis ahead of the field inversion.
 * NOTE(review): the exact basis is not visible here - see the paper
 * referenced in the file header.
 */
	.macro		in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
	veor		\b2, \b2, \b1
	veor		\b5, \b5, \b6
	veor		\b3, \b3, \b0
	veor		\b6, \b6, \b2
	veor		\b5, \b5, \b0
	veor		\b6, \b6, \b3
	veor		\b3, \b3, \b7
	veor		\b7, \b7, \b5
	veor		\b3, \b3, \b4
	veor		\b4, \b4, \b5
	veor		\b2, \b2, \b7
	veor		\b3, \b3, \b1
	veor		\b1, \b1, \b5
	.endm

/*
 * out_bs_ch - XOR network applied to the eight bit planes at the end of
 * the forward S-box (it is the last step of the sbox macro).
 *
 * Works in place using veor only (linear over GF(2)); no scratch
 * registers are needed. Counterpart of in_bs_ch on the output side of
 * the field inversion.
 */
	.macro		out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
	veor		\b0, \b0, \b6
	veor		\b1, \b1, \b4
	veor		\b4, \b4, \b6
	veor		\b2, \b2, \b0
	veor		\b6, \b6, \b1
	veor		\b1, \b1, \b5
	veor		\b5, \b5, \b3
	veor		\b3, \b3, \b7
	veor		\b7, \b7, \b5
	veor		\b2, \b2, \b5
	veor		\b4, \b4, \b7
	.endm

/*
 * inv_in_bs_ch - XOR network applied to the eight bit planes at the
 * start of the inverse S-box (first step of the inv_sbox macro).
 *
 * Works in place using veor only (linear over GF(2)). Note that the
 * formal parameters are deliberately declared out of numerical order
 * (b6, b1, b2, b4, b7, b0, b3, b5): the positional arguments are bound
 * to those names, and the body is written in terms of the names.
 */
	.macro		inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5
	veor		\b1, \b1, \b7
	veor		\b4, \b4, \b7
	veor		\b7, \b7, \b5
	veor		\b1, \b1, \b3
	veor		\b2, \b2, \b5
	veor		\b3, \b3, \b7
	veor		\b6, \b6, \b1
	veor		\b2, \b2, \b0
	veor		\b5, \b5, \b3
	veor		\b4, \b4, \b6
	veor		\b0, \b0, \b6
	veor		\b1, \b1, \b4
	.endm

/*
 * inv_out_bs_ch - XOR network applied to the eight bit planes at the end
 * of the inverse S-box (last step of the inv_sbox macro).
 *
 * Works in place using veor only (linear over GF(2)). As with
 * inv_in_bs_ch, the formal parameters are declared out of numerical
 * order (b6, b5, b0, b3, b7, b1, b4, b2) and the body refers to them by
 * name.
 */
	.macro		inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
	veor		\b1, \b1, \b5
	veor		\b2, \b2, \b7
	veor		\b3, \b3, \b1
	veor		\b4, \b4, \b5
	veor		\b7, \b7, \b5
	veor		\b3, \b3, \b4
	veor		\b5, \b5, \b0
	veor		\b3, \b3, \b7
	veor		\b6, \b6, \b2
	veor		\b2, \b2, \b1
	veor		\b6, \b6, \b3
	veor		\b3, \b3, \b0
	veor		\b5, \b5, \b6
	.endm

/*
 * mul_gf4 - bit sliced product of the two-plane values (\x0, \x1) and
 * (\y0, \y1); going by the name, a multiplication in GF(2^2).
 *
 * With X0/X1 denoting the values of \x0/\x1 on entry, the results are:
 *   \x1 = (X1 & \y0) ^ (X0 & (\y0 ^ \y1))
 *   \x0 = (X1 & \y0) ^ ((X0 ^ X1) & \y1)
 *
 * \y0 and \y1 are only read. \t0 and \t1 are scratch and are clobbered.
 */
	.macro		mul_gf4, x0, x1, y0, y1, t0, t1
	veor		\t0, \y0, \y1
	vand		\t0, \t0, \x0
	veor		\x0, \x0, \x1
	vand		\t1, \x1, \y0
	vand		\x0, \x0, \y1
	veor		\x1, \t1, \t0
	veor		\x0, \x0, \t1
	.endm

/*
 * mul_gf4_n_gf4 - two independent two-plane products, with their
 * instructions interleaved.
 *
 * With Xn denoting the value of \xn on entry, the results are:
 *
 *   first product, operands (\x0, \x1) and (\y0, \y1), scratch \t0:
 *     \x1 = (X1 & \y0) ^ ((X0 ^ X1) & \y1)
 *     \x0 = ((X0 ^ X1) & \y1) ^ (X0 & (\y0 ^ \y1))
 *
 *   second product, operands (\x2, \x3) and (\y2, \y3), scratch \t1:
 *     \x2 = ((X2 ^ X3) & \y3) ^ (X3 & \y2)
 *     \x3 = (X3 & \y2) ^ (X2 & (\y2 ^ \y3))
 *
 * The second product has the same form as mul_gf4; the first one differs
 * from it. NOTE(review): presumably the "n" in the name refers to an
 * extra constant factor folded into the first product - confirm against
 * the paper referenced in the file header.
 *
 * The \y registers are only read. \t0 and \t1 are clobbered.
 */
	.macro		mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1
	veor		\t0, \y0, \y1
	veor		\t1, \y2, \y3
	vand		\t0, \t0, \x0
	vand		\t1, \t1, \x2
	veor		\x0, \x0, \x1
	veor		\x2, \x2, \x3
	vand		\x1, \x1, \y0
	vand		\x3, \x3, \y2
	vand		\x0, \x0, \y1
	vand		\x2, \x2, \y3
	veor		\x1, \x1, \x0
	veor		\x2, \x2, \x3
	veor		\x0, \x0, \t0
	veor		\x3, \x3, \t1
	.endm

/*
 * mul_gf16_2 - multiply two four-plane values, (\x0-\x3) and (\x4-\x7),
 * by the same four-plane value (\y0-\y3); going by the name, two
 * multiplications in GF(2^4). Each one is assembled from one mul_gf4 and
 * one mul_gf4_n_gf4 plus XOR fix-ups.
 *
 * The results replace \x0-\x7. \y0 and \y1 are XORed with \y2 and \y3
 * while the first product is computed and XORed with them again during
 * the second one, so all four \y registers hold their original values on
 * exit. \t0-\t3 are scratch and are clobbered.
 */
	.macro		mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
				    y0, y1, y2, y3, t0, t1, t2, t3
	veor		\t0, \x0, \x2
	veor		\t1, \x1, \x3
	mul_gf4		\x0, \x1, \y0, \y1, \t2, \t3
	veor		\y0, \y0, \y2
	veor		\y1, \y1, \y3
	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
	veor		\x0, \x0, \t0
	veor		\x2, \x2, \t0
	veor		\x1, \x1, \t1
	veor		\x3, \x3, \t1
	veor		\t0, \x4, \x6
	veor		\t1, \x5, \x7
	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
	veor		\y0, \y0, \y2		// undo the earlier XOR
	veor		\y1, \y1, \y3
	mul_gf4		\x4, \x5, \y0, \y1, \t2, \t3
	veor		\x4, \x4, \t0
	veor		\x6, \x6, \t0
	veor		\x5, \x5, \t1
	veor		\x7, \x7, \t1
	.endm

/*
 * inv_gf256 - non-linear core shared by the forward and inverse S-box;
 * going by the name, a bit sliced inversion in GF(2^8).
 *
 *   \x0-\x7  the eight bit planes, transformed in place
 *   \t0-\t3, \s0-\s3  scratch registers, all clobbered
 *
 * The first part derives four planes from \x0-\x7 using only
 * and/or/xor/not/bit-select operations and leaves them in \s3, \s2, \s1
 * and \t1. The final mul_gf16_2 then multiplies both halves of the input,
 * (\x0-\x3) and (\x4-\x7), by that four-plane value.
 * NOTE(review): the algebra behind the first part is not evident from the
 * instruction stream; see the paper referenced in the file header.
 */
	.macro		inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
				   t0, t1, t2, t3, s0, s1, s2, s3
	veor		\t3, \x4, \x6
	veor		\t0, \x5, \x7
	veor		\t1, \x1, \x3
	veor		\s1, \x7, \x6
	veor		\s0, \x0, \x2
	veor		\s3, \t3, \t0
	vorr		\t2, \t0, \t1
	vand		\s2, \t3, \s0
	vorr		\t3, \t3, \s0
	veor		\s0, \s0, \t1
	vand		\t0, \t0, \t1
	veor		\t1, \x3, \x2
	vand		\s3, \s3, \s0
	vand		\s1, \s1, \t1
	veor		\t1, \x4, \x5
	veor		\s0, \x1, \x0
	veor		\t3, \t3, \s1
	veor		\t2, \t2, \s1
	vand		\s1, \t1, \s0
	vorr		\t1, \t1, \s0
	veor		\t3, \t3, \s3
	veor		\t0, \t0, \s1
	veor		\t2, \t2, \s2
	veor		\t1, \t1, \s3
	veor		\t0, \t0, \s2
	vand		\s0, \x7, \x3
	veor		\t1, \t1, \s2
	vand		\s1, \x6, \x2
	vand		\s2, \x5, \x1
	vorr		\s3, \x4, \x0
	veor		\t3, \t3, \s0
	veor		\t1, \t1, \s2
	veor		\s0, \t0, \s3
	veor		\t2, \t2, \s1
	vand		\s2, \t3, \t1
	veor		\s1, \t2, \s2
	veor		\s3, \s0, \s2
	vbsl		\s1, \t1, \s0
	vmvn		\t0, \s0
	vbsl		\s0, \s1, \s3
	vbsl		\t0, \s1, \s3
	vbsl		\s3, \t3, \t2
	veor		\t3, \t3, \t2
	vand		\s2, \s0, \s3
	veor		\t1, \t1, \t0
	veor		\s2, \s2, \t3
	mul_gf16_2	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
			\s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
	.endm

/*
 * sbox - bit sliced forward S-box on the eight planes \b0-\b7
 *
 * Runs in_bs_ch, inv_gf256 and out_bs_ch back to back. Each step is
 * handed the registers in a different order, which is how the planes are
 * renumbered between steps without any register moves.
 *
 * \t0-\t3 and \s0-\s3 are scratch (clobbered by inv_gf256).
 *
 * The results are not left in \b0..\b7 order: the callers in this file
 * consume them in the order \b0, \b1, \b4, \b6, \b3, \b7, \b2, \b5 (see
 * the mix_cols and bitslice invocations in aesbs_encrypt8).
 */
	.macro		sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
			      t0, t1, t2, t3, s0, s1, s2, s3
	in_bs_ch	\b0, \b1, \b2, \b3, \b4, \b5, \b6, \b7
	inv_gf256	\b6, \b5, \b0, \b3, \b7, \b1, \b4, \b2, \
			\t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3
	out_bs_ch	\b7, \b1, \b4, \b2, \b6, \b5, \b0, \b3
	.endm

/*
 * inv_sbox - bit sliced inverse S-box on the eight planes \b0-\b7
 *
 * Runs inv_in_bs_ch, inv_gf256 and inv_out_bs_ch back to back, handing
 * each step the registers in a different order so that the planes are
 * renumbered between steps without any register moves.
 *
 * \t0-\t3 and \s0-\s3 are scratch (clobbered by inv_gf256).
 *
 * The results are not left in \b0..\b7 order: the callers in this file
 * consume them in the order \b0, \b1, \b6, \b4, \b2, \b7, \b3, \b5 (see
 * the inv_mix_cols and bitslice invocations in aesbs_decrypt8).
 */
	.macro		inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
				  t0, t1, t2, t3, s0, s1, s2, s3
	inv_in_bs_ch	\b0, \b1, \b2, \b3, \b4, \b5, \b6, \b7
	inv_gf256	\b5, \b1, \b2, \b6, \b3, \b7, \b0, \b4, \
			\t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3
	inv_out_bs_ch	\b3, \b7, \b0, \b4, \b5, \b1, \b2, \b6
	.endm

/*
 * shift_rows - add the next bit sliced round key, then permute the bytes
 * of every plane
 *
 *   \x0-\x7  the eight bit planes, updated in place
 *   \t0-\t3  scratch registers (clobbered)
 *   \mask    q register holding the byte permutation fed to __tbl
 *
 * 128 bytes of key material are read from [bskey] as four 32-byte loads.
 * Each load carries a 256-bit alignment hint and post-increments bskey,
 * so bskey has advanced by 128 on exit. Every plane is XORed with its key
 * plane into a scratch register and the permuted result is written back
 * to the plane register. The key loads are interleaved with the
 * arithmetic.
 */
	.macro		shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, \
				    t0, t1, t2, t3, mask
	vld1.8		{\t0-\t1}, [bskey, :256]!
	veor		\t0, \t0, \x0
	vld1.8		{\t2-\t3}, [bskey, :256]!
	veor		\t1, \t1, \x1
	__tbl		\x0, \t0, \mask
	veor		\t2, \t2, \x2
	__tbl		\x1, \t1, \mask
	vld1.8		{\t0-\t1}, [bskey, :256]!
	veor		\t3, \t3, \x3
	__tbl		\x2, \t2, \mask
	__tbl		\x3, \t3, \mask
	vld1.8		{\t2-\t3}, [bskey, :256]!
	veor		\t0, \t0, \x4
	veor		\t1, \t1, \x5
	__tbl		\x4, \t0, \mask
	veor		\t2, \t2, \x6
	__tbl		\x5, \t1, \mask
	veor		\t3, \t3, \x7
	__tbl		\x6, \t2, \mask
	__tbl		\x7, \t3, \mask
	.endm

/*
 * inv_shift_rows - permute the bytes of every plane in place
 *
 *   \x0-\x7  the eight bit planes, updated in place
 *   \t0-\t3  scratch registers (clobbered)
 *   \mask    q register holding the byte permutation fed to __tbl
 *
 * Unlike shift_rows, no key material is touched here (the decryption
 * path adds the round key in inv_mix_cols). Source and destination of
 * each lookup are the same register, so __tbl is given a scratch
 * register to hold a copy of the table.
 */
	.macro		inv_shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, \
					t0, t1, t2, t3, mask
	__tbl		\x0, \x0, \mask, \t0
	__tbl		\x1, \x1, \mask, \t1
	__tbl		\x2, \x2, \mask, \t2
	__tbl		\x3, \x3, \mask, \t3
	__tbl		\x4, \x4, \mask, \t0
	__tbl		\x5, \x5, \mask, \t1
	__tbl		\x6, \x6, \mask, \t2
	__tbl		\x7, \x7, \mask, \t3
	.endm

/*
 * mix_cols - bit sliced column mixing on the eight planes \x0-\x7
 *
 *   \x0-\x7  the eight bit planes, updated in place
 *   \t0-\t7  scratch registers (clobbered)
 *   \inv     leave blank for the encryption path; any non-blank value
 *            selects the output arrangement used by inv_mix_cols
 *
 * Every plane is combined with copies of itself rotated by 12 bytes
 * (vext.8 #12) and by 8 bytes (vext.8 #8) and with rotated neighbouring
 * planes. The last plane (\x7) is additionally folded into the terms for
 * positions 0, 1, 3 and 4.
 *
 * The macro also moves planes between argument positions. Positions 0
 * and 1 stay put; the others receive their 8-byte rotated term from:
 *
 *   \inv blank:     2 <- 4, 3 <- 6, 4 <- 3, 5 <- 7, 6 <- 2, 7 <- 5
 *   \inv non-blank: 2 <- 6, 3 <- 4, 4 <- 2, 5 <- 7, 6 <- 3, 7 <- 5
 *
 * With the argument orders used by aesbs_encrypt8 and aesbs_decrypt8 this
 * undoes the register permutation left behind by sbox/inv_sbox, which is
 * why the following shift_rows/inv_shift_rows are called with q0-q7 in
 * natural order.
 */
	.macro		mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
				  t0, t1, t2, t3, t4, t5, t6, t7, inv
	vext.8		\t0, \x0, \x0, #12
	vext.8		\t1, \x1, \x1, #12
	veor		\x0, \x0, \t0
	vext.8		\t2, \x2, \x2, #12
	veor		\x1, \x1, \t1
	vext.8		\t3, \x3, \x3, #12
	veor		\x2, \x2, \t2
	vext.8		\t4, \x4, \x4, #12
	veor		\x3, \x3, \t3
	vext.8		\t5, \x5, \x5, #12
	veor		\x4, \x4, \t4
	vext.8		\t6, \x6, \x6, #12
	veor		\x5, \x5, \t5
	vext.8		\t7, \x7, \x7, #12
	veor		\x6, \x6, \t6
	veor		\t1, \t1, \x0
	veor		\x7, \x7, \t7
	vext.8		\x0, \x0, \x0, #8
	veor		\t2, \t2, \x1
	veor		\t0, \t0, \x7
	veor		\t1, \t1, \x7
	vext.8		\x1, \x1, \x1, #8
	veor		\t5, \t5, \x4
	veor		\x0, \x0, \t0
	veor		\t6, \t6, \x5
	veor		\x1, \x1, \t1
	vext.8		\t0, \x4, \x4, #8
	veor		\t4, \t4, \x3
	vext.8		\t1, \x5, \x5, #8
	veor		\t7, \t7, \x6
	vext.8		\x4, \x3, \x3, #8
	veor		\t3, \t3, \x2
	vext.8		\x5, \x7, \x7, #8
	veor		\t4, \t4, \x7
	vext.8		\x3, \x6, \x6, #8
	veor		\t3, \t3, \x7
	vext.8		\x6, \x2, \x2, #8
	veor		\x7, \t1, \t5
	.ifb		\inv
	veor		\x2, \t0, \t4
	veor		\x4, \x4, \t3
	veor		\x5, \x5, \t7
	veor		\x3, \x3, \t6
	veor		\x6, \x6, \t2
	.else
	veor		\t3, \t3, \x4
	veor		\x5, \x5, \t7
	veor		\x2, \x3, \t6
	veor		\x3, \t0, \t4
	veor		\x4, \x6, \t2
	vmov		\x6, \t3
	.endif
	.endm

/*
 * inv_mix_cols - add the next bit sliced round key, then apply the
 * inverse column mixing to the eight planes \x0-\x7
 *
 *   \x0-\x7  the eight bit planes, updated in place
 *   \t0-\t7  scratch registers (clobbered)
 *
 * Key handling: 128 bytes are read from [bskey]. The first three 32-byte
 * loads post-increment bskey (+96), the fourth does not, and bskey is
 * then moved back by 224. The net effect is bskey -= 128, i.e. the key
 * schedule is walked backwards, one bit sliced round key per call.
 *
 * Mixing: after the key addition every plane is XORed with 8-byte rotated
 * sums (plane ^ rot8(plane)) of selected planes, and the result is passed
 * through mix_cols with its \inv argument set, so the inverse transform
 * reuses the forward mixing code.
 */
	.macro		inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
				      t0, t1, t2, t3, t4, t5, t6, t7
	vld1.8		{\t0-\t1}, [bskey, :256]!
	veor		\x0, \x0, \t0
	vld1.8		{\t2-\t3}, [bskey, :256]!
	veor		\x1, \x1, \t1
	vld1.8		{\t4-\t5}, [bskey, :256]!
	veor		\x2, \x2, \t2
	vld1.8		{\t6-\t7}, [bskey, :256]
	sub		bskey, bskey, #224	// +96 - 224: step back one key
	veor		\x3, \x3, \t3
	veor		\x4, \x4, \t4
	veor		\x5, \x5, \t5
	veor		\x6, \x6, \t6
	veor		\x7, \x7, \t7
	vext.8		\t0, \x0, \x0, #8
	vext.8		\t6, \x6, \x6, #8
	vext.8		\t7, \x7, \x7, #8
	veor		\t0, \t0, \x0
	vext.8		\t1, \x1, \x1, #8
	veor		\t6, \t6, \x6
	vext.8		\t2, \x2, \x2, #8
	veor		\t7, \t7, \x7
	vext.8		\t3, \x3, \x3, #8
	veor		\t1, \t1, \x1
	vext.8		\t4, \x4, \x4, #8
	veor		\t2, \t2, \x2
	vext.8		\t5, \x5, \x5, #8
	veor		\t3, \t3, \x3
	veor		\t4, \t4, \x4
	veor		\t5, \t5, \x5
	veor		\x0, \x0, \t6
	veor		\x1, \x1, \t6
	veor		\x2, \x2, \t0
	veor		\x4, \x4, \t2
	veor		\x3, \x3, \t1
	veor		\x1, \x1, \t7
	veor		\x2, \x2, \t7
	veor		\x4, \x4, \t6
	veor		\x5, \x5, \t3
	veor		\x3, \x3, \t6
	veor		\x6, \x6, \t4
	veor		\x4, \x4, \t7
	veor		\x5, \x5, \t7
	veor		\x7, \x7, \t5
	mix_cols	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
			\t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1
	.endm

/*
 * swapmove_2x - exchange masked bit groups between two register pairs
 *
 * For each of the pairs (\a0, \b0) and (\a1, \b1) this performs
 *
 *   t = ((b >> \n) ^ a) & \mask
 *   a = a ^ t
 *   b = b ^ (t << \n)
 *
 * which swaps the bits of a selected by \mask with the bits of b selected
 * by (\mask << \n). Shifts operate on 64-bit lanes. The two pairs are
 * processed with interleaved instructions; \t0 and \t1 are scratch.
 */
	.macro		swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
	vshr.u64	\t0, \b0, #\n
	vshr.u64	\t1, \b1, #\n
	veor		\t0, \t0, \a0
	veor		\t1, \t1, \a1
	vand		\t0, \t0, \mask
	vand		\t1, \t1, \mask
	veor		\a0, \a0, \t0
	vshl.s64	\t0, \t0, #\n
	veor		\a1, \a1, \t1
	vshl.s64	\t1, \t1, #\n
	veor		\b0, \b0, \t0
	veor		\b1, \b1, \t1
	.endm

/*
 * bitslice - transpose bits across eight q registers
 *
 * Note the reversed parameter list: the first argument binds to \x7 and
 * the eighth to \x0.
 *
 * Three rounds of swapmove_2x are applied, with distance 1 / mask 0x55,
 * distance 2 / mask 0x33 and distance 4 / mask 0x0f, pairing registers
 * that are 1, 2 and 4 positions apart respectively. The same macro is
 * used both to enter and to leave the bit sliced representation (see
 * aesbs_encrypt8 and aesbs_decrypt8).
 *
 * \t0 holds the 0x55 mask and later the 0x0f mask, \t1 holds the 0x33
 * mask, and \t2/\t3 are the swapmove_2x scratch registers; all four are
 * clobbered.
 */
	.macro		bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
	vmov.i8		\t0, #0x55
	vmov.i8		\t1, #0x33
	swapmove_2x	\x0, \x1, \x2, \x3, 1, \t0, \t2, \t3
	swapmove_2x	\x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
	vmov.i8		\t0, #0x0f		// 0x55 is no longer needed
	swapmove_2x	\x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
	swapmove_2x	\x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
	swapmove_2x	\x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
	swapmove_2x	\x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
	.endm

/* Byte permutation applied to every inner round key before bit slicing */
	.align		4
M0:	.quad		0x02060a0e03070b0f, 0x0004080c0105090d

	/*
	 * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
	 *
	 * Convert an expanded AES key into the layout consumed by
	 * aesbs_encrypt8 and aesbs_decrypt8.
	 *
	 *   r0 = out     destination (stores carry 128/256-bit alignment hints)
	 *   r1 = rk      source round keys, rounds + 1 keys of 16 bytes each
	 *   r2 = rounds  number of AES rounds
	 *
	 * Output layout:
	 *   - 16 bytes: round 0 key, copied unchanged
	 *   - (rounds - 1) x 128 bytes: one entry per inner round key. The
	 *     key bytes are permuted with M0 and every key bit is expanded
	 *     into a full byte (0x00 or 0xff) with vtst, giving eight 16-byte
	 *     planes, one per bit position, stored from bit 0 to bit 7. The
	 *     planes for bits 0, 1, 5 and 6 are stored inverted; those are
	 *     exactly the bits set in 0x63.
	 *   - 16 bytes: last round key XORed with 0x63 in every byte
	 *
	 * NOTE(review): the 0x63 handling presumably accounts for the S-box
	 * affine constant, which the sbox macro itself never adds - confirm.
	 *
	 * All of q0-q15 are overwritten and none are saved or restored here.
	 */
ENTRY(aesbs_convert_key)
	vld1.32		{q7}, [r1]!		// load round 0 key
	vld1.32		{q15}, [r1]!		// load round 1 key

	vmov.i8		q8,  #0x01		// bit masks
	vmov.i8		q9,  #0x02
	vmov.i8		q10, #0x04
	vmov.i8		q11, #0x08
	vmov.i8		q12, #0x10
	vmov.i8		q13, #0x20
	__ldr		q14, M0

	sub		r2, r2, #1		// rounds - 1 inner round keys
	vst1.8		{q7}, [r0, :128]!	// save round 0 key

.Lkey_loop:
	__tbl		q7, q15, q14		// q7 = permuted round key
	vmov.i8		q6, #0x40		// no spare registers for these
	vmov.i8		q15, #0x80		// two masks: rebuild them here

	vtst.8		q0, q7, q8
	vtst.8		q1, q7, q9
	vtst.8		q2, q7, q10
	vtst.8		q3, q7, q11
	vtst.8		q4, q7, q12
	vtst.8		q5, q7, q13
	vtst.8		q6, q7, q6
	vtst.8		q7, q7, q15
	vld1.32		{q15}, [r1]!		// load next round key
	vmvn		q0, q0			// invert planes 0, 1, 5, 6
	vmvn		q1, q1
	vmvn		q5, q5
	vmvn		q6, q6

	subs		r2, r2, #1		// flags survive the NEON stores
	vst1.8		{q0-q1}, [r0, :256]!
	vst1.8		{q2-q3}, [r0, :256]!
	vst1.8		{q4-q5}, [r0, :256]!
	vst1.8		{q6-q7}, [r0, :256]!
	bne		.Lkey_loop

	vmov.i8		q7, #0x63		// compose .L63
	veor		q15, q15, q7
	vst1.8		{q15}, [r0, :128]
	bx		lr
ENDPROC(aesbs_convert_key)

/* Byte permutation applied to the input blocks after the round 0 key */
	.align		4
M0SR:	.quad		0x0a0e02060f03070b, 0x0004080c05090d01

	/*
	 * aesbs_encrypt8 - encrypt eight blocks in parallel (file-local
	 * helper, not a C-callable function)
	 *
	 * In:   q0-q7   the eight input blocks
	 *       bskey   key schedule as written by aesbs_convert_key
	 *       rounds  number of AES rounds
	 * Out:  the eight result blocks in q0, q1, q4, q6, q3, q7, q2, q5
	 *       (in that order)
	 * Clobbers: q8-q15, bskey, rounds, flags
	 *
	 * Flow: XOR the round 0 key into every block, permute with M0SR and
	 * bit slice. Then loop: sbox, mix_cols, shift_rows (which also adds
	 * the round key). The loop is entered at the S-box. Ahead of the final
	 * S-box, shift_rows uses the SRM0 permutation instead of SR and
	 * mix_cols is skipped after it. Finally undo the bit slicing and
	 * XOR in the last round key.
	 */
aesbs_encrypt8:
	vld1.8		{q9}, [bskey, :128]!	// round 0 key
	__ldr		q8, M0SR

	veor		q10, q0, q9		// xor with round0 key
	veor		q11, q1, q9
	__tbl		q0, q10, q8
	veor		q12, q2, q9
	__tbl		q1, q11, q8
	veor		q13, q3, q9
	__tbl		q2, q12, q8
	veor		q14, q4, q9
	__tbl		q3, q13, q8
	veor		q15, q5, q9
	__tbl		q4, q14, q8
	veor		q10, q6, q9
	__tbl		q5, q15, q8
	veor		q11, q7, q9
	__tbl		q6, q10, q8
	__tbl		q7, q11, q8

	bitslice	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11

	sub		rounds, rounds, #1
	b		.Lenc_sbox

	/* Permutations for shift_rows: SR in the loop, SRM0 for the last pass */
	.align		5
SR:	.quad		0x0504070600030201, 0x0f0e0d0c0a09080b
SRM0:	.quad		0x0304090e00050a0f, 0x01060b0c0207080d

.Lenc_last:
	__ldr		q12, SRM0
.Lenc_loop:
	shift_rows	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12
.Lenc_sbox:
	sbox		q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, \
								q13, q14, q15
	subs		rounds, rounds, #1
	bcc		.Lenc_done		// counter was already 0: finished

	mix_cols	q0, q1, q4, q6, q3, q7, q2, q5, q8, q9, q10, q11, q12, \
								q13, q14, q15

	beq		.Lenc_last		// flags still from subs above
	__ldr		q12, SR
	b		.Lenc_loop

.Lenc_done:
	vld1.8		{q12}, [bskey, :128]	// last round key

	bitslice	q0, q1, q4, q6, q3, q7, q2, q5, q8, q9, q10, q11

	veor		q0, q0, q12
	veor		q1, q1, q12
	veor		q4, q4, q12
	veor		q6, q6, q12
	veor		q3, q3, q12
	veor		q7, q7, q12
	veor		q2, q2, q12
	veor		q5, q5, q12
	bx		lr
ENDPROC(aesbs_encrypt8)

/* Byte permutation applied to the input blocks after the first key XOR */
	.align		4
M0ISR:	.quad		0x0a0e0206070b0f03, 0x0004080c0d010509

	/*
	 * aesbs_decrypt8 - decrypt eight blocks in parallel (file-local
	 * helper, not a C-callable function)
	 *
	 * In:   q0-q7   the eight input blocks
	 *       bskey   start of the key schedule written by
	 *               aesbs_convert_key
	 *       rounds  number of AES rounds
	 * Out:  the eight result blocks in q0, q1, q6, q4, q2, q7, q3, q5
	 *       (in that order)
	 * Clobbers: q8-q15, bskey, rounds, flags
	 *
	 * The key schedule is walked backwards. bskey is first moved to the
	 * final 16-byte key at offset rounds * 128 - 112, which is the first
	 * key applied here, and then back by 128 to the last bit sliced key.
	 * Each inv_mix_cols consumes one bit sliced key and steps bskey back
	 * by another 128, so at .Ldec_done it is 112 bytes below the start
	 * of the schedule, and adding 112 reaches the 16-byte key stored
	 * there.
	 *
	 * Flow: XOR the first key into every block, permute with M0ISR and
	 * bit slice. Then loop: inv_sbox, inv_mix_cols (which also adds the
	 * round key), inv_shift_rows. The loop is entered at the S-box. Ahead
	 * of the final S-box, inv_shift_rows uses ISRM0 instead of ISR and
	 * inv_mix_cols is skipped after it. Finally undo the bit slicing
	 * and XOR in the key at the start of the schedule.
	 */
aesbs_decrypt8:
	add		bskey, bskey, rounds, lsl #7
	sub		bskey, bskey, #112
	vld1.8		{q9}, [bskey, :128]	// round 0 key
	sub		bskey, bskey, #128
	__ldr		q8, M0ISR

	veor		q10, q0, q9		// xor with round0 key
	veor		q11, q1, q9
	__tbl		q0, q10, q8
	veor		q12, q2, q9
	__tbl		q1, q11, q8
	veor		q13, q3, q9
	__tbl		q2, q12, q8
	veor		q14, q4, q9
	__tbl		q3, q13, q8
	veor		q15, q5, q9
	__tbl		q4, q14, q8
	veor		q10, q6, q9
	__tbl		q5, q15, q8
	veor		q11, q7, q9
	__tbl		q6, q10, q8
	__tbl		q7, q11, q8

	bitslice	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11

	sub		rounds, rounds, #1
	b		.Ldec_sbox

	/* Permutations for inv_shift_rows: ISR in the loop, ISRM0 at the end */
	.align		5
ISR:	.quad		0x0504070602010003, 0x0f0e0d0c080b0a09
ISRM0:	.quad		0x01040b0e0205080f, 0x0306090c00070a0d

.Ldec_last:
	__ldr		q12, ISRM0
.Ldec_loop:
	inv_shift_rows	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12
.Ldec_sbox:
	inv_sbox	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, \
								q13, q14, q15
	subs		rounds, rounds, #1
	bcc		.Ldec_done		// counter was already 0: finished

	inv_mix_cols	q0, q1, q6, q4, q2, q7, q3, q5, q8, q9, q10, q11, q12, \
								q13, q14, q15

	beq		.Ldec_last		// flags still from subs above
	__ldr		q12, ISR
	b		.Ldec_loop

.Ldec_done:
	add		bskey, bskey, #112
	vld1.8		{q12}, [bskey, :128]	// last round key

	bitslice	q0, q1, q6, q4, q2, q7, q3, q5, q8, q9, q10, q11

	veor		q0, q0, q12
	veor		q1, q1, q12
	veor		q6, q6, q12
	veor		q4, q4, q12
	veor		q2, q2, q12
	veor		q7, q7, q12
	veor		q3, q3, q12
	veor		q5, q5, q12
	bx		lr
ENDPROC(aesbs_decrypt8)

/*
 * __ecb_crypt - body shared by the two ECB entry points below
 *
 *   \do8     routine processing eight blocks held in q0-q7
 *            (aesbs_encrypt8 or aesbs_decrypt8)
 *   \o0-\o7  registers in which \do8 leaves its eight results, in block
 *            order
 */
	/*
	 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		     int blocks)
	 * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		     int blocks)
	 *
	 * r0 = out, r1 = in, r2 = rk, r3 = rounds; 'blocks' is the fifth
	 * argument and is fetched from the stack into r5.
	 *
	 * Blocks are handled in groups of eight. When fewer than eight remain,
	 * a computed branch skips the leading loads and lands on the last
	 * (blocks & 7) of them, so those blocks occupy the highest-numbered
	 * input registers. A matching computed branch ahead of the stores
	 * writes out only the corresponding last (blocks & 7) results. Both
	 * branches are computed as label - 4 * (blocks & 7) and therefore rely
	 * on every vld1/vst1 in the sequences being 4 bytes long.
	 */
	.macro		__ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
	push		{r4-r6, lr}
	ldr		r5, [sp, #16]		// number of blocks

99:	adr		ip, 0f
	and		lr, r5, #7
	cmp		r5, #8
	sub		ip, ip, lr, lsl #2
	movlt		pc, ip			// computed goto if blocks < 8

	vld1.8		{q0}, [r1]!
	vld1.8		{q1}, [r1]!
	vld1.8		{q2}, [r1]!
	vld1.8		{q3}, [r1]!
	vld1.8		{q4}, [r1]!
	vld1.8		{q5}, [r1]!
	vld1.8		{q6}, [r1]!
	vld1.8		{q7}, [r1]!

0:	mov		bskey, r2		// \do8 modifies bskey and
	mov		rounds, r3		// rounds: reload every pass
	bl		\do8

	adr		ip, 1f
	and		lr, r5, #7
	cmp		r5, #8
	sub		ip, ip, lr, lsl #2
	movlt		pc, ip			// computed goto if blocks < 8

	vst1.8		{\o0}, [r0]!
	vst1.8		{\o1}, [r0]!
	vst1.8		{\o2}, [r0]!
	vst1.8		{\o3}, [r0]!
	vst1.8		{\o4}, [r0]!
	vst1.8		{\o5}, [r0]!
	vst1.8		{\o6}, [r0]!
	vst1.8		{\o7}, [r0]!

1:	subs		r5, r5, #8
	bgt		99b			// more blocks left

	pop		{r4-r6, pc}
	.endm

667cc477bf6SArd Biesheuvel	.align		4
668cc477bf6SArd BiesheuvelENTRY(aesbs_ecb_encrypt)
	/*
	 * ECB encryption entry point: expands __ecb_crypt with
	 * aesbs_encrypt8 as the 8-way routine. The register list gives the
	 * order in which the result registers are stored to the output.
	 */
669cc477bf6SArd Biesheuvel	__ecb_crypt	aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5
670cc477bf6SArd BiesheuvelENDPROC(aesbs_ecb_encrypt)
671cc477bf6SArd Biesheuvel
672cc477bf6SArd Biesheuvel	.align		4
673cc477bf6SArd BiesheuvelENTRY(aesbs_ecb_decrypt)
	/*
	 * ECB decryption entry point: expands __ecb_crypt with
	 * aesbs_decrypt8 as the 8-way routine. Note that the order of the
	 * result registers differs from the one used for encryption.
	 */
674cc477bf6SArd Biesheuvel	__ecb_crypt	aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5
675cc477bf6SArd BiesheuvelENDPROC(aesbs_ecb_decrypt)
676cc477bf6SArd Biesheuvel
677cc477bf6SArd Biesheuvel	/*
678cc477bf6SArd Biesheuvel	 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
679cc477bf6SArd Biesheuvel	 *		     int rounds, int blocks, u8 iv[])
680cc477bf6SArd Biesheuvel	 */
681cc477bf6SArd Biesheuvel	.align		4
682cc477bf6SArd BiesheuvelENTRY(aesbs_cbc_decrypt)
	/*
	 * Register use: r0 = out, r1 = in, r2 = rk, r3 = rounds,
	 * r5 = blocks left, r6 = pointer to the iv. Up to 8 blocks are
	 * processed per loop iteration.
	 */
683cc477bf6SArd Biesheuvel	mov		ip, sp			// remember where the stack args live
684cc477bf6SArd Biesheuvel	push		{r4-r6, lr}
685cc477bf6SArd Biesheuvel	ldm		ip, {r5-r6}		// load args 4-5
686cc477bf6SArd Biesheuvel
	/*
	 * Load the ciphertext through a copy of the input pointer (lr), so
	 * that r1 still points at the start of this batch when the same
	 * blocks are re-read further down for the CBC xor. With fewer than 8
	 * blocks, only the last (blocks & 7) loads are executed.
	 */
68745a4777eSArd Biesheuvel99:	adr		ip, 0f
688cc477bf6SArd Biesheuvel	and		lr, r5, #7
689cc477bf6SArd Biesheuvel	cmp		r5, #8
690cc477bf6SArd Biesheuvel	sub		ip, ip, lr, lsl #2
691cc477bf6SArd Biesheuvel	mov		lr, r1			// lr is free again; mov leaves the flags alone
69245a4777eSArd Biesheuvel	movlt		pc, ip			// computed goto if blocks < 8
693cc477bf6SArd Biesheuvel
694cc477bf6SArd Biesheuvel	vld1.8		{q0}, [lr]!
695cc477bf6SArd Biesheuvel	vld1.8		{q1}, [lr]!
696cc477bf6SArd Biesheuvel	vld1.8		{q2}, [lr]!
697cc477bf6SArd Biesheuvel	vld1.8		{q3}, [lr]!
698cc477bf6SArd Biesheuvel	vld1.8		{q4}, [lr]!
699cc477bf6SArd Biesheuvel	vld1.8		{q5}, [lr]!
700cc477bf6SArd Biesheuvel	vld1.8		{q6}, [lr]!
701cc477bf6SArd Biesheuvel	vld1.8		{q7}, [lr]
702cc477bf6SArd Biesheuvel
703cc477bf6SArd Biesheuvel0:	mov		bskey, r2
704cc477bf6SArd Biesheuvel	mov		rounds, r3
705cc477bf6SArd Biesheuvel	bl		aesbs_decrypt8
706cc477bf6SArd Biesheuvel
	/*
	 * Preload q8-q15 with the iv. Whichever of q9-q15 is not overwritten
	 * by a ciphertext load below keeps the iv, which is what the first
	 * block of a short batch has to be xor'ed with.
	 */
707cc477bf6SArd Biesheuvel	vld1.8		{q8}, [r6]
708cc477bf6SArd Biesheuvel	vmov		q9, q8
709cc477bf6SArd Biesheuvel	vmov		q10, q8
710cc477bf6SArd Biesheuvel	vmov		q11, q8
711cc477bf6SArd Biesheuvel	vmov		q12, q8
712cc477bf6SArd Biesheuvel	vmov		q13, q8
713cc477bf6SArd Biesheuvel	vmov		q14, q8
714cc477bf6SArd Biesheuvel	vmov		q15, q8
715cc477bf6SArd Biesheuvel
71645a4777eSArd Biesheuvel	adr		ip, 1f
717cc477bf6SArd Biesheuvel	and		lr, r5, #7
718cc477bf6SArd Biesheuvel	cmp		r5, #8
719cc477bf6SArd Biesheuvel	sub		ip, ip, lr, lsl #2
72045a4777eSArd Biesheuvel	movlt		pc, ip			// computed goto if blocks < 8
721cc477bf6SArd Biesheuvel
	/*
	 * Re-read the ciphertext, shifted by one register: block i ends up in
	 * the register that is xor'ed into the decryption of block i + 1.
	 * The nop fills the eighth slot of the jump target sequence, so a
	 * jump to 1f - 4 * n executes n - 1 loads.
	 */
722cc477bf6SArd Biesheuvel	vld1.8		{q9}, [r1]!
723cc477bf6SArd Biesheuvel	vld1.8		{q10}, [r1]!
724cc477bf6SArd Biesheuvel	vld1.8		{q11}, [r1]!
725cc477bf6SArd Biesheuvel	vld1.8		{q12}, [r1]!
726cc477bf6SArd Biesheuvel	vld1.8		{q13}, [r1]!
727cc477bf6SArd Biesheuvel	vld1.8		{q14}, [r1]!
728cc477bf6SArd Biesheuvel	vld1.8		{q15}, [r1]!
729cc477bf6SArd Biesheuvel	W(nop)
730cc477bf6SArd Biesheuvel
	/*
	 * lr (blocks & 7) and the flags from the cmp above are still live
	 * here. Each block takes two instructions below, hence lsl #3.
	 */
73145a4777eSArd Biesheuvel1:	adr		ip, 2f
732cc477bf6SArd Biesheuvel	sub		ip, ip, lr, lsl #3
73345a4777eSArd Biesheuvel	movlt		pc, ip			// computed goto if blocks < 8
734cc477bf6SArd Biesheuvel
735cc477bf6SArd Biesheuvel	veor		q0, q0, q8
736cc477bf6SArd Biesheuvel	vst1.8		{q0}, [r0]!
737cc477bf6SArd Biesheuvel	veor		q1, q1, q9
738cc477bf6SArd Biesheuvel	vst1.8		{q1}, [r0]!
739cc477bf6SArd Biesheuvel	veor		q6, q6, q10
740cc477bf6SArd Biesheuvel	vst1.8		{q6}, [r0]!
741cc477bf6SArd Biesheuvel	veor		q4, q4, q11
742cc477bf6SArd Biesheuvel	vst1.8		{q4}, [r0]!
743cc477bf6SArd Biesheuvel	veor		q2, q2, q12
744cc477bf6SArd Biesheuvel	vst1.8		{q2}, [r0]!
745cc477bf6SArd Biesheuvel	veor		q7, q7, q13
746cc477bf6SArd Biesheuvel	vst1.8		{q7}, [r0]!
747cc477bf6SArd Biesheuvel	veor		q3, q3, q14
748cc477bf6SArd Biesheuvel	vst1.8		{q3}, [r0]!
749cc477bf6SArd Biesheuvel	veor		q5, q5, q15
750cc477bf6SArd Biesheuvel	vld1.8		{q8}, [r1]!		// load next round's iv
751cc477bf6SArd Biesheuvel2:	vst1.8		{q5}, [r0]!
752cc477bf6SArd Biesheuvel
753cc477bf6SArd Biesheuvel	subs		r5, r5, #8
754cc477bf6SArd Biesheuvel	vst1.8		{q8}, [r6]		// store next round's iv
755cc477bf6SArd Biesheuvel	bgt		99b
756cc477bf6SArd Biesheuvel
757cc477bf6SArd Biesheuvel	pop		{r4-r6, pc}
758cc477bf6SArd BiesheuvelENDPROC(aesbs_cbc_decrypt)
759cc477bf6SArd Biesheuvel
760cc477bf6SArd Biesheuvel	.macro		next_ctr, q
	/*
	 * Write the 128-bit counter held in r7-r10 (r7 = most significant
	 * word, r10 = least significant word) into \q, reversing the bytes
	 * of each 32-bit word, and then add 1 to the counter in r7-r10 with
	 * carry propagation. Clobbers the condition flags.
	 *
	 * \q\()l and \q\()h resolve to the qNl/qNh aliases of the low and
	 * high d register of \q defined at the top of this file.
	 *
	 * The computed goto in aesbs_ctr_encrypt steps back a fixed distance
	 * per expansion of this macro (7 instructions), so the instruction
	 * count here must not change without updating that arithmetic.
	 */
761*c8bf850eSArd Biesheuvel	vmov		\q\()h, r9, r10
762cc477bf6SArd Biesheuvel	adds		r10, r10, #1
763cc477bf6SArd Biesheuvel	adcs		r9, r9, #0
764*c8bf850eSArd Biesheuvel	vmov		\q\()l, r7, r8
765cc477bf6SArd Biesheuvel	adcs		r8, r8, #0
766cc477bf6SArd Biesheuvel	adc		r7, r7, #0
767cc477bf6SArd Biesheuvel	vrev32.8	\q, \q
768cc477bf6SArd Biesheuvel	.endm
769cc477bf6SArd Biesheuvel
770cc477bf6SArd Biesheuvel	/*
771cc477bf6SArd Biesheuvel	 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
772*c8bf850eSArd Biesheuvel	 *		     int rounds, int bytes, u8 ctr[])
773cc477bf6SArd Biesheuvel	 */
774cc477bf6SArd BiesheuvelENTRY(aesbs_ctr_encrypt)
	/*
	 * Register use: r0 = out, r1 = in, r2 = rk, r3 = rounds,
	 * r5 = bytes left, r6 = pointer to the counter block,
	 * r7-r10 = counter value for the next block (see next_ctr),
	 * q0 = counter block for the first block of the current batch.
	 * Up to 128 bytes (8 blocks) are processed per loop iteration; a
	 * batch shorter than 8 blocks uses the last registers of each
	 * sequence, and a trailing partial block is handled at label 3.
	 */
775cc477bf6SArd Biesheuvel	mov		ip, sp
776cc477bf6SArd Biesheuvel	push		{r4-r10, lr}
777cc477bf6SArd Biesheuvel
778*c8bf850eSArd Biesheuvel	ldm		ip, {r5, r6}		// load args 4-5
779cc477bf6SArd Biesheuvel	vld1.8		{q0}, [r6]		// load counter
780cc477bf6SArd Biesheuvel	vrev32.8	q1, q0			// byte swap each word for integer arithmetic
781cc477bf6SArd Biesheuvel	vmov		r9, r10, d3
782cc477bf6SArd Biesheuvel	vmov		r7, r8, d2
783cc477bf6SArd Biesheuvel
	// r7-r10 = counter + 1, i.e. the value for the block after q0
784cc477bf6SArd Biesheuvel	adds		r10, r10, #1
785cc477bf6SArd Biesheuvel	adcs		r9, r9, #0
786cc477bf6SArd Biesheuvel	adcs		r8, r8, #0
787cc477bf6SArd Biesheuvel	adc		r7, r7, #0
788cc477bf6SArd Biesheuvel
	/*
	 * Copy q0 into q1-q7, interleaved with the jump target computation:
	 * lr = (bytes - 1) & 112 = 16 * k, and ip = 0f - 2 * lr + lr / 4
	 * = 0f - 28 * k, i.e. k next_ctr expansions (7 instructions each)
	 * before label 0.
	 */
789cc477bf6SArd Biesheuvel99:	vmov		q1, q0
790cc477bf6SArd Biesheuvel	sub		lr, r5, #1
791*c8bf850eSArd Biesheuvel	vmov		q2, q0
792*c8bf850eSArd Biesheuvel	adr		ip, 0f
793*c8bf850eSArd Biesheuvel	vmov		q3, q0
794*c8bf850eSArd Biesheuvel	and		lr, lr, #112
795*c8bf850eSArd Biesheuvel	vmov		q4, q0
796*c8bf850eSArd Biesheuvel	cmp		r5, #112
797*c8bf850eSArd Biesheuvel	vmov		q5, q0
798*c8bf850eSArd Biesheuvel	sub		ip, ip, lr, lsl #1
799*c8bf850eSArd Biesheuvel	vmov		q6, q0
800*c8bf850eSArd Biesheuvel	add		ip, ip, lr, lsr #2
801*c8bf850eSArd Biesheuvel	vmov		q7, q0
802*c8bf850eSArd Biesheuvel	movle		pc, ip			// computed goto if bytes <= 112
803cc477bf6SArd Biesheuvel
804cc477bf6SArd Biesheuvel	next_ctr	q1
805cc477bf6SArd Biesheuvel	next_ctr	q2
806cc477bf6SArd Biesheuvel	next_ctr	q3
807cc477bf6SArd Biesheuvel	next_ctr	q4
808cc477bf6SArd Biesheuvel	next_ctr	q5
809cc477bf6SArd Biesheuvel	next_ctr	q6
810cc477bf6SArd Biesheuvel	next_ctr	q7
811cc477bf6SArd Biesheuvel
812cc477bf6SArd Biesheuvel0:	mov		bskey, r2
813cc477bf6SArd Biesheuvel	mov		rounds, r3
814cc477bf6SArd Biesheuvel	bl		aesbs_encrypt8
815cc477bf6SArd Biesheuvel
	/*
	 * ip = 1f - 4 * ((bytes - 1) / 16): skip the loads that are not
	 * needed. After this sequence:
	 *   C  = bytes >= 128 (another iteration may follow)
	 *   Z  = bytes >= 128, or bytes is a multiple of 16
	 *   r4 = 16 - (bytes & 15)
	 * so NE means that the batch ends in a partial block.
	 */
81645a4777eSArd Biesheuvel	adr		ip, 1f
817*c8bf850eSArd Biesheuvel	sub		lr, r5, #1
818*c8bf850eSArd Biesheuvel	cmp		r5, #128
819*c8bf850eSArd Biesheuvel	bic		lr, lr, #15
820*c8bf850eSArd Biesheuvel	ands		r4, r5, #15		// preserves C flag
821*c8bf850eSArd Biesheuvel	teqcs		r5, r5			// set Z flag if not last iteration
822*c8bf850eSArd Biesheuvel	sub		ip, ip, lr, lsr #2
823*c8bf850eSArd Biesheuvel	rsb		r4, r4, #16
824*c8bf850eSArd Biesheuvel	movcc		pc, ip			// computed goto if bytes < 128
825cc477bf6SArd Biesheuvel
826cc477bf6SArd Biesheuvel	vld1.8		{q8}, [r1]!
827cc477bf6SArd Biesheuvel	vld1.8		{q9}, [r1]!
828cc477bf6SArd Biesheuvel	vld1.8		{q10}, [r1]!
829cc477bf6SArd Biesheuvel	vld1.8		{q11}, [r1]!
830cc477bf6SArd Biesheuvel	vld1.8		{q12}, [r1]!
831cc477bf6SArd Biesheuvel	vld1.8		{q13}, [r1]!
832cc477bf6SArd Biesheuvel	vld1.8		{q14}, [r1]!
	// partial final block: back up so that the 16 byte load ends at the
	// end of the input
833*c8bf850eSArd Biesheuvel1:	subne		r1, r1, r4
834cc477bf6SArd Biesheuvel	vld1.8		{q15}, [r1]!
835cc477bf6SArd Biesheuvel
	// turn the load jump target into the matching store jump target
836*c8bf850eSArd Biesheuvel	add		ip, ip, #2f - 1b
837cc477bf6SArd Biesheuvel
838cc477bf6SArd Biesheuvel	veor		q0, q0, q8
839cc477bf6SArd Biesheuvel	veor		q1, q1, q9
840cc477bf6SArd Biesheuvel	veor		q4, q4, q10
841cc477bf6SArd Biesheuvel	veor		q6, q6, q11
842cc477bf6SArd Biesheuvel	veor		q3, q3, q12
843cc477bf6SArd Biesheuvel	veor		q7, q7, q13
844cc477bf6SArd Biesheuvel	veor		q2, q2, q14
845*c8bf850eSArd Biesheuvel	bne		3f			// partial final block
846*c8bf850eSArd Biesheuvel	veor		q5, q5, q15
847*c8bf850eSArd Biesheuvel
848*c8bf850eSArd Biesheuvel	movcc		pc, ip			// computed goto if bytes < 128
849*c8bf850eSArd Biesheuvel
850*c8bf850eSArd Biesheuvel	vst1.8		{q0}, [r0]!
851*c8bf850eSArd Biesheuvel	vst1.8		{q1}, [r0]!
852*c8bf850eSArd Biesheuvel	vst1.8		{q4}, [r0]!
853*c8bf850eSArd Biesheuvel	vst1.8		{q6}, [r0]!
854*c8bf850eSArd Biesheuvel	vst1.8		{q3}, [r0]!
855*c8bf850eSArd Biesheuvel	vst1.8		{q7}, [r0]!
856cc477bf6SArd Biesheuvel	vst1.8		{q2}, [r0]!
	// only taken when re-entered from label 3 (partial final block): back
	// up so that the 16 byte store ends at the end of the output
857*c8bf850eSArd Biesheuvel2:	subne		r0, r0, r4
858cc477bf6SArd Biesheuvel	vst1.8		{q5}, [r0]!
859cc477bf6SArd Biesheuvel
860*c8bf850eSArd Biesheuvel	next_ctr	q0			// counter block for the next batch
861cc477bf6SArd Biesheuvel
862*c8bf850eSArd Biesheuvel	subs		r5, r5, #128
863cc477bf6SArd Biesheuvel	bgt		99b
864cc477bf6SArd Biesheuvel
8651a20b966SArd Biesheuvel	vst1.8		{q0}, [r6]		// write back the updated counter
866cc477bf6SArd Biesheuvel	pop		{r4-r10, pc}
8671a20b966SArd Biesheuvel
	/*
	 * Partial final block. lr = .Lpermute_table + (bytes & 15), so q8
	 * holds indices that move the leading keystream bytes of q5 to the
	 * end of the vector (vtbl yields zero for the 0xff entries), lining
	 * them up with the input bytes in q15, which was loaded so that it
	 * ends at the end of the input.
	 */
868*c8bf850eSArd Biesheuvel3:	adr		lr, .Lpermute_table + 16
869*c8bf850eSArd Biesheuvel	cmp		r5, #16			// Z flag remains cleared
870*c8bf850eSArd Biesheuvel	sub		lr, lr, r4
871*c8bf850eSArd Biesheuvel	vld1.8		{q8-q9}, [lr]
872*c8bf850eSArd Biesheuvel	vtbl.8		d16, {q5}, d16
873*c8bf850eSArd Biesheuvel	vtbl.8		d17, {q5}, d17
874*c8bf850eSArd Biesheuvel	veor		q5, q8, q15
875*c8bf850eSArd Biesheuvel	bcc		4f			// have to reload prev if R5 < 16
	// fill the leading bytes of q5 from q2, the output block stored just
	// before this one (vtbx leaves bytes with 0xff indices untouched)
876*c8bf850eSArd Biesheuvel	vtbx.8		d10, {q2}, d18
877*c8bf850eSArd Biesheuvel	vtbx.8		d11, {q2}, d19
878*c8bf850eSArd Biesheuvel	mov		pc, ip			// branch back to VST sequence
879*c8bf850eSArd Biesheuvel
	/*
	 * Fewer than 16 bytes in total: there is no previous output block in
	 * a register, so reload the 16 bytes ending at the end of the output
	 * and keep its leading bytes when storing the result. This path
	 * returns without writing the counter back to [r6].
	 */
880*c8bf850eSArd Biesheuvel4:	sub		r0, r0, r4
881*c8bf850eSArd Biesheuvel	vshr.s8		q9, q9, #7		// create mask for VBIF
882*c8bf850eSArd Biesheuvel	vld1.8		{q8}, [r0]		// reload
883*c8bf850eSArd Biesheuvel	vbif		q5, q8, q9
884*c8bf850eSArd Biesheuvel	vst1.8		{q5}, [r0]
885*c8bf850eSArd Biesheuvel	pop		{r4-r10, pc}
886cc477bf6SArd BiesheuvelENDPROC(aesbs_ctr_encrypt)
887cc477bf6SArd Biesheuvel
888*c8bf850eSArd Biesheuvel	.align		6
	/*
	 * Index table for the vtbl/vtbx permutes in aesbs_ctr_encrypt's
	 * partial block handling: 16 x 0xff, the identity indices 0-15, then
	 * 16 x 0xff again. A 32 byte window is loaded from an offset of
	 * (bytes & 15) into the table, giving a pair of index vectors that
	 * shift a block by that many bytes, with 0xff marking the byte
	 * positions that have no source.
	 */
889*c8bf850eSArd Biesheuvel.Lpermute_table:
890*c8bf850eSArd Biesheuvel	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
891*c8bf850eSArd Biesheuvel	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
892*c8bf850eSArd Biesheuvel	.byte		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
893*c8bf850eSArd Biesheuvel	.byte		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
894*c8bf850eSArd Biesheuvel	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
895*c8bf850eSArd Biesheuvel	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
896*c8bf850eSArd Biesheuvel
897cc477bf6SArd Biesheuvel	.macro		next_tweak, out, in, const, tmp
	/*
	 * Derive the next tweak \out from the tweak \in.
	 *
	 * Each 64-bit half of \in is shifted left by one (vadd of \in with
	 * itself). The bit shifted out of each half is turned into an all
	 * ones/all zeroes mask, and'ed with \const, moved to the other half
	 * (vext #8) and xor'ed into the result. With the \const built by
	 * __xts_prepare8 (1 in the low half, 0x87 in the high half) this is a
	 * 128-bit shift left by one that xors 0x87 into the low byte when a
	 * bit is shifted out at the top.
	 *
	 * \tmp is clobbered. All visible callers pass distinct registers
	 * for \out, \in and \tmp.
	 */
898cc477bf6SArd Biesheuvel	vshr.s64	\tmp, \in, #63
899cc477bf6SArd Biesheuvel	vand		\tmp, \tmp, \const
900cc477bf6SArd Biesheuvel	vadd.u64	\out, \in, \in
901cc477bf6SArd Biesheuvel	vext.8		\tmp, \tmp, \tmp, #8
902cc477bf6SArd Biesheuvel	veor		\out, \out, \tmp
903cc477bf6SArd Biesheuvel	.endm
904cc477bf6SArd Biesheuvel
905cc477bf6SArd Biesheuvel	/*
906cc477bf6SArd Biesheuvel	 * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
9072ed8b790SArd Biesheuvel	 *		     int blocks, u8 iv[], int reorder_last_tweak)
908cc477bf6SArd Biesheuvel	 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
9092ed8b790SArd Biesheuvel	 *		     int blocks, u8 iv[], int reorder_last_tweak)
910cc477bf6SArd Biesheuvel	 */
911*c8bf850eSArd Biesheuvel	.align		6
912cc477bf6SArd Biesheuvel__xts_prepare8:
	/*
	 * Load up to 8 input blocks into q0-q7, xor each with its tweak, and
	 * save the tweaks in the buffer at sp so that __xts_crypt can xor
	 * them into the output again afterwards.
	 *
	 * In:  r1 = input pointer (advanced), r6 = blocks left,
	 *      r7 = pointer to the iv (updated with the next tweak),
	 *      r8 <= 0 if the last two tweaks are to be swapped on the final
	 *      iteration, sp = 16 byte aligned tweak buffer
	 * Clobbers r4, ip, q12-q15 and the flags.
	 *
	 * The current tweak alternates between q14 and q12 from one block to
	 * the next, which is why q12 starts out as a copy of q14: the
	 * computed goto may enter at a block that expects its tweak in q12.
	 */
913cc477bf6SArd Biesheuvel	vld1.8		{q14}, [r7]		// load iv
91438e73b3dSArd Biesheuvel	vmov.i32	d30, #0x87		// compose tweak mask vector
91538e73b3dSArd Biesheuvel	vmovl.u32	q15, d30
91638e73b3dSArd Biesheuvel	vshr.u64	d30, d31, #7
917cc477bf6SArd Biesheuvel	vmov		q12, q14
918cc477bf6SArd Biesheuvel
	// every block below takes 8 instruction slots, hence lsl #5
91945a4777eSArd Biesheuvel	adr		ip, 0f
920cc477bf6SArd Biesheuvel	and		r4, r6, #7
921cc477bf6SArd Biesheuvel	cmp		r6, #8
922cc477bf6SArd Biesheuvel	sub		ip, ip, r4, lsl #5
923cc477bf6SArd Biesheuvel	mov		r4, sp			// r4 = write pointer into the tweak buffer
92445a4777eSArd Biesheuvel	movlt		pc, ip			// computed goto if blocks < 8
925cc477bf6SArd Biesheuvel
926cc477bf6SArd Biesheuvel	vld1.8		{q0}, [r1]!
927cc477bf6SArd Biesheuvel	next_tweak	q12, q14, q15, q13
928cc477bf6SArd Biesheuvel	veor		q0, q0, q14
929cc477bf6SArd Biesheuvel	vst1.8		{q14}, [r4, :128]!
930cc477bf6SArd Biesheuvel
931cc477bf6SArd Biesheuvel	vld1.8		{q1}, [r1]!
932cc477bf6SArd Biesheuvel	next_tweak	q14, q12, q15, q13
933cc477bf6SArd Biesheuvel	veor		q1, q1, q12
934cc477bf6SArd Biesheuvel	vst1.8		{q12}, [r4, :128]!
935cc477bf6SArd Biesheuvel
936cc477bf6SArd Biesheuvel	vld1.8		{q2}, [r1]!
937cc477bf6SArd Biesheuvel	next_tweak	q12, q14, q15, q13
938cc477bf6SArd Biesheuvel	veor		q2, q2, q14
939cc477bf6SArd Biesheuvel	vst1.8		{q14}, [r4, :128]!
940cc477bf6SArd Biesheuvel
941cc477bf6SArd Biesheuvel	vld1.8		{q3}, [r1]!
942cc477bf6SArd Biesheuvel	next_tweak	q14, q12, q15, q13
943cc477bf6SArd Biesheuvel	veor		q3, q3, q12
944cc477bf6SArd Biesheuvel	vst1.8		{q12}, [r4, :128]!
945cc477bf6SArd Biesheuvel
946cc477bf6SArd Biesheuvel	vld1.8		{q4}, [r1]!
947cc477bf6SArd Biesheuvel	next_tweak	q12, q14, q15, q13
948cc477bf6SArd Biesheuvel	veor		q4, q4, q14
949cc477bf6SArd Biesheuvel	vst1.8		{q14}, [r4, :128]!
950cc477bf6SArd Biesheuvel
951cc477bf6SArd Biesheuvel	vld1.8		{q5}, [r1]!
952cc477bf6SArd Biesheuvel	next_tweak	q14, q12, q15, q13
953cc477bf6SArd Biesheuvel	veor		q5, q5, q12
954cc477bf6SArd Biesheuvel	vst1.8		{q12}, [r4, :128]!
955cc477bf6SArd Biesheuvel
956cc477bf6SArd Biesheuvel	vld1.8		{q6}, [r1]!
957cc477bf6SArd Biesheuvel	next_tweak	q12, q14, q15, q13
958cc477bf6SArd Biesheuvel	veor		q6, q6, q14
959cc477bf6SArd Biesheuvel	vst1.8		{q14}, [r4, :128]!
960cc477bf6SArd Biesheuvel
961cc477bf6SArd Biesheuvel	vld1.8		{q7}, [r1]!
962cc477bf6SArd Biesheuvel	next_tweak	q14, q12, q15, q13
	/*
	 * The flags still hold the result of 'cmp r6, #8': on the last
	 * iteration (blocks <= 8), additionally test r8 and swap q12/q14 at
	 * label 1 if it is <= 0.
	 */
9632ed8b790SArd BiesheuvelTHUMB(	itt		le		)
9642ed8b790SArd Biesheuvel	W(cmple)	r8, #0
9652ed8b790SArd Biesheuvel	ble		1f
9662ed8b790SArd Biesheuvel0:	veor		q7, q7, q12
967cc477bf6SArd Biesheuvel	vst1.8		{q12}, [r4, :128]
968cc477bf6SArd Biesheuvel
9692ed8b790SArd Biesheuvel	vst1.8		{q14}, [r7]		// store next iv
970cc477bf6SArd Biesheuvel	bx		lr
9712ed8b790SArd Biesheuvel
9722ed8b790SArd Biesheuvel1:	vswp		q12, q14		// reorder the final two tweaks
9732ed8b790SArd Biesheuvel	b		0b
974cc477bf6SArd BiesheuvelENDPROC(__xts_prepare8)
975cc477bf6SArd Biesheuvel
976cc477bf6SArd Biesheuvel	.macro		__xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
	/*
	 * Common body of aesbs_xts_encrypt and aesbs_xts_decrypt.
	 *
	 * \do8     - routine called (via bl) to transform q0-q7
	 * \o0-\o7  - result registers, in output order
	 *
	 * On entry ip holds the 'reorder last tweak' flag set up by the entry
	 * point. Register use: r0 = out, r1 = in, r2 = rk, r3 = rounds,
	 * r5 = saved sp, r6 = blocks left, r7 = pointer to the iv,
	 * r8 = 1 - flag (tested by __xts_prepare8).
	 */
977cc477bf6SArd Biesheuvel	push		{r4-r8, lr}
978cc477bf6SArd Biesheuvel	mov		r5, sp			// preserve sp
979cc477bf6SArd Biesheuvel	ldrd		r6, r7, [sp, #24]	// get blocks and iv args
980be6d6993SArd Biesheuvel	rsb		r8, ip, #1
981cc477bf6SArd Biesheuvel	sub		ip, sp, #128		// make room for 8x tweak
982cc477bf6SArd Biesheuvel	bic		ip, ip, #0xf		// align sp to 16 bytes
983cc477bf6SArd Biesheuvel	mov		sp, ip
984cc477bf6SArd Biesheuvel
	// load the input, xor in the tweaks and save them at sp
985cc477bf6SArd Biesheuvel99:	bl		__xts_prepare8
986cc477bf6SArd Biesheuvel
987cc477bf6SArd Biesheuvel	mov		bskey, r2
988cc477bf6SArd Biesheuvel	mov		rounds, r3
989cc477bf6SArd Biesheuvel	bl		\do8
990cc477bf6SArd Biesheuvel
	/*
	 * Reload the saved tweaks into q8-q15. With fewer than 8 blocks only
	 * the last (blocks & 7) loads execute, reading from the start of the
	 * buffer, which is where __xts_prepare8 stored them.
	 */
99145a4777eSArd Biesheuvel	adr		ip, 0f
992cc477bf6SArd Biesheuvel	and		lr, r6, #7
993cc477bf6SArd Biesheuvel	cmp		r6, #8
994cc477bf6SArd Biesheuvel	sub		ip, ip, lr, lsl #2
995cc477bf6SArd Biesheuvel	mov		r4, sp
99645a4777eSArd Biesheuvel	movlt		pc, ip			// computed goto if blocks < 8
997cc477bf6SArd Biesheuvel
998cc477bf6SArd Biesheuvel	vld1.8		{q8}, [r4, :128]!
999cc477bf6SArd Biesheuvel	vld1.8		{q9}, [r4, :128]!
1000cc477bf6SArd Biesheuvel	vld1.8		{q10}, [r4, :128]!
1001cc477bf6SArd Biesheuvel	vld1.8		{q11}, [r4, :128]!
1002cc477bf6SArd Biesheuvel	vld1.8		{q12}, [r4, :128]!
1003cc477bf6SArd Biesheuvel	vld1.8		{q13}, [r4, :128]!
1004cc477bf6SArd Biesheuvel	vld1.8		{q14}, [r4, :128]!
1005cc477bf6SArd Biesheuvel	vld1.8		{q15}, [r4, :128]
1006cc477bf6SArd Biesheuvel
	// lr and the flags are still live; two instructions per block below,
	// hence lsl #3
100745a4777eSArd Biesheuvel0:	adr		ip, 1f
1008cc477bf6SArd Biesheuvel	sub		ip, ip, lr, lsl #3
100945a4777eSArd Biesheuvel	movlt		pc, ip			// computed goto if blocks < 8
1010cc477bf6SArd Biesheuvel
1011cc477bf6SArd Biesheuvel	veor		\o0, \o0, q8
1012cc477bf6SArd Biesheuvel	vst1.8		{\o0}, [r0]!
1013cc477bf6SArd Biesheuvel	veor		\o1, \o1, q9
1014cc477bf6SArd Biesheuvel	vst1.8		{\o1}, [r0]!
1015cc477bf6SArd Biesheuvel	veor		\o2, \o2, q10
1016cc477bf6SArd Biesheuvel	vst1.8		{\o2}, [r0]!
1017cc477bf6SArd Biesheuvel	veor		\o3, \o3, q11
1018cc477bf6SArd Biesheuvel	vst1.8		{\o3}, [r0]!
1019cc477bf6SArd Biesheuvel	veor		\o4, \o4, q12
1020cc477bf6SArd Biesheuvel	vst1.8		{\o4}, [r0]!
1021cc477bf6SArd Biesheuvel	veor		\o5, \o5, q13
1022cc477bf6SArd Biesheuvel	vst1.8		{\o5}, [r0]!
1023cc477bf6SArd Biesheuvel	veor		\o6, \o6, q14
1024cc477bf6SArd Biesheuvel	vst1.8		{\o6}, [r0]!
1025cc477bf6SArd Biesheuvel	veor		\o7, \o7, q15
1026cc477bf6SArd Biesheuvel	vst1.8		{\o7}, [r0]!
1027cc477bf6SArd Biesheuvel
1028cc477bf6SArd Biesheuvel1:	subs		r6, r6, #8
1029cc477bf6SArd Biesheuvel	bgt		99b
1030cc477bf6SArd Biesheuvel
1031cc477bf6SArd Biesheuvel	mov		sp, r5			// drop the tweak buffer
1032cc477bf6SArd Biesheuvel	pop		{r4-r8, pc}
1033cc477bf6SArd Biesheuvel	.endm
1034cc477bf6SArd Biesheuvel
1035cc477bf6SArd BiesheuvelENTRY(aesbs_xts_encrypt)
	/*
	 * XTS encryption entry point. The reorder_last_tweak argument is not
	 * read: ip is forced to 0, so __xts_crypt computes r8 = 1 and
	 * __xts_prepare8 never swaps the final two tweaks.
	 */
1036be6d6993SArd Biesheuvel	mov		ip, #0			// never reorder final tweak
1037cc477bf6SArd Biesheuvel	__xts_crypt	aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5
1038cc477bf6SArd BiesheuvelENDPROC(aesbs_xts_encrypt)
1039cc477bf6SArd Biesheuvel
1040cc477bf6SArd BiesheuvelENTRY(aesbs_xts_decrypt)
	/*
	 * XTS decryption entry point. Nothing has been pushed yet, so
	 * [sp, #8] is the third stack argument (reorder_last_tweak); it is
	 * handed to __xts_crypt in ip.
	 */
1041be6d6993SArd Biesheuvel	ldr		ip, [sp, #8]		// reorder final tweak?
1042cc477bf6SArd Biesheuvel	__xts_crypt	aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5
1043cc477bf6SArd BiesheuvelENDPROC(aesbs_xts_decrypt)
1044