/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-core.S - AES in ECB/CBC/CTS/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a
	.fpu		crypto-neon-fp-armv8
	.align		3

/*
 * enc_round - one full AES encryption round
 *
 *   \state : q register holding the block, updated in place
 *   \key   : q register holding the round key
 *
 * AESE performs AddRoundKey, SubBytes and ShiftRows; AESMC applies MixColumns.
 */
	.macro		enc_round, state, key
	aese.8		\state, \key
	aesmc.8		\state, \state
	.endm

/*
 * dec_round - one full AES decryption round
 *
 *   \state : q register holding the block, updated in place
 *   \key   : q register holding the round key
 *
 * AESD performs AddRoundKey, InvSubBytes and InvShiftRows; AESIMC applies
 * InvMixColumns.
 */
	.macro		dec_round, state, key
	aesd.8		\state, \key
	aesimc.8	\state, \state
	.endm

/*
 * enc_dround - two consecutive encryption rounds on the single block in q0,
 *              using round keys \key1 and then \key2
 */
	.macro		enc_dround, key1, key2
	enc_round	q0, \key1
	enc_round	q0, \key2
	.endm

/*
 * dec_dround - two consecutive decryption rounds on the single block in q0,
 *              using round keys \key1 and then \key2
 */
	.macro		dec_dround, key1, key2
	dec_round	q0, \key1
	dec_round	q0, \key2
	.endm

/*
 * enc_fround - final two encryption rounds on the single block in q0
 *
 * A full round with \key1 is followed by the last round, which has no
 * MixColumns step (AESE with \key2 only), and by the addition of the final
 * round key \key3.
 */
	.macro		enc_fround, key1, key2, key3
	enc_round	q0, \key1
	aese.8		q0, \key2
	veor		q0, q0, \key3
	.endm

/*
 * dec_fround - final two decryption rounds on the single block in q0
 *
 * A full round with \key1 is followed by the last round, which has no
 * InvMixColumns step (AESD with \key2 only), and by the addition of the final
 * round key \key3.
 */
	.macro		dec_fround, key1, key2, key3
	dec_round	q0, \key1
	aesd.8		q0, \key2
	veor		q0, q0, \key3
	.endm

/*
 * enc_dround_4x - two consecutive encryption rounds on the four blocks held
 *                 in q0-q3
 *
 * The first round (\key1) is applied to all four blocks before the second
 * round (\key2) is started, so the four blocks are processed interleaved.
 */
	.macro		enc_dround_4x, key1, key2
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	enc_round	q3, \key1
	enc_round	q0, \key2
	enc_round	q1, \key2
	enc_round	q2, \key2
	enc_round	q3, \key2
	.endm

/*
 * dec_dround_4x - two consecutive decryption rounds on the four blocks held
 *                 in q0-q3
 *
 * The first round (\key1) is applied to all four blocks before the second
 * round (\key2) is started, so the four blocks are processed interleaved.
 */
	.macro		dec_dround_4x, key1, key2
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	dec_round	q3, \key1
	dec_round	q0, \key2
	dec_round	q1, \key2
	dec_round	q2, \key2
	dec_round	q3, \key2
	.endm

/*
 * enc_fround_4x - final two encryption rounds on the four blocks in q0-q3
 *
 * Same sequence as enc_fround, applied step by step to all four blocks:
 * a full round with \key1, AESE with \key2 (no MixColumns), then the
 * addition of the final round key \key3.
 */
	.macro		enc_fround_4x, key1, key2, key3
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	enc_round	q3, \key1
	aese.8		q0, \key2
	aese.8		q1, \key2
	aese.8		q2, \key2
	aese.8		q3, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	veor		q3, q3, \key3
	.endm

/*
 * dec_fround_4x - final two decryption rounds on the four blocks in q0-q3
 *
 * Same sequence as dec_fround, applied step by step to all four blocks:
 * a full round with \key1, AESD with \key2 (no InvMixColumns), then the
 * addition of the final round key \key3.
 */
	.macro		dec_fround_4x, key1, key2, key3
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	dec_round	q3, \key1
	aesd.8		q0, \key2
	aesd.8		q1, \key2
	aesd.8		q2, \key2
	aesd.8		q3, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	veor		q3, q3, \key3
	.endm

/*
 * do_block - body shared by the internal AES transform routines
 *
 *   \dround : macro applying two middle rounds (takes two round keys)
 *   \fround : macro applying the last two rounds plus the final key addition
 *             (takes three round keys)
 *
 * In:       q8, q9 : first and second round key
 *           q14    : final round key
 *           ip     : address of the third round key
 *           r3     : number of rounds; a value below 12 selects the 10 round
 *                    path, 12 the 12 round path, anything else the 14 round
 *                    path
 * Clobbers: q10-q13, ip, condition flags (plus the state registers touched
 *           by \dround and \fround)
 *
 * Round keys are streamed through q10-q11 and q12-q13 alternately: one pair
 * is loaded while the previously loaded pair is consumed. The flags produced
 * by the initial cmp are only consumed by the blo/beq further down; the NEON
 * loads and AES instructions in between do not modify them.
 *
 * Both exits return with 'bx lr', i.e. to the caller of the routine that
 * expands this macro.
 */
	.macro		do_block, dround, fround
	cmp		r3, #12			@ which key size?
	vld1.32		{q10-q11}, [ip]!
	\dround		q8, q9
	vld1.32		{q12-q13}, [ip]!
	\dround		q10, q11
	vld1.32		{q10-q11}, [ip]!
	\dround		q12, q13
	vld1.32		{q12-q13}, [ip]!
	\dround		q10, q11
	blo		0f			@ AES-128: 10 rounds
	vld1.32		{q10-q11}, [ip]!
	\dround		q12, q13
	beq		1f			@ AES-192: 12 rounds
	vld1.32		{q12-q13}, [ip]		@ last pair, no writeback needed
	\dround		q10, q11
0:	\fround		q12, q13, q14
	bx		lr

1:	\fround		q10, q11, q14
	bx		lr
	.endm

/*
 * Internal, non-AAPCS compliant functions that implement the core AES
 * transforms (aes_encrypt, aes_decrypt, aes_encrypt_4x, aes_decrypt_4x).
 * They preserve all registers except the in/output blocks (q0, or q0 - q3
 * for the _4x versions), the round key scratch registers q10 - q13, ip and
 * the condition flags.
 * Arguments:
 *   q0        : first in/output block
 *   q1        : second in/output block (_4x version only)
 *   q2        : third in/output block (_4x version only)
 *   q3        : fourth in/output block (_4x version only)
 *   q8        : first round key
 *   q9        : second round key
 *   q14       : final round key
 *   r2        : address of round key array
 *   r3        : number of rounds
 *
 * aes_encrypt encrypts the single block in q0. The .Laes_encrypt_tweak label
 * is a second entry point for callers that have already set up ip themselves
 * (used by ce_aes_xts_init to encrypt with a different key schedule).
 */
	.align		6
aes_encrypt:
	add		ip, r2, #32		@ 3rd round key
.Laes_encrypt_tweak:
	do_block	enc_dround, enc_fround
ENDPROC(aes_encrypt)

/*
 * aes_decrypt - internal, non-AAPCS helper: decrypt the single block in q0
 *
 * In:       q0 block, q8/q9 first two round keys, q14 final round key,
 *           r2 address of round key array, r3 number of rounds
 * Clobbers: q0, q10-q13, ip, condition flags
 */
	.align		6
aes_decrypt:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround, dec_fround
ENDPROC(aes_decrypt)

/*
 * aes_encrypt_4x - internal, non-AAPCS helper: encrypt the four blocks in
 *                  q0-q3, interleaved
 *
 * In:       q0-q3 blocks, q8/q9 first two round keys, q14 final round key,
 *           r2 address of round key array, r3 number of rounds
 * Clobbers: q0-q3, q10-q13, ip, condition flags
 */
	.align		6
aes_encrypt_4x:
	add		ip, r2, #32		@ 3rd round key
	do_block	enc_dround_4x, enc_fround_4x
ENDPROC(aes_encrypt_4x)

/*
 * aes_decrypt_4x - internal, non-AAPCS helper: decrypt the four blocks in
 *                  q0-q3, interleaved
 *
 * In:       q0-q3 blocks, q8/q9 first two round keys, q14 final round key,
 *           r2 address of round key array, r3 number of rounds
 * Clobbers: q0-q3, q10-q13, ip, condition flags
 */
	.align		6
aes_decrypt_4x:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround_4x, dec_fround_4x
ENDPROC(aes_decrypt_4x)

/*
 * prepare_key - preload the round keys that stay resident across blocks
 *
 *   \rk     : register holding the address of the round key array
 *   \rounds : register holding the number of rounds
 *
 * Out:      q8, q9 = round keys 0 and 1, q14 = round key number \rounds
 *           (each round key is 16 bytes, hence the 'lsl #4')
 * Clobbers: ip
 */
	.macro		prepare_key, rk, rounds
	add		ip, \rk, \rounds, lsl #4
	vld1.32		{q8-q9}, [\rk]		@ load first 2 round keys
	vld1.32		{q14}, [ip]		@ load last round key
	.endm

/*
 * ce_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks)
 *
 * ECB encryption of 'blocks' 16-byte blocks from in[] to out[].
 * ce_aes_ecb_decrypt() takes the same arguments.
 *
 * r0 = out, r1 = in, r2 = rk, r3 = rounds, r4 = blocks (loaded from the
 * stack). Blocks are processed four at a time for as long as at least four
 * remain, then one at a time. A block count of zero is handled and results
 * in no loads or stores.
 */
ENTRY(ce_aes_ecb_encrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]		@ blocks: first stack arg, above the 2 pushed words
	prepare_key	r2, r3
.Lecbencloop4x:
	subs		r4, r4, #4
	bmi		.Lecbenc1x		@ fewer than 4 blocks left
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2-q3}, [r1]!
	bl		aes_encrypt_4x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	b		.Lecbencloop4x
.Lecbenc1x:
	adds		r4, r4, #4		@ undo the last subtraction
	beq		.Lecbencout		@ nothing left
.Lecbencloop:
	vld1.8		{q0}, [r1]!
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecbencloop
.Lecbencout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_encrypt)

/*
 * ce_aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks)
 *
 * ECB decryption of 'blocks' 16-byte blocks from in[] to out[].
 *
 * r0 = out, r1 = in, r2 = rk, r3 = rounds, r4 = blocks (loaded from the
 * stack). Blocks are processed four at a time for as long as at least four
 * remain, then one at a time. A block count of zero is handled and results
 * in no loads or stores.
 */
ENTRY(ce_aes_ecb_decrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]		@ blocks: first stack arg, above the 2 pushed words
	prepare_key	r2, r3
.Lecbdecloop4x:
	subs		r4, r4, #4
	bmi		.Lecbdec1x		@ fewer than 4 blocks left
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2-q3}, [r1]!
	bl		aes_decrypt_4x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	b		.Lecbdecloop4x
.Lecbdec1x:
	adds		r4, r4, #4		@ undo the last subtraction
	beq		.Lecbdecout		@ nothing left
.Lecbdecloop:
	vld1.8		{q0}, [r1]!
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_decrypt)

/*
 * ce_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks, u8 iv[])
 *
 * CBC encryption of 'blocks' 16-byte blocks from in[] to out[].
 * ce_aes_cbc_decrypt() takes the same arguments.
 *
 * r0 = out, r1 = in, r2 = rk, r3 = rounds, r4 = blocks, r5 = iv.
 * q0 carries the chaining value: it starts out as the iv and holds the most
 * recent ciphertext block afterwards; the last one is written back to iv[].
 *
 * Each block depends on the previous ciphertext, so blocks are processed one
 * at a time. The loop tests the count only after processing a block, so
 * 'blocks' must be at least 1.
 */
ENTRY(ce_aes_cbc_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ blocks and iv: stack args above the 4 pushed words
	vld1.8		{q0}, [r5]
	prepare_key	r2, r3
.Lcbcencloop:
	vld1.8		{q1}, [r1]!		@ get next pt block
	veor		q0, q0, q1		@ ..and xor with iv
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbcencloop
	vst1.8		{q0}, [r5]		@ return last ct block as next iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_encrypt)

/*
 * ce_aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks, u8 iv[])
 *
 * CBC decryption of 'blocks' 16-byte blocks from in[] to out[].
 *
 * r0 = out, r1 = in, r2 = rk, r3 = rounds, r4 = blocks, r5 = iv.
 * q15 always holds the ciphertext block preceding the one being decrypted
 * (initially the iv); it is written back to iv[] on exit.
 *
 * 4x path:  copies of the four ciphertext blocks are kept in q4-q7 so they
 *           can be xor'ed into the following plaintext blocks after the
 *           interleaved decryption.
 * 1x path:  the previous ciphertext is xor'ed into the final round key (q14),
 *           so the final key addition in aes_decrypt performs the CBC xor as
 *           well. The pristine final round key is kept in q6 for that.
 */
ENTRY(ce_aes_cbc_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ blocks and iv: stack args above the 4 pushed words
	vld1.8		{q15}, [r5]		@ keep iv in q15
	prepare_key	r2, r3
.Lcbcdecloop4x:
	subs		r4, r4, #4
	bmi		.Lcbcdec1x		@ fewer than 4 blocks left
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2-q3}, [r1]!
	vmov		q4, q0			@ save ct blocks for chaining
	vmov		q5, q1
	vmov		q6, q2
	vmov		q7, q3
	bl		aes_decrypt_4x
	veor		q0, q0, q15		@ xor with previous ct / iv
	veor		q1, q1, q4
	veor		q2, q2, q5
	veor		q3, q3, q6
	vmov		q15, q7			@ last ct block is the next iv
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	b		.Lcbcdecloop4x
.Lcbcdec1x:
	adds		r4, r4, #4		@ undo the last subtraction
	beq		.Lcbcdecout		@ nothing left
	vmov		q6, q14			@ preserve last round key
.Lcbcdecloop:
	vld1.8		{q0}, [r1]!		@ get next ct block
	veor		q14, q15, q6		@ combine prev ct with last key
	vmov		q15, q0
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	vst1.8		{q15}, [r5]		@ return next iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)

/*
 * ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
 *			  int rounds, int bytes, u8 const iv[])
 *
 * CBC encryption with ciphertext stealing of the final two blocks: one full
 * block followed by a last block of (bytes - 16) bytes.
 * ce_aes_cbc_cts_decrypt() takes the same arguments.
 *
 * The code reads and writes 16 bytes at offset 0 and 16 bytes at offset
 * (bytes - 16) of in[]/out[], and indexes the permute table with
 * (bytes - 16), so it presumably expects 16 < bytes <= 32.
 * NOTE(review): confirm against the callers.
 *
 * r0 = out, r1 = in, r2 = rk, r3 = rounds, r4 = bytes - 16, r5 = iv.
 * q5 = permute vector that moves the leading bytes of a block to its end
 *      (other lanes are out of range and read as zero with vtbl)
 * q6 = permute vector that moves the trailing bytes of a block to its start
 *
 * The iv is not written back.
 */

ENTRY(ce_aes_cbc_cts_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ bytes and iv: stack args above the 4 pushed words

	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table
	sub		r4, r4, #16			@ size of the final block
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	vld1.8		{q5}, [ip]
	vld1.8		{q6}, [lr]

	add		ip, r1, r4
	vld1.8		{q0}, [r1]			@ overlapping loads
	vld1.8		{q3}, [ip]

	vld1.8		{q1}, [r5]			@ get iv
	prepare_key	r2, r3

	veor		q0, q0, q1			@ xor with iv
	bl		aes_encrypt

	vtbl.8		d4, {d0-d1}, d10		@ q2 = start of 1st ct block,
	vtbl.8		d5, {d0-d1}, d11		@ .. moved to the end
	vtbl.8		d2, {d6-d7}, d12		@ q1 = final pt bytes,
	vtbl.8		d3, {d6-d7}, d13		@ .. zero padded

	veor		q0, q0, q1
	bl		aes_encrypt

	add		r4, r0, r4
	vst1.8		{q2}, [r4]			@ overlapping stores
	vst1.8		{q0}, [r0]

	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_cts_encrypt)

/*
 * ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
 *			  int rounds, int bytes, u8 const iv[])
 *
 * CBC decryption with ciphertext stealing of the final two blocks: one full
 * block followed by a last block of (bytes - 16) bytes.
 *
 * The code reads and writes 16 bytes at offset 0 and 16 bytes at offset
 * (bytes - 16) of in[]/out[], and indexes the permute table with
 * (bytes - 16), so it presumably expects 16 < bytes <= 32.
 * NOTE(review): confirm against the callers.
 *
 * r0 = out, r1 = in, r2 = rk, r3 = rounds, r4 = bytes - 16, r5 = iv.
 * q5 = permute vector that moves the leading bytes of a block to its end
 *      (other lanes are out of range and read as zero with vtbl)
 * q6 = permute vector that moves the trailing bytes of a block to its start
 *      (other lanes are out of range and left untouched by vtbx)
 *
 * The iv is not written back.
 */
ENTRY(ce_aes_cbc_cts_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ bytes and iv: stack args above the 4 pushed words

	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table
	sub		r4, r4, #16			@ size of the final block
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	vld1.8		{q5}, [ip]
	vld1.8		{q6}, [lr]

	add		ip, r1, r4
	vld1.8		{q0}, [r1]			@ overlapping loads
	vld1.8		{q1}, [ip]

	vld1.8		{q3}, [r5]			@ get iv
	prepare_key	r2, r3

	bl		aes_decrypt

	vtbl.8		d4, {d0-d1}, d10		@ q2 = start of decrypted block,
	vtbl.8		d5, {d0-d1}, d11		@ .. moved to the end
	vtbx.8		d0, {d2-d3}, d12		@ merge final ct bytes into the
	vtbx.8		d1, {d2-d3}, d13		@ .. start of q0

	veor		q1, q1, q2			@ final (partial) pt block
	bl		aes_decrypt
	veor		q0, q0, q3			@ xor with iv

	add		r4, r0, r4
	vst1.8		{q1}, [r4]			@ overlapping stores
	vst1.8		{q0}, [r0]

	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_cts_decrypt)


/*
 * ce_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks, u8 ctr[])
 *
 * CTR mode: encrypt successive counter values and xor the result into the
 * input. The counter block is big-endian; its updated value is written back
 * to ctr[] on exit.
 *
 * r0 = out, r1 = in, r2 = rk, r3 = rounds, r4 = blocks, r5 = ctr.
 * q7 = current counter block
 * r6 = least significant 32 bits of the counter (lane 3 of q7, i.e. s31),
 *      byte swapped to CPU order
 *
 * The 4x path only increments the low 32 bits of the counter. It is
 * therefore skipped entirely when adding 'blocks' to r6 carries out of 32
 * bits; the 1x path propagates carries into the upper words (.Lctrcarry).
 *
 * If the block count drops below zero in the 1x path, the key stream block
 * is stored to out[] as is instead of being xor'ed with input
 * (.Lctrtailblock). NOTE(review): presumably the caller passes a negative
 * count to obtain the key stream for a partial final block - confirm against
 * the callers. That store carries a :64 alignment qualifier, so out must be
 * 8 byte aligned in that case.
 */
ENTRY(ce_aes_ctr_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ blocks and ctr: stack args above the 4 pushed words
	vld1.8		{q7}, [r5]		@ load ctr
	prepare_key	r2, r3
	vmov		r6, s31			@ keep swabbed ctr in r6
	rev		r6, r6
	cmn		r6, r4			@ 32 bit overflow?
	bcs		.Lctrloop
.Lctrloop4x:
	subs		r4, r4, #4
	bmi		.Lctr1x			@ fewer than 4 blocks left

	/*
	 * NOTE: the sequence below has been carefully tweaked to avoid
	 * a silicon erratum that exists in Cortex-A57 (#1742098) and
	 * Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs
	 * may produce an incorrect result if they take their input from a
	 * register of which a single 32-bit lane has been updated the last
	 * time it was modified. To work around this, the lanes of registers
	 * q0-q3 below are not manipulated individually, and the different
	 * counter values are prepared by successive manipulations of q7.
	 */
	add		ip, r6, #1
	vmov		q0, q7
	rev		ip, ip
	add		lr, r6, #2
	vmov		s31, ip			@ set lane 3 of q1 via q7
	add		ip, r6, #3
	rev		lr, lr
	vmov		q1, q7
	vmov		s31, lr			@ set lane 3 of q2 via q7
	rev		ip, ip
	vmov		q2, q7
	vmov		s31, ip			@ set lane 3 of q3 via q7
	add		r6, r6, #4
	vmov		q3, q7

	vld1.8		{q4-q5}, [r1]!
	vld1.8		{q6}, [r1]!
	vld1.8		{q15}, [r1]!
	bl		aes_encrypt_4x
	veor		q0, q0, q4
	veor		q1, q1, q5
	veor		q2, q2, q6
	veor		q3, q3, q15
	rev		ip, r6
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	vmov		s31, ip			@ q7 = counter for the next block
	b		.Lctrloop4x
.Lctr1x:
	adds		r4, r4, #4		@ undo the last subtraction
	beq		.Lctrout		@ nothing left
.Lctrloop:
	vmov		q0, q7
	bl		aes_encrypt

	adds		r6, r6, #1		@ increment BE ctr
	rev		ip, r6
	vmov		s31, ip			@ (rev/vmov leave the carry intact)
	bcs		.Lctrcarry

.Lctrcarrydone:
	subs		r4, r4, #1
	bmi		.Lctrtailblock		@ blocks < 0 means tail block
	vld1.8		{q3}, [r1]!
	veor		q3, q0, q3
	vst1.8		{q3}, [r0]!
	bne		.Lctrloop		@ flags still from the subs above

.Lctrout:
	vst1.8		{q7}, [r5]		@ return next CTR value
	pop		{r4-r6, pc}

.Lctrtailblock:
	vst1.8		{q0}, [r0, :64]		@ return the key stream
	b		.Lctrout

.Lctrcarry:
	@ Propagate the carry through the upper three counter words, from
	@ least to most significant, stopping as soon as a word does not wrap.
	.irp		sreg, s30, s29, s28
	vmov		ip, \sreg		@ load next word of ctr
	rev		ip, ip			@ ... to handle the carry
	adds		ip, ip, #1
	rev		ip, ip
	vmov		\sreg, ip
	bcc		.Lctrcarrydone
	.endr
	b		.Lctrcarrydone
ENDPROC(ce_aes_ctr_encrypt)

/*
 * ce_aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
 *		      int bytes, u8 iv[], u32 const rk2[], int first)
 * ce_aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
 *		      int bytes, u8 iv[], u32 const rk2[], int first)
 */

/*
 * next_tweak - derive the next XTS tweak, i.e. multiply \in by x in GF(2^128)
 *
 *   \out   : q register receiving the new tweak (may be the same as \in)
 *   \in    : q register holding the current tweak
 *   \const : q register holding { 1, 0x87 } as two 64-bit lanes (low lane
 *            first), as composed by ce_aes_xts_init
 *   \tmp   : scratch q register
 *
 * Each 64-bit lane is doubled; the bit shifted out of the low lane is carried
 * into the high lane (xor with 1), and the bit shifted out of the high lane
 * is folded back into the low lane (xor with 0x87). Only NEON instructions
 * are used, so the condition flags are left untouched.
 */
	.macro		next_tweak, out, in, const, tmp
	vshr.s64	\tmp, \in, #63		@ all ones in lanes whose top bit is set
	vand		\tmp, \tmp, \const
	vadd.u64	\out, \in, \in		@ shift each lane left by one
	vext.8		\tmp, \tmp, \tmp, #8	@ swap lanes: carries cross over
	veor		\out, \out, \tmp
	.endm

/*
 * ce_aes_xts_init - prologue shared by ce_aes_xts_encrypt/ce_aes_xts_decrypt
 *
 * Internal, non-AAPCS helper. It must be called with 'bl' directly after the
 * caller's 'push {r4-r6, lr}', because the stacked arguments are read at
 * fixed offsets from sp: bytes at #16, iv at #20, rk2 at #24, first at #28.
 *
 * In:   r3 = rounds, stack as described above
 * Out:  q15 = tweak mask vector for next_tweak
 *       r4  = bytes, r5 = iv
 *       q0  = iv if first != 1, otherwise iv encrypted with key 2
 *       r6  = first if first != 1, otherwise the address of key 2 (so r6 is
 *             zero on return only if first == 0)
 * Clobbers: condition flags; when first == 1 also q8-q14 and ip
 */
ce_aes_xts_init:
	vmov.i32	d30, #0x87		@ compose tweak mask vector
	vmovl.u32	q15, d30		@ d30 = d31 = 0x87 as 64-bit values
	vshr.u64	d30, d31, #7		@ d30 = 1, d31 = 0x87

	ldrd		r4, r5, [sp, #16]	@ load args
	ldr		r6, [sp, #28]
	vld1.8		{q0}, [r5]		@ load iv
	teq		r6, #1			@ start of a block?
	bxne		lr

	@ Encrypt the IV in q0 with the second AES key. This should only
	@ be done at the start of a block.
	ldr		r6, [sp, #24]		@ load AES key 2
	prepare_key	r6, r3
	add		ip, r6, #32		@ 3rd round key of key 2
	b		.Laes_encrypt_tweak	@ tail call
ENDPROC(ce_aes_xts_init)

/*
 * ce_aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
 *		      int bytes, u8 iv[], u32 const rk2[], int first)
 *
 * XTS encryption of 'bytes' bytes from in[] to out[]. A byte count that is
 * not a multiple of 16 is handled with ciphertext stealing.
 *
 * r0 = out, r1 = in, r2 = rk1, r3 = rounds; after ce_aes_xts_init also
 * r4 = bytes remaining, r5 = iv, q15 = tweak mask vector.
 * q4 = current tweak; q5-q7 hold the following three tweaks in the 4x path.
 * The tweak in q4 is written back to iv[] on exit.
 *
 * If 'first' is 0, iv[] holds the tweak left behind by a previous call, so it
 * is advanced once before use. A byte count of zero only writes back the
 * tweak.
 */
ENTRY(ce_aes_xts_encrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q4, q0

	teq		r6, #0			@ start of a block?
	bne		.Lxtsenc4x

.Lxtsencloop4x:
	next_tweak	q4, q4, q15, q10
.Lxtsenc4x:
	subs		r4, r4, #64
	bmi		.Lxtsenc1x		@ fewer than 64 bytes left
	vld1.8		{q0-q1}, [r1]!		@ get 4 pt blocks
	vld1.8		{q2-q3}, [r1]!
	next_tweak	q5, q4, q15, q10
	veor		q0, q0, q4
	next_tweak	q6, q5, q15, q10
	veor		q1, q1, q5
	next_tweak	q7, q6, q15, q10
	veor		q2, q2, q6
	veor		q3, q3, q7
	bl		aes_encrypt_4x
	veor		q0, q0, q4
	veor		q1, q1, q5
	veor		q2, q2, q6
	veor		q3, q3, q7
	vst1.8		{q0-q1}, [r0]!		@ write 4 ct blocks
	vst1.8		{q2-q3}, [r0]!
	vmov		q4, q7
	teq		r4, #0
	beq		.Lxtsencret
	b		.Lxtsencloop4x
.Lxtsenc1x:
	adds		r4, r4, #64		@ undo the last subtraction
	beq		.Lxtsencret		@ no input at all: only return the tweak
	subs		r4, r4, #16
	bmi		.LxtsencctsNx		@ less than one block left after a 4x pass
.Lxtsencloop:
	vld1.8		{q0}, [r1]!
.Lxtsencctsout:
	veor		q0, q0, q4
	bl		aes_encrypt
	veor		q0, q0, q4
	teq		r4, #0
	beq		.Lxtsencout
	subs		r4, r4, #16
	next_tweak	q4, q4, q15, q6		@ NEON only: flags from subs survive
	bmi		.Lxtsenccts		@ partial final block follows
	vst1.8		{q0}, [r0]!
	b		.Lxtsencloop
.Lxtsencout:
	vst1.8		{q0}, [r0]
.Lxtsencret:
	vst1.8		{q4}, [r5]
	pop		{r4-r6, pc}

.LxtsencctsNx:
	vmov		q0, q3			@ last ct block of the 4x pass
	sub		r0, r0, #16		@ .. which will be rewritten
.Lxtsenccts:
	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table

	add		r1, r1, r4		@ rewind input pointer
	add		r4, r4, #16		@ # bytes in final block
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	add		r4, r0, r4		@ output address of final block

	vld1.8		{q1}, [r1]		@ load final partial block
	vld1.8		{q2}, [ip]
	vld1.8		{q3}, [lr]

	vtbl.8		d4, {d0-d1}, d4		@ q2 = start of previous ct block,
	vtbl.8		d5, {d0-d1}, d5		@ .. moved to the end
	vtbx.8		d0, {d2-d3}, d6		@ merge final pt bytes into the
	vtbx.8		d1, {d2-d3}, d7		@ .. start of q0

	vst1.8		{q2}, [r4]		@ overlapping stores
	mov		r4, #0			@ nothing left after this block
	b		.Lxtsencctsout
ENDPROC(ce_aes_xts_encrypt)


/*
 * ce_aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
 *		      int bytes, u8 iv[], u32 const rk2[], int first)
 *
 * XTS decryption of 'bytes' bytes from in[] to out[]. A byte count that is
 * not a multiple of 16 is handled with ciphertext stealing.
 *
 * r0 = out, r1 = in, r2 = rk1, r3 = rounds; after ce_aes_xts_init also
 * r4 = bytes remaining, r5 = iv, q15 = tweak mask vector.
 * q4 = current tweak; q5-q7 hold the following three tweaks in the 4x path.
 * The tweak in q4 is written back to iv[] on exit.
 *
 * If 'first' is 0, iv[] holds the tweak left behind by a previous call, so it
 * is advanced once before use.
 *
 * When ciphertext stealing is needed, 16 is subtracted from the byte count up
 * front so that the last full block is not consumed by the regular loops but
 * handled in .Lxtsdeccts, where it is decrypted with the tweak following the
 * current one (q5) before the merged block is decrypted with the current
 * tweak (q4).
 */
ENTRY(ce_aes_xts_decrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q4, q0

	/* subtract 16 bytes if we are doing CTS */
	tst		r4, #0xf
	subne		r4, r4, #0x10

	teq		r6, #0			@ start of a block?
	bne		.Lxtsdec4x

.Lxtsdecloop4x:
	next_tweak	q4, q4, q15, q10
.Lxtsdec4x:
	subs		r4, r4, #64
	bmi		.Lxtsdec1x		@ fewer than 64 bytes left
	vld1.8		{q0-q1}, [r1]!		@ get 4 ct blocks
	vld1.8		{q2-q3}, [r1]!
	next_tweak	q5, q4, q15, q10
	veor		q0, q0, q4
	next_tweak	q6, q5, q15, q10
	veor		q1, q1, q5
	next_tweak	q7, q6, q15, q10
	veor		q2, q2, q6
	veor		q3, q3, q7
	bl		aes_decrypt_4x
	veor		q0, q0, q4
	veor		q1, q1, q5
	veor		q2, q2, q6
	veor		q3, q3, q7
	vst1.8		{q0-q1}, [r0]!		@ write 4 pt blocks
	vst1.8		{q2-q3}, [r0]!
	vmov		q4, q7
	teq		r4, #0
	beq		.Lxtsdecout
	b		.Lxtsdecloop4x
.Lxtsdec1x:
	adds		r4, r4, #64		@ undo the last subtraction
	beq		.Lxtsdecout		@ nothing left
	subs		r4, r4, #16
.Lxtsdecloop:
	vld1.8		{q0}, [r1]!		@ NEON load: flags from subs survive
	bmi		.Lxtsdeccts		@ only the CTS blocks remain
.Lxtsdecctsout:
	veor		q0, q0, q4
	bl		aes_decrypt
	veor		q0, q0, q4
	vst1.8		{q0}, [r0]!
	teq		r4, #0
	beq		.Lxtsdecout
	subs		r4, r4, #16
	next_tweak	q4, q4, q15, q6		@ NEON only: flags from subs survive
	b		.Lxtsdecloop
.Lxtsdecout:
	vst1.8		{q4}, [r5]
	pop		{r4-r6, pc}

.Lxtsdeccts:
	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table

	add		r1, r1, r4		@ rewind input pointer
	add		r4, r4, #16		@ # bytes in final block
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	add		r4, r0, r4		@ output address of final block

	next_tweak	q5, q4, q15, q6

	vld1.8		{q1}, [r1]		@ load final partial block
	vld1.8		{q2}, [ip]
	vld1.8		{q3}, [lr]

	veor		q0, q0, q5		@ last full block uses the
	bl		aes_decrypt		@ .. tweak after the current one
	veor		q0, q0, q5

	vtbl.8		d4, {d0-d1}, d4		@ q2 = start of decrypted block,
	vtbl.8		d5, {d0-d1}, d5		@ .. moved to the end
	vtbx.8		d0, {d2-d3}, d6		@ merge final ct bytes into the
	vtbx.8		d1, {d2-d3}, d7		@ .. start of q0

	vst1.8		{q2}, [r4]		@ overlapping stores
	mov		r4, #0			@ nothing left after this block
	b		.Lxtsdecctsout
ENDPROC(ce_aes_xts_decrypt)

/*
 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
 *                             AES sbox substitution on each byte in
 *                             'input'
 *
 * The input word is replicated into all four lanes and used as the round key
 * on an all-zero state, so the state equals the replicated input when the
 * substitution is applied. As every column is identical, the ShiftRows step
 * that aese also performs does not change the columns, and lane 0 holds the
 * substituted word.
 *
 * In:  r0 = input        Out: r0 = result        Clobbers: q0, q1
 */
ENTRY(ce_aes_sub)
	vdup.32		q1, r0
	veor		q0, q0, q0		@ q0 = 0
	aese.8		q0, q1
	vmov		r0, s0
	bx		lr
ENDPROC(ce_aes_sub)

/*
 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
 *                                        operation on round key *src
 *
 * Reads 16 bytes from src (r1), applies aesimc and writes the 16 byte result
 * to dst (r0). Clobbers q0.
 */
ENTRY(ce_aes_invert)
	vld1.32		{q0}, [r1]
	aesimc.8	q0, q0
	vst1.32		{q0}, [r0]
	bx		lr
ENDPROC(ce_aes_invert)

/*
 * Permute table used by the ciphertext stealing code: 16 bytes of 0xff, the
 * identity permutation 0x0 - 0xf, and another 16 bytes of 0xff.
 *
 * A 16 byte vector loaded at offset n (0 <= n <= 32) is a vtbl/vtbx index
 * vector that shifts a block by (16 - n) byte positions. The 0xff entries
 * are out-of-range indexes, which vtbl turns into zero bytes and vtbx leaves
 * untouched in the destination.
 */
	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff