xref: /openbmc/linux/arch/arm/crypto/aes-ce-core.S (revision d2912cb1)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-core.S - AES in ECB/CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

886464859SArd Biesheuvel#include <linux/linkage.h>
986464859SArd Biesheuvel#include <asm/assembler.h>
1086464859SArd Biesheuvel
1186464859SArd Biesheuvel	.text
1286464859SArd Biesheuvel	.fpu		crypto-neon-fp-armv8
1386464859SArd Biesheuvel	.align		3
1486464859SArd Biesheuvel
1586464859SArd Biesheuvel	.macro		enc_round, state, key
1686464859SArd Biesheuvel	aese.8		\state, \key
1786464859SArd Biesheuvel	aesmc.8		\state, \state
1886464859SArd Biesheuvel	.endm
1986464859SArd Biesheuvel
2086464859SArd Biesheuvel	.macro		dec_round, state, key
2186464859SArd Biesheuvel	aesd.8		\state, \key
2286464859SArd Biesheuvel	aesimc.8	\state, \state
2386464859SArd Biesheuvel	.endm
2486464859SArd Biesheuvel
2586464859SArd Biesheuvel	.macro		enc_dround, key1, key2
2686464859SArd Biesheuvel	enc_round	q0, \key1
2786464859SArd Biesheuvel	enc_round	q0, \key2
2886464859SArd Biesheuvel	.endm
2986464859SArd Biesheuvel
3086464859SArd Biesheuvel	.macro		dec_dround, key1, key2
3186464859SArd Biesheuvel	dec_round	q0, \key1
3286464859SArd Biesheuvel	dec_round	q0, \key2
3386464859SArd Biesheuvel	.endm
3486464859SArd Biesheuvel
3586464859SArd Biesheuvel	.macro		enc_fround, key1, key2, key3
3686464859SArd Biesheuvel	enc_round	q0, \key1
3786464859SArd Biesheuvel	aese.8		q0, \key2
3886464859SArd Biesheuvel	veor		q0, q0, \key3
3986464859SArd Biesheuvel	.endm
4086464859SArd Biesheuvel
4186464859SArd Biesheuvel	.macro		dec_fround, key1, key2, key3
4286464859SArd Biesheuvel	dec_round	q0, \key1
4386464859SArd Biesheuvel	aesd.8		q0, \key2
4486464859SArd Biesheuvel	veor		q0, q0, \key3
4586464859SArd Biesheuvel	.endm
4686464859SArd Biesheuvel
4786464859SArd Biesheuvel	.macro		enc_dround_3x, key1, key2
4886464859SArd Biesheuvel	enc_round	q0, \key1
4986464859SArd Biesheuvel	enc_round	q1, \key1
5086464859SArd Biesheuvel	enc_round	q2, \key1
5186464859SArd Biesheuvel	enc_round	q0, \key2
5286464859SArd Biesheuvel	enc_round	q1, \key2
5386464859SArd Biesheuvel	enc_round	q2, \key2
5486464859SArd Biesheuvel	.endm
5586464859SArd Biesheuvel
5686464859SArd Biesheuvel	.macro		dec_dround_3x, key1, key2
5786464859SArd Biesheuvel	dec_round	q0, \key1
5886464859SArd Biesheuvel	dec_round	q1, \key1
5986464859SArd Biesheuvel	dec_round	q2, \key1
6086464859SArd Biesheuvel	dec_round	q0, \key2
6186464859SArd Biesheuvel	dec_round	q1, \key2
6286464859SArd Biesheuvel	dec_round	q2, \key2
6386464859SArd Biesheuvel	.endm
6486464859SArd Biesheuvel
6586464859SArd Biesheuvel	.macro		enc_fround_3x, key1, key2, key3
6686464859SArd Biesheuvel	enc_round	q0, \key1
6786464859SArd Biesheuvel	enc_round	q1, \key1
6886464859SArd Biesheuvel	enc_round	q2, \key1
6986464859SArd Biesheuvel	aese.8		q0, \key2
7086464859SArd Biesheuvel	aese.8		q1, \key2
7186464859SArd Biesheuvel	aese.8		q2, \key2
7286464859SArd Biesheuvel	veor		q0, q0, \key3
7386464859SArd Biesheuvel	veor		q1, q1, \key3
7486464859SArd Biesheuvel	veor		q2, q2, \key3
7586464859SArd Biesheuvel	.endm
7686464859SArd Biesheuvel
7786464859SArd Biesheuvel	.macro		dec_fround_3x, key1, key2, key3
7886464859SArd Biesheuvel	dec_round	q0, \key1
7986464859SArd Biesheuvel	dec_round	q1, \key1
8086464859SArd Biesheuvel	dec_round	q2, \key1
8186464859SArd Biesheuvel	aesd.8		q0, \key2
8286464859SArd Biesheuvel	aesd.8		q1, \key2
8386464859SArd Biesheuvel	aesd.8		q2, \key2
8486464859SArd Biesheuvel	veor		q0, q0, \key3
8586464859SArd Biesheuvel	veor		q1, q1, \key3
8686464859SArd Biesheuvel	veor		q2, q2, \key3
8786464859SArd Biesheuvel	.endm
8886464859SArd Biesheuvel
8986464859SArd Biesheuvel	.macro		do_block, dround, fround
9086464859SArd Biesheuvel	cmp		r3, #12			@ which key size?
9186464859SArd Biesheuvel	vld1.8		{q10-q11}, [ip]!
9286464859SArd Biesheuvel	\dround		q8, q9
9386464859SArd Biesheuvel	vld1.8		{q12-q13}, [ip]!
9486464859SArd Biesheuvel	\dround		q10, q11
9586464859SArd Biesheuvel	vld1.8		{q10-q11}, [ip]!
9686464859SArd Biesheuvel	\dround		q12, q13
9786464859SArd Biesheuvel	vld1.8		{q12-q13}, [ip]!
9886464859SArd Biesheuvel	\dround		q10, q11
9986464859SArd Biesheuvel	blo		0f			@ AES-128: 10 rounds
10086464859SArd Biesheuvel	vld1.8		{q10-q11}, [ip]!
10186464859SArd Biesheuvel	\dround		q12, q13
1026499e8cfSArd Biesheuvel	beq		1f			@ AES-192: 12 rounds
10386464859SArd Biesheuvel	vld1.8		{q12-q13}, [ip]
10486464859SArd Biesheuvel	\dround		q10, q11
10586464859SArd Biesheuvel0:	\fround		q12, q13, q14
10686464859SArd Biesheuvel	bx		lr
10786464859SArd Biesheuvel
1086499e8cfSArd Biesheuvel1:	\fround		q10, q11, q14
10986464859SArd Biesheuvel	bx		lr
11086464859SArd Biesheuvel	.endm
11186464859SArd Biesheuvel
	/*
	 * Internal, non-AAPCS compliant functions that implement the core AES
	 * transforms. These should preserve all registers except q0 - q2, ip
	 * and q10 - q13, which are overwritten with round keys loaded from the
	 * round key array. The condition flags are modified as well.
	 * Arguments:
	 *   q0        : first in/output block
	 *   q1        : second in/output block (_3x version only)
	 *   q2        : third in/output block (_3x version only)
	 *   q8        : first round key
	 *   q9        : second round key
	 *   q14       : final round key
	 *   r2        : address of round key array
	 *   r3        : number of rounds
	 */
12586464859SArd Biesheuvel	.align		6
12686464859SArd Biesheuvelaes_encrypt:
12786464859SArd Biesheuvel	add		ip, r2, #32		@ 3rd round key
12886464859SArd Biesheuvel.Laes_encrypt_tweak:
12986464859SArd Biesheuvel	do_block	enc_dround, enc_fround
13086464859SArd BiesheuvelENDPROC(aes_encrypt)
13186464859SArd Biesheuvel
13286464859SArd Biesheuvel	.align		6
13386464859SArd Biesheuvelaes_decrypt:
13486464859SArd Biesheuvel	add		ip, r2, #32		@ 3rd round key
13586464859SArd Biesheuvel	do_block	dec_dround, dec_fround
13686464859SArd BiesheuvelENDPROC(aes_decrypt)
13786464859SArd Biesheuvel
13886464859SArd Biesheuvel	.align		6
13986464859SArd Biesheuvelaes_encrypt_3x:
14086464859SArd Biesheuvel	add		ip, r2, #32		@ 3rd round key
14186464859SArd Biesheuvel	do_block	enc_dround_3x, enc_fround_3x
14286464859SArd BiesheuvelENDPROC(aes_encrypt_3x)
14386464859SArd Biesheuvel
14486464859SArd Biesheuvel	.align		6
14586464859SArd Biesheuvelaes_decrypt_3x:
14686464859SArd Biesheuvel	add		ip, r2, #32		@ 3rd round key
14786464859SArd Biesheuvel	do_block	dec_dround_3x, dec_fround_3x
14886464859SArd BiesheuvelENDPROC(aes_decrypt_3x)
14986464859SArd Biesheuvel
15086464859SArd Biesheuvel	.macro		prepare_key, rk, rounds
15186464859SArd Biesheuvel	add		ip, \rk, \rounds, lsl #4
15286464859SArd Biesheuvel	vld1.8		{q8-q9}, [\rk]		@ load first 2 round keys
15386464859SArd Biesheuvel	vld1.8		{q14}, [ip]		@ load last round key
15486464859SArd Biesheuvel	.endm
15586464859SArd Biesheuvel
	/*
	 * ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
	 *		      int rounds, int blocks)
	 * ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
	 *		      int rounds, int blocks)
	 */
16286464859SArd BiesheuvelENTRY(ce_aes_ecb_encrypt)
16386464859SArd Biesheuvel	push		{r4, lr}
16486464859SArd Biesheuvel	ldr		r4, [sp, #8]
16586464859SArd Biesheuvel	prepare_key	r2, r3
16686464859SArd Biesheuvel.Lecbencloop3x:
16786464859SArd Biesheuvel	subs		r4, r4, #3
16886464859SArd Biesheuvel	bmi		.Lecbenc1x
1691465fb13SArd Biesheuvel	vld1.8		{q0-q1}, [r1]!
1701465fb13SArd Biesheuvel	vld1.8		{q2}, [r1]!
17186464859SArd Biesheuvel	bl		aes_encrypt_3x
1721465fb13SArd Biesheuvel	vst1.8		{q0-q1}, [r0]!
1731465fb13SArd Biesheuvel	vst1.8		{q2}, [r0]!
17486464859SArd Biesheuvel	b		.Lecbencloop3x
17586464859SArd Biesheuvel.Lecbenc1x:
17686464859SArd Biesheuvel	adds		r4, r4, #3
17786464859SArd Biesheuvel	beq		.Lecbencout
17886464859SArd Biesheuvel.Lecbencloop:
1791465fb13SArd Biesheuvel	vld1.8		{q0}, [r1]!
18086464859SArd Biesheuvel	bl		aes_encrypt
1811465fb13SArd Biesheuvel	vst1.8		{q0}, [r0]!
18286464859SArd Biesheuvel	subs		r4, r4, #1
18386464859SArd Biesheuvel	bne		.Lecbencloop
18486464859SArd Biesheuvel.Lecbencout:
18586464859SArd Biesheuvel	pop		{r4, pc}
18686464859SArd BiesheuvelENDPROC(ce_aes_ecb_encrypt)
18786464859SArd Biesheuvel
18886464859SArd BiesheuvelENTRY(ce_aes_ecb_decrypt)
18986464859SArd Biesheuvel	push		{r4, lr}
19086464859SArd Biesheuvel	ldr		r4, [sp, #8]
19186464859SArd Biesheuvel	prepare_key	r2, r3
19286464859SArd Biesheuvel.Lecbdecloop3x:
19386464859SArd Biesheuvel	subs		r4, r4, #3
19486464859SArd Biesheuvel	bmi		.Lecbdec1x
1951465fb13SArd Biesheuvel	vld1.8		{q0-q1}, [r1]!
1961465fb13SArd Biesheuvel	vld1.8		{q2}, [r1]!
19786464859SArd Biesheuvel	bl		aes_decrypt_3x
1981465fb13SArd Biesheuvel	vst1.8		{q0-q1}, [r0]!
1991465fb13SArd Biesheuvel	vst1.8		{q2}, [r0]!
20086464859SArd Biesheuvel	b		.Lecbdecloop3x
20186464859SArd Biesheuvel.Lecbdec1x:
20286464859SArd Biesheuvel	adds		r4, r4, #3
20386464859SArd Biesheuvel	beq		.Lecbdecout
20486464859SArd Biesheuvel.Lecbdecloop:
2051465fb13SArd Biesheuvel	vld1.8		{q0}, [r1]!
20686464859SArd Biesheuvel	bl		aes_decrypt
2071465fb13SArd Biesheuvel	vst1.8		{q0}, [r0]!
20886464859SArd Biesheuvel	subs		r4, r4, #1
20986464859SArd Biesheuvel	bne		.Lecbdecloop
21086464859SArd Biesheuvel.Lecbdecout:
21186464859SArd Biesheuvel	pop		{r4, pc}
21286464859SArd BiesheuvelENDPROC(ce_aes_ecb_decrypt)
21386464859SArd Biesheuvel
	/*
	 * ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
	 *		      int rounds, int blocks, u8 iv[])
	 * ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
	 *		      int rounds, int blocks, u8 iv[])
	 *
	 * On return, iv[] holds the chaining value for the next call.
	 */
22086464859SArd BiesheuvelENTRY(ce_aes_cbc_encrypt)
22186464859SArd Biesheuvel	push		{r4-r6, lr}
22286464859SArd Biesheuvel	ldrd		r4, r5, [sp, #16]
22386464859SArd Biesheuvel	vld1.8		{q0}, [r5]
22486464859SArd Biesheuvel	prepare_key	r2, r3
22586464859SArd Biesheuvel.Lcbcencloop:
2261465fb13SArd Biesheuvel	vld1.8		{q1}, [r1]!		@ get next pt block
22786464859SArd Biesheuvel	veor		q0, q0, q1		@ ..and xor with iv
22886464859SArd Biesheuvel	bl		aes_encrypt
2291465fb13SArd Biesheuvel	vst1.8		{q0}, [r0]!
23086464859SArd Biesheuvel	subs		r4, r4, #1
23186464859SArd Biesheuvel	bne		.Lcbcencloop
23286464859SArd Biesheuvel	vst1.8		{q0}, [r5]
23386464859SArd Biesheuvel	pop		{r4-r6, pc}
23486464859SArd BiesheuvelENDPROC(ce_aes_cbc_encrypt)
23586464859SArd Biesheuvel
23686464859SArd BiesheuvelENTRY(ce_aes_cbc_decrypt)
23786464859SArd Biesheuvel	push		{r4-r6, lr}
23886464859SArd Biesheuvel	ldrd		r4, r5, [sp, #16]
23986464859SArd Biesheuvel	vld1.8		{q6}, [r5]		@ keep iv in q6
24086464859SArd Biesheuvel	prepare_key	r2, r3
24186464859SArd Biesheuvel.Lcbcdecloop3x:
24286464859SArd Biesheuvel	subs		r4, r4, #3
24386464859SArd Biesheuvel	bmi		.Lcbcdec1x
2441465fb13SArd Biesheuvel	vld1.8		{q0-q1}, [r1]!
2451465fb13SArd Biesheuvel	vld1.8		{q2}, [r1]!
24686464859SArd Biesheuvel	vmov		q3, q0
24786464859SArd Biesheuvel	vmov		q4, q1
24886464859SArd Biesheuvel	vmov		q5, q2
24986464859SArd Biesheuvel	bl		aes_decrypt_3x
25086464859SArd Biesheuvel	veor		q0, q0, q6
25186464859SArd Biesheuvel	veor		q1, q1, q3
25286464859SArd Biesheuvel	veor		q2, q2, q4
25386464859SArd Biesheuvel	vmov		q6, q5
2541465fb13SArd Biesheuvel	vst1.8		{q0-q1}, [r0]!
2551465fb13SArd Biesheuvel	vst1.8		{q2}, [r0]!
25686464859SArd Biesheuvel	b		.Lcbcdecloop3x
25786464859SArd Biesheuvel.Lcbcdec1x:
25886464859SArd Biesheuvel	adds		r4, r4, #3
25986464859SArd Biesheuvel	beq		.Lcbcdecout
26086464859SArd Biesheuvel	vmov		q15, q14		@ preserve last round key
26186464859SArd Biesheuvel.Lcbcdecloop:
2621465fb13SArd Biesheuvel	vld1.8		{q0}, [r1]!		@ get next ct block
26386464859SArd Biesheuvel	veor		q14, q15, q6		@ combine prev ct with last key
26486464859SArd Biesheuvel	vmov		q6, q0
26586464859SArd Biesheuvel	bl		aes_decrypt
2661465fb13SArd Biesheuvel	vst1.8		{q0}, [r0]!
26786464859SArd Biesheuvel	subs		r4, r4, #1
26886464859SArd Biesheuvel	bne		.Lcbcdecloop
26986464859SArd Biesheuvel.Lcbcdecout:
27086464859SArd Biesheuvel	vst1.8		{q6}, [r5]		@ keep iv in q6
27186464859SArd Biesheuvel	pop		{r4-r6, pc}
27286464859SArd BiesheuvelENDPROC(ce_aes_cbc_decrypt)
27386464859SArd Biesheuvel
	/*
	 * ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
	 *		      int rounds, int blocks, u8 ctr[])
	 *
	 * A negative 'blocks' requests a single block of key stream: it is
	 * stored to out[], which is accessed with a 64-bit alignment hint,
	 * and in[] is not read. On return, ctr[] holds the next counter value.
	 */
27886464859SArd BiesheuvelENTRY(ce_aes_ctr_encrypt)
27986464859SArd Biesheuvel	push		{r4-r6, lr}
28086464859SArd Biesheuvel	ldrd		r4, r5, [sp, #16]
28186464859SArd Biesheuvel	vld1.8		{q6}, [r5]		@ load ctr
28286464859SArd Biesheuvel	prepare_key	r2, r3
28386464859SArd Biesheuvel	vmov		r6, s27			@ keep swabbed ctr in r6
28486464859SArd Biesheuvel	rev		r6, r6
28586464859SArd Biesheuvel	cmn		r6, r4			@ 32 bit overflow?
28686464859SArd Biesheuvel	bcs		.Lctrloop
28786464859SArd Biesheuvel.Lctrloop3x:
28886464859SArd Biesheuvel	subs		r4, r4, #3
28986464859SArd Biesheuvel	bmi		.Lctr1x
29086464859SArd Biesheuvel	add		r6, r6, #1
29186464859SArd Biesheuvel	vmov		q0, q6
29286464859SArd Biesheuvel	vmov		q1, q6
29386464859SArd Biesheuvel	rev		ip, r6
29486464859SArd Biesheuvel	add		r6, r6, #1
29586464859SArd Biesheuvel	vmov		q2, q6
29686464859SArd Biesheuvel	vmov		s7, ip
29786464859SArd Biesheuvel	rev		ip, r6
29886464859SArd Biesheuvel	add		r6, r6, #1
29986464859SArd Biesheuvel	vmov		s11, ip
3001465fb13SArd Biesheuvel	vld1.8		{q3-q4}, [r1]!
3011465fb13SArd Biesheuvel	vld1.8		{q5}, [r1]!
30286464859SArd Biesheuvel	bl		aes_encrypt_3x
30386464859SArd Biesheuvel	veor		q0, q0, q3
30486464859SArd Biesheuvel	veor		q1, q1, q4
30586464859SArd Biesheuvel	veor		q2, q2, q5
30686464859SArd Biesheuvel	rev		ip, r6
3071465fb13SArd Biesheuvel	vst1.8		{q0-q1}, [r0]!
3081465fb13SArd Biesheuvel	vst1.8		{q2}, [r0]!
30986464859SArd Biesheuvel	vmov		s27, ip
31086464859SArd Biesheuvel	b		.Lctrloop3x
31186464859SArd Biesheuvel.Lctr1x:
31286464859SArd Biesheuvel	adds		r4, r4, #3
31386464859SArd Biesheuvel	beq		.Lctrout
31486464859SArd Biesheuvel.Lctrloop:
31586464859SArd Biesheuvel	vmov		q0, q6
31686464859SArd Biesheuvel	bl		aes_encrypt
31786464859SArd Biesheuvel
31886464859SArd Biesheuvel	adds		r6, r6, #1		@ increment BE ctr
31986464859SArd Biesheuvel	rev		ip, r6
32086464859SArd Biesheuvel	vmov		s27, ip
32186464859SArd Biesheuvel	bcs		.Lctrcarry
322511306b2SEric Biggers
323511306b2SEric Biggers.Lctrcarrydone:
324511306b2SEric Biggers	subs		r4, r4, #1
325511306b2SEric Biggers	bmi		.Lctrtailblock		@ blocks < 0 means tail block
326511306b2SEric Biggers	vld1.8		{q3}, [r1]!
327511306b2SEric Biggers	veor		q3, q0, q3
328511306b2SEric Biggers	vst1.8		{q3}, [r0]!
32986464859SArd Biesheuvel	bne		.Lctrloop
330511306b2SEric Biggers
33186464859SArd Biesheuvel.Lctrout:
332511306b2SEric Biggers	vst1.8		{q6}, [r5]		@ return next CTR value
33386464859SArd Biesheuvel	pop		{r4-r6, pc}
33486464859SArd Biesheuvel
3351465fb13SArd Biesheuvel.Lctrtailblock:
336511306b2SEric Biggers	vst1.8		{q0}, [r0, :64]		@ return the key stream
337511306b2SEric Biggers	b		.Lctrout
33886464859SArd Biesheuvel
33986464859SArd Biesheuvel.Lctrcarry:
34086464859SArd Biesheuvel	.irp		sreg, s26, s25, s24
34186464859SArd Biesheuvel	vmov		ip, \sreg		@ load next word of ctr
34286464859SArd Biesheuvel	rev		ip, ip			@ ... to handle the carry
34386464859SArd Biesheuvel	adds		ip, ip, #1
34486464859SArd Biesheuvel	rev		ip, ip
34586464859SArd Biesheuvel	vmov		\sreg, ip
346511306b2SEric Biggers	bcc		.Lctrcarrydone
34786464859SArd Biesheuvel	.endr
348511306b2SEric Biggers	b		.Lctrcarrydone
34986464859SArd BiesheuvelENDPROC(ce_aes_ctr_encrypt)
35086464859SArd Biesheuvel
	/*
	 * ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
	 *		      int rounds, int blocks, u8 iv[],
	 *		      u8 const rk2[], int first)
	 * ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
	 *		      int rounds, int blocks, u8 iv[],
	 *		      u8 const rk2[], int first)
	 *
	 * If 'first' is 1, iv[] is encrypted with rk2 to form the initial
	 * tweak. If 'first' is 0, iv[] is taken to be the last tweak used by
	 * the previous call and is advanced before use. On return, iv[] holds
	 * the last tweak that was used.
	 */
35786464859SArd Biesheuvel
35886464859SArd Biesheuvel	.macro		next_tweak, out, in, const, tmp
35986464859SArd Biesheuvel	vshr.s64	\tmp, \in, #63
36086464859SArd Biesheuvel	vand		\tmp, \tmp, \const
36186464859SArd Biesheuvel	vadd.u64	\out, \in, \in
36286464859SArd Biesheuvel	vext.8		\tmp, \tmp, \tmp, #8
36386464859SArd Biesheuvel	veor		\out, \out, \tmp
36486464859SArd Biesheuvel	.endm
36586464859SArd Biesheuvel
36686464859SArd Biesheuvel	.align		3
36786464859SArd Biesheuvel.Lxts_mul_x:
36886464859SArd Biesheuvel	.quad		1, 0x87
36986464859SArd Biesheuvel
37086464859SArd Biesheuvelce_aes_xts_init:
37186464859SArd Biesheuvel	vldr		d14, .Lxts_mul_x
37286464859SArd Biesheuvel	vldr		d15, .Lxts_mul_x + 8
37386464859SArd Biesheuvel
37486464859SArd Biesheuvel	ldrd		r4, r5, [sp, #16]	@ load args
37586464859SArd Biesheuvel	ldr		r6, [sp, #28]
37686464859SArd Biesheuvel	vld1.8		{q0}, [r5]		@ load iv
37786464859SArd Biesheuvel	teq		r6, #1			@ start of a block?
37886464859SArd Biesheuvel	bxne		lr
37986464859SArd Biesheuvel
38086464859SArd Biesheuvel	@ Encrypt the IV in q0 with the second AES key. This should only
38186464859SArd Biesheuvel	@ be done at the start of a block.
38286464859SArd Biesheuvel	ldr		r6, [sp, #24]		@ load AES key 2
38386464859SArd Biesheuvel	prepare_key	r6, r3
38486464859SArd Biesheuvel	add		ip, r6, #32		@ 3rd round key of key 2
38586464859SArd Biesheuvel	b		.Laes_encrypt_tweak	@ tail call
38686464859SArd BiesheuvelENDPROC(ce_aes_xts_init)
38786464859SArd Biesheuvel
38886464859SArd BiesheuvelENTRY(ce_aes_xts_encrypt)
38986464859SArd Biesheuvel	push		{r4-r6, lr}
39086464859SArd Biesheuvel
39186464859SArd Biesheuvel	bl		ce_aes_xts_init		@ run shared prologue
39286464859SArd Biesheuvel	prepare_key	r2, r3
39386464859SArd Biesheuvel	vmov		q3, q0
39486464859SArd Biesheuvel
39586464859SArd Biesheuvel	teq		r6, #0			@ start of a block?
39686464859SArd Biesheuvel	bne		.Lxtsenc3x
39786464859SArd Biesheuvel
39886464859SArd Biesheuvel.Lxtsencloop3x:
39986464859SArd Biesheuvel	next_tweak	q3, q3, q7, q6
40086464859SArd Biesheuvel.Lxtsenc3x:
40186464859SArd Biesheuvel	subs		r4, r4, #3
40286464859SArd Biesheuvel	bmi		.Lxtsenc1x
4031465fb13SArd Biesheuvel	vld1.8		{q0-q1}, [r1]!		@ get 3 pt blocks
4041465fb13SArd Biesheuvel	vld1.8		{q2}, [r1]!
40586464859SArd Biesheuvel	next_tweak	q4, q3, q7, q6
40686464859SArd Biesheuvel	veor		q0, q0, q3
40786464859SArd Biesheuvel	next_tweak	q5, q4, q7, q6
40886464859SArd Biesheuvel	veor		q1, q1, q4
40986464859SArd Biesheuvel	veor		q2, q2, q5
41086464859SArd Biesheuvel	bl		aes_encrypt_3x
41186464859SArd Biesheuvel	veor		q0, q0, q3
41286464859SArd Biesheuvel	veor		q1, q1, q4
41386464859SArd Biesheuvel	veor		q2, q2, q5
4141465fb13SArd Biesheuvel	vst1.8		{q0-q1}, [r0]!		@ write 3 ct blocks
4151465fb13SArd Biesheuvel	vst1.8		{q2}, [r0]!
41686464859SArd Biesheuvel	vmov		q3, q5
41786464859SArd Biesheuvel	teq		r4, #0
41886464859SArd Biesheuvel	beq		.Lxtsencout
41986464859SArd Biesheuvel	b		.Lxtsencloop3x
42086464859SArd Biesheuvel.Lxtsenc1x:
42186464859SArd Biesheuvel	adds		r4, r4, #3
42286464859SArd Biesheuvel	beq		.Lxtsencout
42386464859SArd Biesheuvel.Lxtsencloop:
4241465fb13SArd Biesheuvel	vld1.8		{q0}, [r1]!
42586464859SArd Biesheuvel	veor		q0, q0, q3
42686464859SArd Biesheuvel	bl		aes_encrypt
42786464859SArd Biesheuvel	veor		q0, q0, q3
4281465fb13SArd Biesheuvel	vst1.8		{q0}, [r0]!
42986464859SArd Biesheuvel	subs		r4, r4, #1
43086464859SArd Biesheuvel	beq		.Lxtsencout
43186464859SArd Biesheuvel	next_tweak	q3, q3, q7, q6
43286464859SArd Biesheuvel	b		.Lxtsencloop
43386464859SArd Biesheuvel.Lxtsencout:
43486464859SArd Biesheuvel	vst1.8		{q3}, [r5]
43586464859SArd Biesheuvel	pop		{r4-r6, pc}
43686464859SArd BiesheuvelENDPROC(ce_aes_xts_encrypt)
43786464859SArd Biesheuvel
43886464859SArd Biesheuvel
43986464859SArd BiesheuvelENTRY(ce_aes_xts_decrypt)
44086464859SArd Biesheuvel	push		{r4-r6, lr}
44186464859SArd Biesheuvel
44286464859SArd Biesheuvel	bl		ce_aes_xts_init		@ run shared prologue
44386464859SArd Biesheuvel	prepare_key	r2, r3
44486464859SArd Biesheuvel	vmov		q3, q0
44586464859SArd Biesheuvel
44686464859SArd Biesheuvel	teq		r6, #0			@ start of a block?
44786464859SArd Biesheuvel	bne		.Lxtsdec3x
44886464859SArd Biesheuvel
44986464859SArd Biesheuvel.Lxtsdecloop3x:
45086464859SArd Biesheuvel	next_tweak	q3, q3, q7, q6
45186464859SArd Biesheuvel.Lxtsdec3x:
45286464859SArd Biesheuvel	subs		r4, r4, #3
45386464859SArd Biesheuvel	bmi		.Lxtsdec1x
4541465fb13SArd Biesheuvel	vld1.8		{q0-q1}, [r1]!		@ get 3 ct blocks
4551465fb13SArd Biesheuvel	vld1.8		{q2}, [r1]!
45686464859SArd Biesheuvel	next_tweak	q4, q3, q7, q6
45786464859SArd Biesheuvel	veor		q0, q0, q3
45886464859SArd Biesheuvel	next_tweak	q5, q4, q7, q6
45986464859SArd Biesheuvel	veor		q1, q1, q4
46086464859SArd Biesheuvel	veor		q2, q2, q5
46186464859SArd Biesheuvel	bl		aes_decrypt_3x
46286464859SArd Biesheuvel	veor		q0, q0, q3
46386464859SArd Biesheuvel	veor		q1, q1, q4
46486464859SArd Biesheuvel	veor		q2, q2, q5
4651465fb13SArd Biesheuvel	vst1.8		{q0-q1}, [r0]!		@ write 3 pt blocks
4661465fb13SArd Biesheuvel	vst1.8		{q2}, [r0]!
46786464859SArd Biesheuvel	vmov		q3, q5
46886464859SArd Biesheuvel	teq		r4, #0
46986464859SArd Biesheuvel	beq		.Lxtsdecout
47086464859SArd Biesheuvel	b		.Lxtsdecloop3x
47186464859SArd Biesheuvel.Lxtsdec1x:
47286464859SArd Biesheuvel	adds		r4, r4, #3
47386464859SArd Biesheuvel	beq		.Lxtsdecout
47486464859SArd Biesheuvel.Lxtsdecloop:
4751465fb13SArd Biesheuvel	vld1.8		{q0}, [r1]!
47686464859SArd Biesheuvel	veor		q0, q0, q3
47786464859SArd Biesheuvel	add		ip, r2, #32		@ 3rd round key
47886464859SArd Biesheuvel	bl		aes_decrypt
47986464859SArd Biesheuvel	veor		q0, q0, q3
4801465fb13SArd Biesheuvel	vst1.8		{q0}, [r0]!
48186464859SArd Biesheuvel	subs		r4, r4, #1
48286464859SArd Biesheuvel	beq		.Lxtsdecout
48386464859SArd Biesheuvel	next_tweak	q3, q3, q7, q6
48486464859SArd Biesheuvel	b		.Lxtsdecloop
48586464859SArd Biesheuvel.Lxtsdecout:
48686464859SArd Biesheuvel	vst1.8		{q3}, [r5]
48786464859SArd Biesheuvel	pop		{r4-r6, pc}
48886464859SArd BiesheuvelENDPROC(ce_aes_xts_decrypt)
48986464859SArd Biesheuvel
	/*
	 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
	 *                             AES sbox substitution on each byte in
	 *                             'input'
	 */
49586464859SArd BiesheuvelENTRY(ce_aes_sub)
49686464859SArd Biesheuvel	vdup.32		q1, r0
49786464859SArd Biesheuvel	veor		q0, q0, q0
49886464859SArd Biesheuvel	aese.8		q0, q1
49986464859SArd Biesheuvel	vmov		r0, s0
50086464859SArd Biesheuvel	bx		lr
50186464859SArd BiesheuvelENDPROC(ce_aes_sub)
50286464859SArd Biesheuvel
	/*
	 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
	 *                                        operation on round key *src
	 */
50786464859SArd BiesheuvelENTRY(ce_aes_invert)
50886464859SArd Biesheuvel	vld1.8		{q0}, [r1]
50986464859SArd Biesheuvel	aesimc.8	q0, q0
51086464859SArd Biesheuvel	vst1.8		{q0}, [r0]
51186464859SArd Biesheuvel	bx		lr
51286464859SArd BiesheuvelENDPROC(ce_aes_invert)
513