xref: /openbmc/linux/arch/arm/crypto/aes-ce-core.S (revision 6499e8cf)
/*
 * aes-ce-core.S - AES in ECB/CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.fpu		crypto-neon-fp-armv8
	.align		3

1886464859SArd Biesheuvel	.macro		enc_round, state, key
1986464859SArd Biesheuvel	aese.8		\state, \key
2086464859SArd Biesheuvel	aesmc.8		\state, \state
2186464859SArd Biesheuvel	.endm
2286464859SArd Biesheuvel
2386464859SArd Biesheuvel	.macro		dec_round, state, key
2486464859SArd Biesheuvel	aesd.8		\state, \key
2586464859SArd Biesheuvel	aesimc.8	\state, \state
2686464859SArd Biesheuvel	.endm
2786464859SArd Biesheuvel
2886464859SArd Biesheuvel	.macro		enc_dround, key1, key2
2986464859SArd Biesheuvel	enc_round	q0, \key1
3086464859SArd Biesheuvel	enc_round	q0, \key2
3186464859SArd Biesheuvel	.endm
3286464859SArd Biesheuvel
3386464859SArd Biesheuvel	.macro		dec_dround, key1, key2
3486464859SArd Biesheuvel	dec_round	q0, \key1
3586464859SArd Biesheuvel	dec_round	q0, \key2
3686464859SArd Biesheuvel	.endm
3786464859SArd Biesheuvel
3886464859SArd Biesheuvel	.macro		enc_fround, key1, key2, key3
3986464859SArd Biesheuvel	enc_round	q0, \key1
4086464859SArd Biesheuvel	aese.8		q0, \key2
4186464859SArd Biesheuvel	veor		q0, q0, \key3
4286464859SArd Biesheuvel	.endm
4386464859SArd Biesheuvel
4486464859SArd Biesheuvel	.macro		dec_fround, key1, key2, key3
4586464859SArd Biesheuvel	dec_round	q0, \key1
4686464859SArd Biesheuvel	aesd.8		q0, \key2
4786464859SArd Biesheuvel	veor		q0, q0, \key3
4886464859SArd Biesheuvel	.endm
4986464859SArd Biesheuvel
5086464859SArd Biesheuvel	.macro		enc_dround_3x, key1, key2
5186464859SArd Biesheuvel	enc_round	q0, \key1
5286464859SArd Biesheuvel	enc_round	q1, \key1
5386464859SArd Biesheuvel	enc_round	q2, \key1
5486464859SArd Biesheuvel	enc_round	q0, \key2
5586464859SArd Biesheuvel	enc_round	q1, \key2
5686464859SArd Biesheuvel	enc_round	q2, \key2
5786464859SArd Biesheuvel	.endm
5886464859SArd Biesheuvel
5986464859SArd Biesheuvel	.macro		dec_dround_3x, key1, key2
6086464859SArd Biesheuvel	dec_round	q0, \key1
6186464859SArd Biesheuvel	dec_round	q1, \key1
6286464859SArd Biesheuvel	dec_round	q2, \key1
6386464859SArd Biesheuvel	dec_round	q0, \key2
6486464859SArd Biesheuvel	dec_round	q1, \key2
6586464859SArd Biesheuvel	dec_round	q2, \key2
6686464859SArd Biesheuvel	.endm
6786464859SArd Biesheuvel
6886464859SArd Biesheuvel	.macro		enc_fround_3x, key1, key2, key3
6986464859SArd Biesheuvel	enc_round	q0, \key1
7086464859SArd Biesheuvel	enc_round	q1, \key1
7186464859SArd Biesheuvel	enc_round	q2, \key1
7286464859SArd Biesheuvel	aese.8		q0, \key2
7386464859SArd Biesheuvel	aese.8		q1, \key2
7486464859SArd Biesheuvel	aese.8		q2, \key2
7586464859SArd Biesheuvel	veor		q0, q0, \key3
7686464859SArd Biesheuvel	veor		q1, q1, \key3
7786464859SArd Biesheuvel	veor		q2, q2, \key3
7886464859SArd Biesheuvel	.endm
7986464859SArd Biesheuvel
8086464859SArd Biesheuvel	.macro		dec_fround_3x, key1, key2, key3
8186464859SArd Biesheuvel	dec_round	q0, \key1
8286464859SArd Biesheuvel	dec_round	q1, \key1
8386464859SArd Biesheuvel	dec_round	q2, \key1
8486464859SArd Biesheuvel	aesd.8		q0, \key2
8586464859SArd Biesheuvel	aesd.8		q1, \key2
8686464859SArd Biesheuvel	aesd.8		q2, \key2
8786464859SArd Biesheuvel	veor		q0, q0, \key3
8886464859SArd Biesheuvel	veor		q1, q1, \key3
8986464859SArd Biesheuvel	veor		q2, q2, \key3
9086464859SArd Biesheuvel	.endm
9186464859SArd Biesheuvel
9286464859SArd Biesheuvel	.macro		do_block, dround, fround
9386464859SArd Biesheuvel	cmp		r3, #12			@ which key size?
9486464859SArd Biesheuvel	vld1.8		{q10-q11}, [ip]!
9586464859SArd Biesheuvel	\dround		q8, q9
9686464859SArd Biesheuvel	vld1.8		{q12-q13}, [ip]!
9786464859SArd Biesheuvel	\dround		q10, q11
9886464859SArd Biesheuvel	vld1.8		{q10-q11}, [ip]!
9986464859SArd Biesheuvel	\dround		q12, q13
10086464859SArd Biesheuvel	vld1.8		{q12-q13}, [ip]!
10186464859SArd Biesheuvel	\dround		q10, q11
10286464859SArd Biesheuvel	blo		0f			@ AES-128: 10 rounds
10386464859SArd Biesheuvel	vld1.8		{q10-q11}, [ip]!
10486464859SArd Biesheuvel	\dround		q12, q13
1056499e8cfSArd Biesheuvel	beq		1f			@ AES-192: 12 rounds
10686464859SArd Biesheuvel	vld1.8		{q12-q13}, [ip]
10786464859SArd Biesheuvel	\dround		q10, q11
10886464859SArd Biesheuvel0:	\fround		q12, q13, q14
10986464859SArd Biesheuvel	bx		lr
11086464859SArd Biesheuvel
1116499e8cfSArd Biesheuvel1:	\fround		q10, q11, q14
11286464859SArd Biesheuvel	bx		lr
11386464859SArd Biesheuvel	.endm
11486464859SArd Biesheuvel
11586464859SArd Biesheuvel	/*
11686464859SArd Biesheuvel	 * Internal, non-AAPCS compliant functions that implement the core AES
11786464859SArd Biesheuvel	 * transforms. These should preserve all registers except q0 - q2 and ip
11886464859SArd Biesheuvel	 * Arguments:
11986464859SArd Biesheuvel	 *   q0        : first in/output block
12086464859SArd Biesheuvel	 *   q1        : second in/output block (_3x version only)
12186464859SArd Biesheuvel	 *   q2        : third in/output block (_3x version only)
12286464859SArd Biesheuvel	 *   q8        : first round key
12386464859SArd Biesheuvel	 *   q9        : secound round key
12486464859SArd Biesheuvel	 *   q14       : final round key
1256499e8cfSArd Biesheuvel	 *   r2        : address of round key array
12686464859SArd Biesheuvel	 *   r3        : number of rounds
12786464859SArd Biesheuvel	 */
12886464859SArd Biesheuvel	.align		6
12986464859SArd Biesheuvelaes_encrypt:
13086464859SArd Biesheuvel	add		ip, r2, #32		@ 3rd round key
13186464859SArd Biesheuvel.Laes_encrypt_tweak:
13286464859SArd Biesheuvel	do_block	enc_dround, enc_fround
13386464859SArd BiesheuvelENDPROC(aes_encrypt)
13486464859SArd Biesheuvel
13586464859SArd Biesheuvel	.align		6
13686464859SArd Biesheuvelaes_decrypt:
13786464859SArd Biesheuvel	add		ip, r2, #32		@ 3rd round key
13886464859SArd Biesheuvel	do_block	dec_dround, dec_fround
13986464859SArd BiesheuvelENDPROC(aes_decrypt)
14086464859SArd Biesheuvel
14186464859SArd Biesheuvel	.align		6
14286464859SArd Biesheuvelaes_encrypt_3x:
14386464859SArd Biesheuvel	add		ip, r2, #32		@ 3rd round key
14486464859SArd Biesheuvel	do_block	enc_dround_3x, enc_fround_3x
14586464859SArd BiesheuvelENDPROC(aes_encrypt_3x)
14686464859SArd Biesheuvel
14786464859SArd Biesheuvel	.align		6
14886464859SArd Biesheuvelaes_decrypt_3x:
14986464859SArd Biesheuvel	add		ip, r2, #32		@ 3rd round key
15086464859SArd Biesheuvel	do_block	dec_dround_3x, dec_fround_3x
15186464859SArd BiesheuvelENDPROC(aes_decrypt_3x)
15286464859SArd Biesheuvel
15386464859SArd Biesheuvel	.macro		prepare_key, rk, rounds
15486464859SArd Biesheuvel	add		ip, \rk, \rounds, lsl #4
15586464859SArd Biesheuvel	vld1.8		{q8-q9}, [\rk]		@ load first 2 round keys
15686464859SArd Biesheuvel	vld1.8		{q14}, [ip]		@ load last round key
15786464859SArd Biesheuvel	.endm
15886464859SArd Biesheuvel
15986464859SArd Biesheuvel	/*
16086464859SArd Biesheuvel	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
16186464859SArd Biesheuvel	 *		   int blocks)
16286464859SArd Biesheuvel	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
16386464859SArd Biesheuvel	 *		   int blocks)
16486464859SArd Biesheuvel	 */
16586464859SArd BiesheuvelENTRY(ce_aes_ecb_encrypt)
16686464859SArd Biesheuvel	push		{r4, lr}
16786464859SArd Biesheuvel	ldr		r4, [sp, #8]
16886464859SArd Biesheuvel	prepare_key	r2, r3
16986464859SArd Biesheuvel.Lecbencloop3x:
17086464859SArd Biesheuvel	subs		r4, r4, #3
17186464859SArd Biesheuvel	bmi		.Lecbenc1x
17286464859SArd Biesheuvel	vld1.8		{q0-q1}, [r1, :64]!
17386464859SArd Biesheuvel	vld1.8		{q2}, [r1, :64]!
17486464859SArd Biesheuvel	bl		aes_encrypt_3x
17586464859SArd Biesheuvel	vst1.8		{q0-q1}, [r0, :64]!
17686464859SArd Biesheuvel	vst1.8		{q2}, [r0, :64]!
17786464859SArd Biesheuvel	b		.Lecbencloop3x
17886464859SArd Biesheuvel.Lecbenc1x:
17986464859SArd Biesheuvel	adds		r4, r4, #3
18086464859SArd Biesheuvel	beq		.Lecbencout
18186464859SArd Biesheuvel.Lecbencloop:
18286464859SArd Biesheuvel	vld1.8		{q0}, [r1, :64]!
18386464859SArd Biesheuvel	bl		aes_encrypt
18486464859SArd Biesheuvel	vst1.8		{q0}, [r0, :64]!
18586464859SArd Biesheuvel	subs		r4, r4, #1
18686464859SArd Biesheuvel	bne		.Lecbencloop
18786464859SArd Biesheuvel.Lecbencout:
18886464859SArd Biesheuvel	pop		{r4, pc}
18986464859SArd BiesheuvelENDPROC(ce_aes_ecb_encrypt)
19086464859SArd Biesheuvel
19186464859SArd BiesheuvelENTRY(ce_aes_ecb_decrypt)
19286464859SArd Biesheuvel	push		{r4, lr}
19386464859SArd Biesheuvel	ldr		r4, [sp, #8]
19486464859SArd Biesheuvel	prepare_key	r2, r3
19586464859SArd Biesheuvel.Lecbdecloop3x:
19686464859SArd Biesheuvel	subs		r4, r4, #3
19786464859SArd Biesheuvel	bmi		.Lecbdec1x
19886464859SArd Biesheuvel	vld1.8		{q0-q1}, [r1, :64]!
19986464859SArd Biesheuvel	vld1.8		{q2}, [r1, :64]!
20086464859SArd Biesheuvel	bl		aes_decrypt_3x
20186464859SArd Biesheuvel	vst1.8		{q0-q1}, [r0, :64]!
20286464859SArd Biesheuvel	vst1.8		{q2}, [r0, :64]!
20386464859SArd Biesheuvel	b		.Lecbdecloop3x
20486464859SArd Biesheuvel.Lecbdec1x:
20586464859SArd Biesheuvel	adds		r4, r4, #3
20686464859SArd Biesheuvel	beq		.Lecbdecout
20786464859SArd Biesheuvel.Lecbdecloop:
20886464859SArd Biesheuvel	vld1.8		{q0}, [r1, :64]!
20986464859SArd Biesheuvel	bl		aes_decrypt
21086464859SArd Biesheuvel	vst1.8		{q0}, [r0, :64]!
21186464859SArd Biesheuvel	subs		r4, r4, #1
21286464859SArd Biesheuvel	bne		.Lecbdecloop
21386464859SArd Biesheuvel.Lecbdecout:
21486464859SArd Biesheuvel	pop		{r4, pc}
21586464859SArd BiesheuvelENDPROC(ce_aes_ecb_decrypt)
21686464859SArd Biesheuvel
21786464859SArd Biesheuvel	/*
21886464859SArd Biesheuvel	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
21986464859SArd Biesheuvel	 *		   int blocks, u8 iv[])
22086464859SArd Biesheuvel	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
22186464859SArd Biesheuvel	 *		   int blocks, u8 iv[])
22286464859SArd Biesheuvel	 */
22386464859SArd BiesheuvelENTRY(ce_aes_cbc_encrypt)
22486464859SArd Biesheuvel	push		{r4-r6, lr}
22586464859SArd Biesheuvel	ldrd		r4, r5, [sp, #16]
22686464859SArd Biesheuvel	vld1.8		{q0}, [r5]
22786464859SArd Biesheuvel	prepare_key	r2, r3
22886464859SArd Biesheuvel.Lcbcencloop:
22986464859SArd Biesheuvel	vld1.8		{q1}, [r1, :64]!	@ get next pt block
23086464859SArd Biesheuvel	veor		q0, q0, q1		@ ..and xor with iv
23186464859SArd Biesheuvel	bl		aes_encrypt
23286464859SArd Biesheuvel	vst1.8		{q0}, [r0, :64]!
23386464859SArd Biesheuvel	subs		r4, r4, #1
23486464859SArd Biesheuvel	bne		.Lcbcencloop
23586464859SArd Biesheuvel	vst1.8		{q0}, [r5]
23686464859SArd Biesheuvel	pop		{r4-r6, pc}
23786464859SArd BiesheuvelENDPROC(ce_aes_cbc_encrypt)
23886464859SArd Biesheuvel
23986464859SArd BiesheuvelENTRY(ce_aes_cbc_decrypt)
24086464859SArd Biesheuvel	push		{r4-r6, lr}
24186464859SArd Biesheuvel	ldrd		r4, r5, [sp, #16]
24286464859SArd Biesheuvel	vld1.8		{q6}, [r5]		@ keep iv in q6
24386464859SArd Biesheuvel	prepare_key	r2, r3
24486464859SArd Biesheuvel.Lcbcdecloop3x:
24586464859SArd Biesheuvel	subs		r4, r4, #3
24686464859SArd Biesheuvel	bmi		.Lcbcdec1x
24786464859SArd Biesheuvel	vld1.8		{q0-q1}, [r1, :64]!
24886464859SArd Biesheuvel	vld1.8		{q2}, [r1, :64]!
24986464859SArd Biesheuvel	vmov		q3, q0
25086464859SArd Biesheuvel	vmov		q4, q1
25186464859SArd Biesheuvel	vmov		q5, q2
25286464859SArd Biesheuvel	bl		aes_decrypt_3x
25386464859SArd Biesheuvel	veor		q0, q0, q6
25486464859SArd Biesheuvel	veor		q1, q1, q3
25586464859SArd Biesheuvel	veor		q2, q2, q4
25686464859SArd Biesheuvel	vmov		q6, q5
25786464859SArd Biesheuvel	vst1.8		{q0-q1}, [r0, :64]!
25886464859SArd Biesheuvel	vst1.8		{q2}, [r0, :64]!
25986464859SArd Biesheuvel	b		.Lcbcdecloop3x
26086464859SArd Biesheuvel.Lcbcdec1x:
26186464859SArd Biesheuvel	adds		r4, r4, #3
26286464859SArd Biesheuvel	beq		.Lcbcdecout
26386464859SArd Biesheuvel	vmov		q15, q14		@ preserve last round key
26486464859SArd Biesheuvel.Lcbcdecloop:
26586464859SArd Biesheuvel	vld1.8		{q0}, [r1, :64]!	@ get next ct block
26686464859SArd Biesheuvel	veor		q14, q15, q6		@ combine prev ct with last key
26786464859SArd Biesheuvel	vmov		q6, q0
26886464859SArd Biesheuvel	bl		aes_decrypt
26986464859SArd Biesheuvel	vst1.8		{q0}, [r0, :64]!
27086464859SArd Biesheuvel	subs		r4, r4, #1
27186464859SArd Biesheuvel	bne		.Lcbcdecloop
27286464859SArd Biesheuvel.Lcbcdecout:
27386464859SArd Biesheuvel	vst1.8		{q6}, [r5]		@ keep iv in q6
27486464859SArd Biesheuvel	pop		{r4-r6, pc}
27586464859SArd BiesheuvelENDPROC(ce_aes_cbc_decrypt)
27686464859SArd Biesheuvel
27786464859SArd Biesheuvel	/*
27886464859SArd Biesheuvel	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
27986464859SArd Biesheuvel	 *		   int blocks, u8 ctr[])
28086464859SArd Biesheuvel	 */
28186464859SArd BiesheuvelENTRY(ce_aes_ctr_encrypt)
28286464859SArd Biesheuvel	push		{r4-r6, lr}
28386464859SArd Biesheuvel	ldrd		r4, r5, [sp, #16]
28486464859SArd Biesheuvel	vld1.8		{q6}, [r5]		@ load ctr
28586464859SArd Biesheuvel	prepare_key	r2, r3
28686464859SArd Biesheuvel	vmov		r6, s27			@ keep swabbed ctr in r6
28786464859SArd Biesheuvel	rev		r6, r6
28886464859SArd Biesheuvel	cmn		r6, r4			@ 32 bit overflow?
28986464859SArd Biesheuvel	bcs		.Lctrloop
29086464859SArd Biesheuvel.Lctrloop3x:
29186464859SArd Biesheuvel	subs		r4, r4, #3
29286464859SArd Biesheuvel	bmi		.Lctr1x
29386464859SArd Biesheuvel	add		r6, r6, #1
29486464859SArd Biesheuvel	vmov		q0, q6
29586464859SArd Biesheuvel	vmov		q1, q6
29686464859SArd Biesheuvel	rev		ip, r6
29786464859SArd Biesheuvel	add		r6, r6, #1
29886464859SArd Biesheuvel	vmov		q2, q6
29986464859SArd Biesheuvel	vmov		s7, ip
30086464859SArd Biesheuvel	rev		ip, r6
30186464859SArd Biesheuvel	add		r6, r6, #1
30286464859SArd Biesheuvel	vmov		s11, ip
30386464859SArd Biesheuvel	vld1.8		{q3-q4}, [r1, :64]!
30486464859SArd Biesheuvel	vld1.8		{q5}, [r1, :64]!
30586464859SArd Biesheuvel	bl		aes_encrypt_3x
30686464859SArd Biesheuvel	veor		q0, q0, q3
30786464859SArd Biesheuvel	veor		q1, q1, q4
30886464859SArd Biesheuvel	veor		q2, q2, q5
30986464859SArd Biesheuvel	rev		ip, r6
31086464859SArd Biesheuvel	vst1.8		{q0-q1}, [r0, :64]!
31186464859SArd Biesheuvel	vst1.8		{q2}, [r0, :64]!
31286464859SArd Biesheuvel	vmov		s27, ip
31386464859SArd Biesheuvel	b		.Lctrloop3x
31486464859SArd Biesheuvel.Lctr1x:
31586464859SArd Biesheuvel	adds		r4, r4, #3
31686464859SArd Biesheuvel	beq		.Lctrout
31786464859SArd Biesheuvel.Lctrloop:
31886464859SArd Biesheuvel	vmov		q0, q6
31986464859SArd Biesheuvel	bl		aes_encrypt
32086464859SArd Biesheuvel	subs		r4, r4, #1
32186464859SArd Biesheuvel	bmi		.Lctrhalfblock		@ blocks < 0 means 1/2 block
32286464859SArd Biesheuvel	vld1.8		{q3}, [r1, :64]!
32386464859SArd Biesheuvel	veor		q3, q0, q3
32486464859SArd Biesheuvel	vst1.8		{q3}, [r0, :64]!
32586464859SArd Biesheuvel
32686464859SArd Biesheuvel	adds		r6, r6, #1		@ increment BE ctr
32786464859SArd Biesheuvel	rev		ip, r6
32886464859SArd Biesheuvel	vmov		s27, ip
32986464859SArd Biesheuvel	bcs		.Lctrcarry
33086464859SArd Biesheuvel	teq		r4, #0
33186464859SArd Biesheuvel	bne		.Lctrloop
33286464859SArd Biesheuvel.Lctrout:
33386464859SArd Biesheuvel	vst1.8		{q6}, [r5]
33486464859SArd Biesheuvel	pop		{r4-r6, pc}
33586464859SArd Biesheuvel
33686464859SArd Biesheuvel.Lctrhalfblock:
33786464859SArd Biesheuvel	vld1.8		{d1}, [r1, :64]
33886464859SArd Biesheuvel	veor		d0, d0, d1
33986464859SArd Biesheuvel	vst1.8		{d0}, [r0, :64]
34086464859SArd Biesheuvel	pop		{r4-r6, pc}
34186464859SArd Biesheuvel
34286464859SArd Biesheuvel.Lctrcarry:
34386464859SArd Biesheuvel	.irp		sreg, s26, s25, s24
34486464859SArd Biesheuvel	vmov		ip, \sreg		@ load next word of ctr
34586464859SArd Biesheuvel	rev		ip, ip			@ ... to handle the carry
34686464859SArd Biesheuvel	adds		ip, ip, #1
34786464859SArd Biesheuvel	rev		ip, ip
34886464859SArd Biesheuvel	vmov		\sreg, ip
34986464859SArd Biesheuvel	bcc		0f
35086464859SArd Biesheuvel	.endr
35186464859SArd Biesheuvel0:	teq		r4, #0
35286464859SArd Biesheuvel	beq		.Lctrout
35386464859SArd Biesheuvel	b		.Lctrloop
35486464859SArd BiesheuvelENDPROC(ce_aes_ctr_encrypt)
35586464859SArd Biesheuvel
35686464859SArd Biesheuvel	/*
35786464859SArd Biesheuvel	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
35886464859SArd Biesheuvel	 *		   int blocks, u8 iv[], u8 const rk2[], int first)
35986464859SArd Biesheuvel	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
36086464859SArd Biesheuvel	 *		   int blocks, u8 iv[], u8 const rk2[], int first)
36186464859SArd Biesheuvel	 */
36286464859SArd Biesheuvel
36386464859SArd Biesheuvel	.macro		next_tweak, out, in, const, tmp
36486464859SArd Biesheuvel	vshr.s64	\tmp, \in, #63
36586464859SArd Biesheuvel	vand		\tmp, \tmp, \const
36686464859SArd Biesheuvel	vadd.u64	\out, \in, \in
36786464859SArd Biesheuvel	vext.8		\tmp, \tmp, \tmp, #8
36886464859SArd Biesheuvel	veor		\out, \out, \tmp
36986464859SArd Biesheuvel	.endm
37086464859SArd Biesheuvel
37186464859SArd Biesheuvel	.align		3
37286464859SArd Biesheuvel.Lxts_mul_x:
37386464859SArd Biesheuvel	.quad		1, 0x87
37486464859SArd Biesheuvel
37586464859SArd Biesheuvelce_aes_xts_init:
37686464859SArd Biesheuvel	vldr		d14, .Lxts_mul_x
37786464859SArd Biesheuvel	vldr		d15, .Lxts_mul_x + 8
37886464859SArd Biesheuvel
37986464859SArd Biesheuvel	ldrd		r4, r5, [sp, #16]	@ load args
38086464859SArd Biesheuvel	ldr		r6, [sp, #28]
38186464859SArd Biesheuvel	vld1.8		{q0}, [r5]		@ load iv
38286464859SArd Biesheuvel	teq		r6, #1			@ start of a block?
38386464859SArd Biesheuvel	bxne		lr
38486464859SArd Biesheuvel
38586464859SArd Biesheuvel	@ Encrypt the IV in q0 with the second AES key. This should only
38686464859SArd Biesheuvel	@ be done at the start of a block.
38786464859SArd Biesheuvel	ldr		r6, [sp, #24]		@ load AES key 2
38886464859SArd Biesheuvel	prepare_key	r6, r3
38986464859SArd Biesheuvel	add		ip, r6, #32		@ 3rd round key of key 2
39086464859SArd Biesheuvel	b		.Laes_encrypt_tweak	@ tail call
39186464859SArd BiesheuvelENDPROC(ce_aes_xts_init)
39286464859SArd Biesheuvel
39386464859SArd BiesheuvelENTRY(ce_aes_xts_encrypt)
39486464859SArd Biesheuvel	push		{r4-r6, lr}
39586464859SArd Biesheuvel
39686464859SArd Biesheuvel	bl		ce_aes_xts_init		@ run shared prologue
39786464859SArd Biesheuvel	prepare_key	r2, r3
39886464859SArd Biesheuvel	vmov		q3, q0
39986464859SArd Biesheuvel
40086464859SArd Biesheuvel	teq		r6, #0			@ start of a block?
40186464859SArd Biesheuvel	bne		.Lxtsenc3x
40286464859SArd Biesheuvel
40386464859SArd Biesheuvel.Lxtsencloop3x:
40486464859SArd Biesheuvel	next_tweak	q3, q3, q7, q6
40586464859SArd Biesheuvel.Lxtsenc3x:
40686464859SArd Biesheuvel	subs		r4, r4, #3
40786464859SArd Biesheuvel	bmi		.Lxtsenc1x
40886464859SArd Biesheuvel	vld1.8		{q0-q1}, [r1, :64]!	@ get 3 pt blocks
40986464859SArd Biesheuvel	vld1.8		{q2}, [r1, :64]!
41086464859SArd Biesheuvel	next_tweak	q4, q3, q7, q6
41186464859SArd Biesheuvel	veor		q0, q0, q3
41286464859SArd Biesheuvel	next_tweak	q5, q4, q7, q6
41386464859SArd Biesheuvel	veor		q1, q1, q4
41486464859SArd Biesheuvel	veor		q2, q2, q5
41586464859SArd Biesheuvel	bl		aes_encrypt_3x
41686464859SArd Biesheuvel	veor		q0, q0, q3
41786464859SArd Biesheuvel	veor		q1, q1, q4
41886464859SArd Biesheuvel	veor		q2, q2, q5
41986464859SArd Biesheuvel	vst1.8		{q0-q1}, [r0, :64]!	@ write 3 ct blocks
42086464859SArd Biesheuvel	vst1.8		{q2}, [r0, :64]!
42186464859SArd Biesheuvel	vmov		q3, q5
42286464859SArd Biesheuvel	teq		r4, #0
42386464859SArd Biesheuvel	beq		.Lxtsencout
42486464859SArd Biesheuvel	b		.Lxtsencloop3x
42586464859SArd Biesheuvel.Lxtsenc1x:
42686464859SArd Biesheuvel	adds		r4, r4, #3
42786464859SArd Biesheuvel	beq		.Lxtsencout
42886464859SArd Biesheuvel.Lxtsencloop:
42986464859SArd Biesheuvel	vld1.8		{q0}, [r1, :64]!
43086464859SArd Biesheuvel	veor		q0, q0, q3
43186464859SArd Biesheuvel	bl		aes_encrypt
43286464859SArd Biesheuvel	veor		q0, q0, q3
43386464859SArd Biesheuvel	vst1.8		{q0}, [r0, :64]!
43486464859SArd Biesheuvel	subs		r4, r4, #1
43586464859SArd Biesheuvel	beq		.Lxtsencout
43686464859SArd Biesheuvel	next_tweak	q3, q3, q7, q6
43786464859SArd Biesheuvel	b		.Lxtsencloop
43886464859SArd Biesheuvel.Lxtsencout:
43986464859SArd Biesheuvel	vst1.8		{q3}, [r5]
44086464859SArd Biesheuvel	pop		{r4-r6, pc}
44186464859SArd BiesheuvelENDPROC(ce_aes_xts_encrypt)
44286464859SArd Biesheuvel
44386464859SArd Biesheuvel
44486464859SArd BiesheuvelENTRY(ce_aes_xts_decrypt)
44586464859SArd Biesheuvel	push		{r4-r6, lr}
44686464859SArd Biesheuvel
44786464859SArd Biesheuvel	bl		ce_aes_xts_init		@ run shared prologue
44886464859SArd Biesheuvel	prepare_key	r2, r3
44986464859SArd Biesheuvel	vmov		q3, q0
45086464859SArd Biesheuvel
45186464859SArd Biesheuvel	teq		r6, #0			@ start of a block?
45286464859SArd Biesheuvel	bne		.Lxtsdec3x
45386464859SArd Biesheuvel
45486464859SArd Biesheuvel.Lxtsdecloop3x:
45586464859SArd Biesheuvel	next_tweak	q3, q3, q7, q6
45686464859SArd Biesheuvel.Lxtsdec3x:
45786464859SArd Biesheuvel	subs		r4, r4, #3
45886464859SArd Biesheuvel	bmi		.Lxtsdec1x
45986464859SArd Biesheuvel	vld1.8		{q0-q1}, [r1, :64]!	@ get 3 ct blocks
46086464859SArd Biesheuvel	vld1.8		{q2}, [r1, :64]!
46186464859SArd Biesheuvel	next_tweak	q4, q3, q7, q6
46286464859SArd Biesheuvel	veor		q0, q0, q3
46386464859SArd Biesheuvel	next_tweak	q5, q4, q7, q6
46486464859SArd Biesheuvel	veor		q1, q1, q4
46586464859SArd Biesheuvel	veor		q2, q2, q5
46686464859SArd Biesheuvel	bl		aes_decrypt_3x
46786464859SArd Biesheuvel	veor		q0, q0, q3
46886464859SArd Biesheuvel	veor		q1, q1, q4
46986464859SArd Biesheuvel	veor		q2, q2, q5
47086464859SArd Biesheuvel	vst1.8		{q0-q1}, [r0, :64]!	@ write 3 pt blocks
47186464859SArd Biesheuvel	vst1.8		{q2}, [r0, :64]!
47286464859SArd Biesheuvel	vmov		q3, q5
47386464859SArd Biesheuvel	teq		r4, #0
47486464859SArd Biesheuvel	beq		.Lxtsdecout
47586464859SArd Biesheuvel	b		.Lxtsdecloop3x
47686464859SArd Biesheuvel.Lxtsdec1x:
47786464859SArd Biesheuvel	adds		r4, r4, #3
47886464859SArd Biesheuvel	beq		.Lxtsdecout
47986464859SArd Biesheuvel.Lxtsdecloop:
48086464859SArd Biesheuvel	vld1.8		{q0}, [r1, :64]!
48186464859SArd Biesheuvel	veor		q0, q0, q3
48286464859SArd Biesheuvel	add		ip, r2, #32		@ 3rd round key
48386464859SArd Biesheuvel	bl		aes_decrypt
48486464859SArd Biesheuvel	veor		q0, q0, q3
48586464859SArd Biesheuvel	vst1.8		{q0}, [r0, :64]!
48686464859SArd Biesheuvel	subs		r4, r4, #1
48786464859SArd Biesheuvel	beq		.Lxtsdecout
48886464859SArd Biesheuvel	next_tweak	q3, q3, q7, q6
48986464859SArd Biesheuvel	b		.Lxtsdecloop
49086464859SArd Biesheuvel.Lxtsdecout:
49186464859SArd Biesheuvel	vst1.8		{q3}, [r5]
49286464859SArd Biesheuvel	pop		{r4-r6, pc}
49386464859SArd BiesheuvelENDPROC(ce_aes_xts_decrypt)
49486464859SArd Biesheuvel
49586464859SArd Biesheuvel	/*
49686464859SArd Biesheuvel	 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
49786464859SArd Biesheuvel	 *                             AES sbox substitution on each byte in
49886464859SArd Biesheuvel	 *                             'input'
49986464859SArd Biesheuvel	 */
50086464859SArd BiesheuvelENTRY(ce_aes_sub)
50186464859SArd Biesheuvel	vdup.32		q1, r0
50286464859SArd Biesheuvel	veor		q0, q0, q0
50386464859SArd Biesheuvel	aese.8		q0, q1
50486464859SArd Biesheuvel	vmov		r0, s0
50586464859SArd Biesheuvel	bx		lr
50686464859SArd BiesheuvelENDPROC(ce_aes_sub)
50786464859SArd Biesheuvel
50886464859SArd Biesheuvel	/*
50986464859SArd Biesheuvel	 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
51086464859SArd Biesheuvel	 *                                        operation on round key *src
51186464859SArd Biesheuvel	 */
51286464859SArd BiesheuvelENTRY(ce_aes_invert)
51386464859SArd Biesheuvel	vld1.8		{q0}, [r1]
51486464859SArd Biesheuvel	aesimc.8	q0, q0
51586464859SArd Biesheuvel	vst1.8		{q0}, [r0]
51686464859SArd Biesheuvel	bx		lr
51786464859SArd BiesheuvelENDPROC(ce_aes_invert)
518