xref: /openbmc/linux/arch/arm/crypto/aes-cipher-core.S (revision 8be98d2f2a0a262f8bf8a0bc1fdf522b3c7aab17)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */
881edb426SArd Biesheuvel
981edb426SArd Biesheuvel#include <linux/linkage.h>
10913a3aa0SEric Biggers#include <asm/assembler.h>
110d149ce6SArd Biesheuvel#include <asm/cache.h>
1281edb426SArd Biesheuvel
/*
 * Section setup and register aliases used by every macro in this file.
 *
 * .align 5 requests 2^5 = 32 byte alignment on ARM, and applies to the
 * first code emitted afterwards (the macro definitions emit nothing).
 *
 * rk, rounds, in and out name r0-r3 as they are found on entry to the
 * routines built from do_crypt (presumably the argument registers of the
 * C caller - confirm against the C glue code).
 *
 * NOTE: t1 and t2 are the SAME registers as in and out.  The input block
 * must therefore be fully loaded before the first round macro runs, and
 * the output pointer has to be recovered from the stack afterwards
 * (do_crypt pushes r3 on entry and reloads it with "ldr out, [sp]").
 * t0 is lr, which do_crypt also saves on entry.
 */
	.text
	.align		5

	rk		.req	r0	// round key pointer, advanced by "ldm rk!"
	rounds		.req	r1	// round count, later reused for the saved CPSR
	in		.req	r2	// pointer to the 16-byte input block
	out		.req	r3	// pointer to the 16-byte output block
	ttab		.req	ip	// base address of the current lookup table

	t0		.req	lr	// scratch
	t1		.req	r2	// scratch, aliases in
	t2		.req	r3	// scratch, aliases out
2581edb426SArd Biesheuvel
/*
 * __select - isolate byte number \idx (0 = least significant) of \in
 *
 * \out:  destination register
 * \in:   source register
 * \idx:  byte index, 0-3
 *
 * The position of the result depends on the architecture level:
 *  - before ARMv7 the byte is only masked, so it stays at bit 8 * \idx;
 *  - from ARMv7 on, ubfx moves it down to bits 7:0.
 * __load has the matching conditional and undoes the difference, so the
 * two macros must always be used with the same \idx.
 */
	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm
3381edb426SArd Biesheuvel
/*
 * __load - table lookup, indexed by a byte produced by __select
 *
 * \out:  destination register
 * \in:   register holding the __select result for byte \idx
 * \idx:  the byte index that was given to __select
 * \sz:   log2 of the table entry size in bytes (2 for words, 0 for bytes)
 * \op:   suffix appended to ldr; blank for a word load, b for a byte load
 *
 * The effective address is ttab + (byte << \sz).  Before ARMv7, __select
 * leaves bytes 1-3 at bit 8 * \idx, so the index is shifted right by
 * (8 * \idx - \sz) instead of left by \sz.  Byte 0 is already at bit 0 on
 * every architecture level and always takes the lsl form.
 */
	.macro		__load, out, in, idx, sz, op
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
	.else
	ldr\op		\out, [ttab, \in, lsl #\sz]
	.endif
	.endm
4181edb426SArd Biesheuvel
/*
 * __hround - half of one AES round: produce two of the four output words
 *
 * \out0, \out1:  destination registers
 * \in0 - \in3:   source state words, already ordered by the caller
 * \t3, \t4:      two extra scratch registers supplied by the caller
 * \enc:          non-zero routes the source bytes for encryption, zero for
 *                decryption (only the sources of \out1 differ)
 * \sz, \op:      forwarded unchanged to __load
 * \oldcpsr:      optional; register holding a saved CPSR.  When given,
 *                restore_irqs is expanded right after the last table lookup.
 *
 * With T[] the table at ttab and bN(x) byte N of x:
 *
 *   \out0 = T[b0(\in0)] ^ ror(T[b1(\in1)], 24) ^ ror(T[b2(\in2)], 16)
 *           ^ ror(T[b3(\in3)], 8) ^ rk[0]
 *
 *   \out1 is built the same way from b0(\in1), b1(\in2), b2(\in3), b3(\in0)
 *   when encrypting, or b0(\in3), b1(\in0), b2(\in1), b3(\in2) when
 *   decrypting, and is finally xor-ed with rk[1].
 *
 * Clobbers t0, t1, t2, \t3 and \t4, and advances rk by two words.  No
 * flag-setting instruction is used, which do_crypt relies on.  Selects and
 * loads are interleaved by hand; the order also encodes register reuse, so
 * do not reorder without re-checking every use of t0-t2.
 */
	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
	// \out0 doubles as the index register for its own byte 0 lookup
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0, \sz, \op
	__load		t0, t0, 1, \sz, \op

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0, \sz, \op
	__select	t2, \in2, 2
	__load		t1, t1, 1, \sz, \op
	__load		t2, t2, 2, \sz, \op

	// consume the byte 1 result now: t0 is reused for byte 3 just below
	eor		\out0, \out0, t0, ror #24

	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2, \sz, \op
	__load		t0, t0, 3, \sz, \op
	__load		\t4, \t4, 3, \sz, \op

	.ifnb		\oldcpsr
	/*
	 * This is the final round and we're done with all data-dependent table
	 * lookups, so we can safely re-enable interrupts.
	 */
	restore_irqs	\oldcpsr
	.endif

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	// t1 and t2 were consumed by the two eor above; reuse them for the
	// next two round key words
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm
9181edb426SArd Biesheuvel
/*
 * fround - one full round in the encryption direction (\enc = 1)
 *
 * \out0 - \out3:  destination state words
 * \in0 - \in3:    source state words
 * \sz:            table entry size shift for __load, defaults to 2
 * \op:            optional ldr suffix for __load
 * \oldcpsr:       optional saved-CPSR register, handed to the second half
 *                 only so that interrupts come back after the last lookup
 *
 * The first half computes \out0/\out1 and borrows \out2/\out3 as scratch.
 * The second half computes \out2/\out3 and uses \in1 and \in2 as scratch,
 * so the source registers do NOT survive this macro.
 */
	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
	.endm
9681edb426SArd Biesheuvel
/*
 * iround - one full round in the decryption direction (\enc = 0)
 *
 * Same parameters as fround.  The source words are handed to __hround in
 * the order in0, in3, in2, in1 (and in2, in1, in0, in3 for the second
 * half), i.e. the opposite rotation to fround.
 *
 * The first half borrows \out2/\out3 as scratch.  The second half uses
 * \in1 and \in0 as scratch, so the source registers do NOT survive this
 * macro.
 */
	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
	.endm
10181edb426SArd Biesheuvel
/*
 * do_crypt - complete body of a single-block AES routine
 *
 * \round:  round macro to expand, fround or iround
 * \ttab:   symbol of the 1024-byte table used by every round but the last
 * \ltab:   optional symbol of a 256-byte table for the last round; when
 *          blank the last round reads from \ttab + 1 instead
 * \bsz:    entry size shift (sz) given to the last round
 *
 * On entry rk, rounds, in and out hold the values named by the register
 * aliases at the top of this file.  The 16 bytes at in are transformed and
 * stored to out; r4-r11 are preserved, and the macro returns to the caller
 * by popping pc.
 *
 * Inside this macro "\ttab" is the table symbol (macro argument) while a
 * bare "ttab" is still the register alias for ip.
 *
 * mov_l, rev_l, save_and_disable_irqs and restore_irqs are not defined in
 * this file (presumably they come from <asm/assembler.h> - confirm).
 */
	.macro		do_crypt, round, ttab, ltab, bsz
	// r3 (out) is saved as well: t2 aliases it and the rounds clobber it
	push		{r3-r11, lr}

	// Load keys first, to reduce latency in case they're not cached yet.
	ldm		rk!, {r8-r11}

	// must complete before any round runs, since t1 aliases in
	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

	// initial key addition
	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	mov_l		ttab, \ttab
	/*
	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
	 * intended to make cache-timing attacks more difficult.  They may not
	 * be fully prevented, however; see the paper
	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
	 * ("Cache-timing attacks on AES") for a discussion of the many
	 * difficulties involved in writing truly constant-time AES software.
	 */
	save_and_disable_irqs	t0
	// 8 iterations x 4 loads, one load every 32 bytes.  The loaded values
	// are discarded; r8-r11 are free since the key words were used above.
	.set		i, 0
	.rept		1024 / 128
	ldr		r8, [ttab, #i + 0]
	ldr		r9, [ttab, #i + 32]
	ldr		r10, [ttab, #i + 64]
	ldr		r11, [ttab, #i + 96]
	.set		i, i + 128
	.endr
	// t0 is clobbered by every round, so park the old CPSR on the stack
	push		{t0}		// oldcpsr

	/*
	 * Round loop.  The loop body holds four rounds, and the last round is
	 * expanded separately after label 2.  If bit 1 of rounds is set the
	 * loop is entered half way, at label 1, which skips two rounds on the
	 * first pass (this works out for round counts of 10, 12 and 14).
	 */
	tst		rounds, #2
	bne		1f

0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

	// the flags from subs are tested only after the next round; the round
	// macros contain no flag-setting instruction, so they survive
1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	bls		2f
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	b		0b

	/*
	 * Last round.  Without \ltab it performs byte loads from the main
	 * table shifted by one byte (presumably the byte of each entry that
	 * holds the plain S-box value - confirm against the table layout).
	 */
2:	.ifb		\ltab
	add		ttab, ttab, #1
	.else
	mov_l		ttab, \ltab
	// Prefetch inverse S-box for final round; see explanation above
	.set		i, 0
	.rept		256 / 64
	ldr		t0, [ttab, #i + 0]
	ldr		t1, [ttab, #i + 32]
	.set		i, i + 64
	.endr
	.endif

	// the round counter is no longer needed: reuse its register for the
	// saved CPSR, which the last round hands to restore_irqs
	pop		{rounds}	// oldcpsr
	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

	// sp points at the r3 slot of the entry push again: recover out
	ldr		out, [sp]

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]

	pop		{r3-r11, pc}

	// literal pool for this expansion, placed after the return
	.align		3
	.ltorg
	.endm
19381edb426SArd Biesheuvel
/*
 * __aes_arm_encrypt - encrypt one 16-byte block
 *
 * In:  r0 = rk (round keys), r1 = rounds, r2 = in, r3 = out
 *
 * Built from do_crypt with fround and crypto_ft_tab.  No separate last-round
 * table is given, so the final round does byte loads (sz = 2) from
 * crypto_ft_tab + 1.
 */
ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)
1974ff8b1ddSJinbum Park
/*
 * __aes_arm_decrypt - decrypt one 16-byte block
 *
 * In:  r0 = rk (round keys), r1 = rounds, r2 = in, r3 = out
 *
 * Built from do_crypt with iround and crypto_it_tab.  The final round does
 * byte loads (sz = 0) from crypto_aes_inv_sbox, which do_crypt prefetches
 * before interrupts are re-enabled.  The entry point is aligned to 32 bytes.
 */
	.align		5
ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
ENDPROC(__aes_arm_decrypt)
202