/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		5

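	/*
	 * Symbolic register names.  The four AAPCS argument registers carry
	 * the expanded round key pointer, the round count and the input/output
	 * block pointers.  t1/t2 alias in/out; this is safe because the input
	 * words are consumed immediately and the output pointer is spilled to
	 * the stack at entry and reloaded before the result is stored.
	 */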
	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	ttab		.req	ip

	t0		.req	lr
	t1		.req	r2
	t2		.req	r3

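	/*
	 * __select: extract byte \idx of \in into \out.  Pre-v7 cores lack
	 * UBFX, so there the byte is only masked in place; __load compensates
	 * by shifting the index down as part of the load.
	 */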
	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm

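	/*
	 * __load: look up the byte produced by __select in the table at ttab.
	 * \sz is the log2 of the table entry size (2 for the 32-bit lookup
	 * tables, 0 for the byte-wide S-box) and \op is an optional 'b'
	 * suffix for the byte loads of the final round.
	 */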
	.macro		__load, out, in, idx, sz, op
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
	.else
	ldr\op		\out, [ttab, \in, lsl #\sz]
	.endif
	.endm

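	/*
	 * __hround: compute two output columns of one round from the four
	 * input columns.  Each output is the XOR of four table lookups,
	 * realigned with ror #24/#16/#8, plus two round key words fetched
	 * with ldm rk!.  \enc selects the byte ordering for encryption vs
	 * decryption; \oldcpsr is only passed for the final round, so that
	 * interrupts can be re-enabled once the last data-dependent lookup
	 * has been issued.
	 */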
	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0, \sz, \op
	__load		t0, t0, 1, \sz, \op

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0, \sz, \op
	__select	t2, \in2, 2
	__load		t1, t1, 1, \sz, \op
	__load		t2, t2, 2, \sz, \op

	eor		\out0, \out0, t0, ror #24

	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2, \sz, \op
	__load		t0, t0, 3, \sz, \op
	__load		\t4, \t4, 3, \sz, \op

	.ifnb		\oldcpsr
	/*
	 * This is the final round and we're done with all data-dependent table
	 * lookups, so we can safely re-enable interrupts.
	 */
	restore_irqs	\oldcpsr
	.endif

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm

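	/*
	 * A full round is two half rounds.  The order in which the input
	 * columns are handed to __hround is what implements ShiftRows for
	 * fround and InvShiftRows for iround.
	 */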
	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
	.endm

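	/*
	 * do_crypt: common body for encryption and decryption.  \round is
	 * fround or iround, \ttab is the main lookup table, \ltab is an
	 * optional separate table for the final round (the inverse S-box when
	 * decrypting), and \bsz is the log2 entry size passed to that final
	 * round.
	 */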
	.macro		do_crypt, round, ttab, ltab, bsz
	push		{r3-r11, lr}

	// Load keys first, to reduce latency in case they're not cached yet.
	ldm		rk!, {r8-r11}

	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

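	// Initial AddRoundKey: XOR the input block with the first round key.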
	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	mov_l		ttab, \ttab
	/*
	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
	 * intended to make cache-timing attacks more difficult.  They may not
	 * be fully prevented, however; see the paper
	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
	 * ("Cache-timing attacks on AES") for a discussion of the many
	 * difficulties involved in writing truly constant-time AES software.
	 */
	save_and_disable_irqs	t0
	.set		i, 0
	.rept		1024 / 128
	ldr		r8, [ttab, #i + 0]
	ldr		r9, [ttab, #i + 32]
	ldr		r10, [ttab, #i + 64]
	ldr		r11, [ttab, #i + 96]
	.set		i, i + 128
	.endr
	push		{t0}		// oldcpsr

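	/*
	 * The loop below runs four rounds per iteration.  Entering at 1f when
	 * bit 1 of the round count is set (e.g. the 10 or 14 rounds of
	 * AES-128/AES-256) means exactly rounds - 1 full rounds are executed
	 * before the final round at 2f.
	 */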
	tst		rounds, #2
	bne		1f

0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	bls		2f
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	b		0b

2:	.ifb		\ltab
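	/*
	 * Encryption only: each 32-bit crypto_ft_tab entry carries the plain
	 * S-box value at byte offset 1, so nudging the base pointer by one
	 * lets the byte-wide loads of the final round pick up S-box outputs
	 * directly, with no separate table.
	 */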
	add		ttab, ttab, #1
	.else
	mov_l		ttab, \ltab
	// Prefetch inverse S-box for final round; see explanation above
	.set		i, 0
	.rept		256 / 64
	ldr		t0, [ttab, #i + 0]
	ldr		t1, [ttab, #i + 32]
	.set		i, i + 64
	.endr
	.endif

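	// Final round: byte-wide lookups; the saved CPSR (held in 'rounds',
	// which is free by now) lets the round macro re-enable interrupts.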
	pop		{rounds}	// oldcpsr
	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

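	// 'out' (r3) was reused as a temporary; reload it from the copy
	// pushed at function entry.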
	ldr		out, [sp]

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]

	pop		{r3-r11, pc}

	.align		3
	.ltorg
	.endm

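	/*
	 * Callable as roughly
	 *   void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out)
	 * (and likewise for the decrypt entry point); the exact prototype
	 * lives in the C glue code.
	 */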
ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)

	.align		5
ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
ENDPROC(__aes_arm_decrypt)
