/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		5

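	/*
	 * Register aliases: rk, rounds, in and out mirror the four function
	 * arguments in r0-r3.  t1 and t2 reuse in and out, which is safe
	 * because the input block is fully loaded before the first round and
	 * the output pointer is saved on the stack by do_crypt and reloaded
	 * only when the result is stored.
	 */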
	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	ttab		.req	ip

	t0		.req	lr
	t1		.req	r2
	t2		.req	r3

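	/*
	 * __select: extract byte \idx of \in into \out.  ARMv7+ uses ubfx to
	 * move the byte down into bits [7:0]; older architectures only mask
	 * it in place, which __load compensates for below.
	 */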
	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm

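	/*
	 * __load: look up the table entry for a byte produced by __select.
	 * \sz is the log2 of the entry size (2 for the 32-bit ft/it tables,
	 * 0 for a byte table) and \op selects ldr versus ldrb.  On pre-v7
	 * CPUs the selected byte is still in its original position, so the
	 * index is shifted down by (8 * \idx) - \sz rather than scaled up.
	 */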
	.macro		__load, out, in, idx, sz, op
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
	.else
	ldr\op		\out, [ttab, \in, lsl #\sz]
	.endif
	.endm

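	/*
	 * __hround: compute two output columns of a table-based AES round.
	 * Each output word combines four table lookups (one per input byte),
	 * merged with rotations because a single 1 kB table is used, and is
	 * then XORed with the next round key word fetched via ldm rk!.  \enc
	 * selects the forward or inverse byte ordering, and a non-blank
	 * \oldcpsr marks the final round, where interrupts are re-enabled.
	 */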
	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0, \sz, \op
	__load		t0, t0, 1, \sz, \op

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0, \sz, \op
	__select	t2, \in2, 2
	__load		t1, t1, 1, \sz, \op
	__load		t2, t2, 2, \sz, \op

	eor		\out0, \out0, t0, ror #24

	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2, \sz, \op
	__load		t0, t0, 3, \sz, \op
	__load		\t4, \t4, 3, \sz, \op

	.ifnb		\oldcpsr
	/*
	 * This is the final round and we're done with all data-dependent table
	 * lookups, so we can safely re-enable interrupts.
	 */
	restore_irqs	\oldcpsr
	.endif

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm

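	/*
	 * fround/iround: one full forward/inverse AES round built from two
	 * __hround invocations covering all four columns.  The input operand
	 * orderings implement ShiftRows and InvShiftRows respectively.  \sz
	 * and \op default to word-wide lookups; do_crypt overrides them (and
	 * passes \oldcpsr) only for the final round.
	 */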
	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
	.endm

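	/*
	 * __rev: byte-swap a 32-bit word.  Open-coded with shifts and masks
	 * on pre-ARMv6 cores, which lack the rev instruction.
	 */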
	.macro		__rev, out, in
	.if		__LINUX_ARM_ARCH__ < 6
	lsl		t0, \in, #24
	and		t1, \in, #0xff00
	and		t2, \in, #0xff0000
	orr		\out, t0, \in, lsr #24
	orr		\out, \out, t1, lsl #8
	orr		\out, \out, t2, lsr #8
	.else
	rev		\out, \in
	.endif
	.endm

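	/*
	 * __adrl: load the address of \sym into \out, with an optional
	 * condition code \c.  ARMv7+ uses a movw/movt pair; older
	 * architectures fall back to a literal pool load.
	 */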
	.macro		__adrl, out, sym, c
	.if		__LINUX_ARM_ARCH__ < 7
	ldr\c		\out, =\sym
	.else
	movw\c		\out, #:lower16:\sym
	movt\c		\out, #:upper16:\sym
	.endif
	.endm

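	/*
	 * do_crypt: shared body of the encryption and decryption routines.
	 * \round is the per-round macro (fround or iround), \ttab the main
	 * lookup table, \ltab an optional byte table for the final round and
	 * \bsz the log2 scaling applied to the final-round lookups.
	 */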
	.macro		do_crypt, round, ttab, ltab, bsz
	push		{r3-r11, lr}

	// Load keys first, to reduce latency in case they're not cached yet.
	ldm		rk!, {r8-r11}

	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

#ifdef CONFIG_CPU_BIG_ENDIAN
	__rev		r4, r4
	__rev		r5, r5
	__rev		r6, r6
	__rev		r7, r7
#endif

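	// Initial AddRoundKey: XOR the first four round key words into the state.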
	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	__adrl		ttab, \ttab
	/*
	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
	 * intended to make cache-timing attacks more difficult.  They may not
	 * be fully prevented, however; see the paper
	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
	 * ("Cache-timing attacks on AES") for a discussion of the many
	 * difficulties involved in writing truly constant-time AES software.
	 */
	save_and_disable_irqs	t0
	.set		i, 0
	.rept		1024 / 128
	ldr		r8, [ttab, #i + 0]
	ldr		r9, [ttab, #i + 32]
	ldr		r10, [ttab, #i + 64]
	ldr		r11, [ttab, #i + 96]
	.set		i, i + 128
	.endr
	push		{t0}		// oldcpsr

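	/*
	 * Round loop: all but the last round use word-wide table lookups.
	 * The entry point selected here makes the loop run exactly
	 * rounds - 1 full rounds (for 10, 12 or 14 rounds) before falling
	 * through to the final round at 2f.
	 */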
	tst		rounds, #2
	bne		1f

0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	bls		2f
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	b		0b

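	/*
	 * Set up the table pointer for the final round.  For encryption
	 * (\ltab left blank) the byte-wide lookups reuse the ft table offset
	 * by one byte, where each 32-bit entry carries the plain S-box value;
	 * for decryption the it table entries do not contain the inverse
	 * S-box, so a separate 256-byte table is loaded and prefetched.
	 */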
2:	.ifb		\ltab
	add		ttab, ttab, #1
	.else
	__adrl		ttab, \ltab
	// Prefetch inverse S-box for final round; see explanation above
	.set		i, 0
	.rept		256 / 64
	ldr		t0, [ttab, #i + 0]
	ldr		t1, [ttab, #i + 32]
	.set		i, i + 64
	.endr
	.endif

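	// Final round: byte-wide lookups, passing the saved CPSR (now in
	// 'rounds') so the round macro can re-enable interrupts once the last
	// data-dependent table lookup has been issued.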
	pop		{rounds}	// oldcpsr
	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
	__rev		r4, r4
	__rev		r5, r5
	__rev		r6, r6
	__rev		r7, r7
#endif

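	// 'out' (r3) doubles as t2 above, so reload it from the bottom of the
	// stack frame before storing the result.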
	ldr		out, [sp]

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]

	pop		{r3-r11, pc}

	.align		3
	.ltorg
	.endm

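/*
 * Entry points.  Arguments arrive in r0-r3 and correspond to the rk, rounds,
 * in and out aliases declared above.  Encryption does its final-round lookups
 * in the ft table (bsz = 2); decryption uses the byte-wide inverse S-box
 * (bsz = 0).
 */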
ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)

	.align		5
ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
ENDPROC(__aes_arm_decrypt)