/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		5

	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	ttab		.req	ip

	t0		.req	lr
	t1		.req	r2
	t2		.req	r3

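	/*
	 * __select: extract byte \idx of \in into \out.  ARMv7+ uses a single
	 * ubfx; older cores just mask the byte in place, and __load below
	 * compensates for the value still being shifted up.
	 */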
	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm

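	/*
	 * __load: load the table entry indexed by a byte selected with
	 * __select.  \sz is the log2 of the entry size (2 for the 32-bit
	 * lookup tables, 0 for the byte-wide inverse S-box) and \op is the
	 * ldr suffix ('b' for the byte loads in the final round).
	 */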
	.macro		__load, out, in, idx, sz, op
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
	.else
	ldr\op		\out, [ttab, \in, lsl #\sz]
	.endif
	.endm

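	/*
	 * __hround: half a round, producing output columns \out0 and \out1
	 * from input columns \in0-\in3.  \t3 and \t4 are scratch registers
	 * for the other half round, \enc selects forward or inverse byte
	 * order, and a non-blank \oldcpsr marks the final round so that
	 * interrupts can be restored after the last data-dependent lookup.
	 */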
	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0, \sz, \op
	__load		t0, t0, 1, \sz, \op

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0, \sz, \op
	__select	t2, \in2, 2
	__load		t1, t1, 1, \sz, \op
	__load		t2, t2, 2, \sz, \op

	eor		\out0, \out0, t0, ror #24

	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2, \sz, \op
	__load		t0, t0, 3, \sz, \op
	__load		\t4, \t4, 3, \sz, \op

	.ifnb		\oldcpsr
	/*
	 * This is the final round and we're done with all data-dependent table
	 * lookups, so we can safely re-enable interrupts.
	 */
	restore_irqs	\oldcpsr
	.endif

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm

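	// fround: one full forward (encryption) round, built from two half rounds.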
	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
	.endm

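	// iround: one full inverse (decryption) round.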
	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
	.endm

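	// __rev: byte-swap \in into \out, open-coded on CPUs without 'rev'.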
	.macro		__rev, out, in
	.if		__LINUX_ARM_ARCH__ < 6
	lsl		t0, \in, #24
	and		t1, \in, #0xff00
	and		t2, \in, #0xff0000
	orr		\out, t0, \in, lsr #24
	orr		\out, \out, t1, lsl #8
	orr		\out, \out, t2, lsr #8
	.else
	rev		\out, \in
	.endif
	.endm

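	// __adrl: load the address of \sym into \out, using movw/movt where available.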
	.macro		__adrl, out, sym, c
	.if		__LINUX_ARM_ARCH__ < 7
	ldr\c		\out, =\sym
	.else
	movw\c		\out, #:lower16:\sym
	movt\c		\out, #:upper16:\sym
	.endif
	.endm

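	/*
	 * do_crypt: shared encrypt/decrypt body.  \round is the per-round
	 * macro (fround or iround), \ttab the main lookup table, \ltab the
	 * table used for the final round (blank for encryption) and \bsz the
	 * \sz value passed to that final round.
	 */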
	.macro		do_crypt, round, ttab, ltab, bsz
	push		{r3-r11, lr}

	// Load keys first, to reduce latency in case they're not cached yet.
	ldm		rk!, {r8-r11}

	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

#ifdef CONFIG_CPU_BIG_ENDIAN
	__rev		r4, r4
	__rev		r5, r5
	__rev		r6, r6
	__rev		r7, r7
#endif

	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	__adrl		ttab, \ttab
	/*
	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
	 * intended to make cache-timing attacks more difficult.  They may not
	 * be fully prevented, however; see the paper
	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
	 * ("Cache-timing attacks on AES") for a discussion of the many
	 * difficulties involved in writing truly constant-time AES software.
	 */
	save_and_disable_irqs	t0
	.set		i, 0
	.rept		1024 / 128
	ldr		r8, [ttab, #i + 0]
	ldr		r9, [ttab, #i + 32]
	ldr		r10, [ttab, #i + 64]
	ldr		r11, [ttab, #i + 96]
	.set		i, i + 128
	.endr
	push		{t0}		// oldcpsr

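	// Perform rounds - 1 full rounds, two per loop iteration; entering at
	// 1f when bit 1 of 'rounds' is set makes the odd iteration count come
	// out right.  The final round is handled separately below.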
	tst		rounds, #2
	bne		1f

0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	bls		2f
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	b		0b

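	// Final round: with no \ltab (encryption), advance ttab by one byte so
	// the byte loads below pick up the un-multiplied S-box byte of each
	// 32-bit table entry; otherwise (decryption), switch to the byte-wide
	// inverse S-box, prefetching it as well.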
2:	.ifb		\ltab
	add		ttab, ttab, #1
	.else
	__adrl		ttab, \ltab
	// Prefetch inverse S-box for final round; see explanation above
	.set		i, 0
	.rept		256 / 64
	ldr		t0, [ttab, #i + 0]
	ldr		t1, [ttab, #i + 32]
	.set		i, i + 64
	.endr
	.endif

	pop		{rounds}	// oldcpsr
	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
	__rev		r4, r4
	__rev		r5, r5
	__rev		r6, r6
	__rev		r7, r7
#endif

	ldr		out, [sp]

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]

	pop		{r3-r11, pc}

	.align		3
	.ltorg
	.endm

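	// Arguments (see the .req aliases above): r0 = round keys,
	// r1 = number of rounds, r2 = input block, r3 = output block.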
ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)

	.align		5
ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
ENDPROC(__aes_arm_decrypt)

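	// Byte-wide inverse S-box used by the final decryption round, aligned
	// to the cache line size so that the prefetch loop above touches every
	// line it occupies.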
	.section	".rodata", "a"
	.align		L1_CACHE_SHIFT
	.type		__aes_arm_inverse_sbox, %object
__aes_arm_inverse_sbox:
	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
	.size		__aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox