/* xref: /openbmc/linux/arch/arm/crypto/aes-cipher-core.S (revision 4f727ecefefbd180de10e25b3e74c03dce3f1e75) */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
11
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		5

	/*
	 * Register aliases used throughout this file.
	 *
	 * rk/rounds/in/out name r0-r3 (presumably the four arguments of the
	 * entry points below -- confirm against the C prototypes).  ttab holds
	 * the base address of the lookup table currently in use.
	 */
	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	ttab		.req	ip

	/*
	 * Scratch registers.  Note the deliberate overlap: t1 is the same
	 * register as 'in' (r2) and t2 is the same register as 'out' (r3), so
	 * any use of t1/t2 destroys in/out.  t0 is lr.
	 */
	t0		.req	lr
	t1		.req	r2
	t2		.req	r3
28
29	.macro		__select, out, in, idx
30	.if		__LINUX_ARM_ARCH__ < 7
31	and		\out, \in, #0xff << (8 * \idx)
32	.else
33	ubfx		\out, \in, #(8 * \idx), #8
34	.endif
35	.endm
36
37	.macro		__load, out, in, idx, sz, op
38	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
39	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
40	.else
41	ldr\op		\out, [ttab, \in, lsl #\sz]
42	.endif
43	.endm
44
45	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
46	__select	\out0, \in0, 0
47	__select	t0, \in1, 1
48	__load		\out0, \out0, 0, \sz, \op
49	__load		t0, t0, 1, \sz, \op
50
51	.if		\enc
52	__select	\out1, \in1, 0
53	__select	t1, \in2, 1
54	.else
55	__select	\out1, \in3, 0
56	__select	t1, \in0, 1
57	.endif
58	__load		\out1, \out1, 0, \sz, \op
59	__select	t2, \in2, 2
60	__load		t1, t1, 1, \sz, \op
61	__load		t2, t2, 2, \sz, \op
62
63	eor		\out0, \out0, t0, ror #24
64
65	__select	t0, \in3, 3
66	.if		\enc
67	__select	\t3, \in3, 2
68	__select	\t4, \in0, 3
69	.else
70	__select	\t3, \in1, 2
71	__select	\t4, \in2, 3
72	.endif
73	__load		\t3, \t3, 2, \sz, \op
74	__load		t0, t0, 3, \sz, \op
75	__load		\t4, \t4, 3, \sz, \op
76
77	.ifnb		\oldcpsr
78	/*
79	 * This is the final round and we're done with all data-dependent table
80	 * lookups, so we can safely re-enable interrupts.
81	 */
82	restore_irqs	\oldcpsr
83	.endif
84
85	eor		\out1, \out1, t1, ror #24
86	eor		\out0, \out0, t2, ror #16
87	ldm		rk!, {t1, t2}
88	eor		\out1, \out1, \t3, ror #16
89	eor		\out0, \out0, t0, ror #8
90	eor		\out1, \out1, \t4, ror #8
91	eor		\out0, \out0, t1
92	eor		\out1, \out1, t2
93	.endm
94
95	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
96	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
97	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
98	.endm
99
100	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
101	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
102	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
103	.endm
104
105	.macro		__rev, out, in
106	.if		__LINUX_ARM_ARCH__ < 6
107	lsl		t0, \in, #24
108	and		t1, \in, #0xff00
109	and		t2, \in, #0xff0000
110	orr		\out, t0, \in, lsr #24
111	orr		\out, \out, t1, lsl #8
112	orr		\out, \out, t2, lsr #8
113	.else
114	rev		\out, \in
115	.endif
116	.endm
117
118	.macro		__adrl, out, sym, c
119	.if		__LINUX_ARM_ARCH__ < 7
120	ldr\c		\out, =\sym
121	.else
122	movw\c		\out, #:lower16:\sym
123	movt\c		\out, #:upper16:\sym
124	.endif
125	.endm
126
127	.macro		do_crypt, round, ttab, ltab, bsz
128	push		{r3-r11, lr}
129
130	// Load keys first, to reduce latency in case they're not cached yet.
131	ldm		rk!, {r8-r11}
132
133	ldr		r4, [in]
134	ldr		r5, [in, #4]
135	ldr		r6, [in, #8]
136	ldr		r7, [in, #12]
137
138#ifdef CONFIG_CPU_BIG_ENDIAN
139	__rev		r4, r4
140	__rev		r5, r5
141	__rev		r6, r6
142	__rev		r7, r7
143#endif
144
145	eor		r4, r4, r8
146	eor		r5, r5, r9
147	eor		r6, r6, r10
148	eor		r7, r7, r11
149
150	__adrl		ttab, \ttab
151	/*
152	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
153	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
154	 * intended to make cache-timing attacks more difficult.  They may not
155	 * be fully prevented, however; see the paper
156	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
157	 * ("Cache-timing attacks on AES") for a discussion of the many
158	 * difficulties involved in writing truly constant-time AES software.
159	 */
160	 save_and_disable_irqs	t0
161	.set		i, 0
162	.rept		1024 / 128
163	ldr		r8, [ttab, #i + 0]
164	ldr		r9, [ttab, #i + 32]
165	ldr		r10, [ttab, #i + 64]
166	ldr		r11, [ttab, #i + 96]
167	.set		i, i + 128
168	.endr
169	push		{t0}		// oldcpsr
170
171	tst		rounds, #2
172	bne		1f
173
1740:	\round		r8, r9, r10, r11, r4, r5, r6, r7
175	\round		r4, r5, r6, r7, r8, r9, r10, r11
176
1771:	subs		rounds, rounds, #4
178	\round		r8, r9, r10, r11, r4, r5, r6, r7
179	bls		2f
180	\round		r4, r5, r6, r7, r8, r9, r10, r11
181	b		0b
182
1832:	.ifb		\ltab
184	add		ttab, ttab, #1
185	.else
186	__adrl		ttab, \ltab
187	// Prefetch inverse S-box for final round; see explanation above
188	.set		i, 0
189	.rept		256 / 64
190	ldr		t0, [ttab, #i + 0]
191	ldr		t1, [ttab, #i + 32]
192	.set		i, i + 64
193	.endr
194	.endif
195
196	pop		{rounds}	// oldcpsr
197	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds
198
199#ifdef CONFIG_CPU_BIG_ENDIAN
200	__rev		r4, r4
201	__rev		r5, r5
202	__rev		r6, r6
203	__rev		r7, r7
204#endif
205
206	ldr		out, [sp]
207
208	str		r4, [out]
209	str		r5, [out, #4]
210	str		r6, [out, #8]
211	str		r7, [out, #12]
212
213	pop		{r3-r11, pc}
214
215	.align		3
216	.ltorg
217	.endm
218
/*
 * __aes_arm_encrypt - encrypt a single 16-byte block
 *
 * Register arguments as named by the aliases at the top of this file:
 * r0 = rk, r1 = rounds, r2 = in, r3 = out.  (Presumably called from C with
 * a matching four-argument prototype -- confirm against the caller.)
 *
 * Full rounds use fround with crypto_ft_tab.  No separate final-round table
 * is given, so the final round does byte loads from crypto_ft_tab + 1 with
 * the index scaled by 4 (bsz = 2).
 */
ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)
222
223	.align		5
224ENTRY(__aes_arm_decrypt)
225	do_crypt	iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
226ENDPROC(__aes_arm_decrypt)
227
228	.section	".rodata", "a"
229	.align		L1_CACHE_SHIFT
230	.type		__aes_arm_inverse_sbox, %object
231__aes_arm_inverse_sbox:
232	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
233	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
234	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
235	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
236	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
237	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
238	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
239	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
240	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
241	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
242	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
243	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
244	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
245	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
246	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
247	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
248	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
249	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
250	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
251	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
252	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
253	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
254	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
255	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
256	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
257	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
258	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
259	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
260	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
261	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
262	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
263	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
264	.size		__aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox
265