/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */
7
8#include <linux/linkage.h>
9#include <asm/assembler.h>
10#include <asm/cache.h>
11
12	.text
13
14	rk		.req	x0
15	out		.req	x1
16	in		.req	x2
17	rounds		.req	x3
18	tt		.req	x2
19
20	.macro		__pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift
21	.ifc		\op\shift, b0
22	ubfiz		\reg0, \in0, #2, #8
23	ubfiz		\reg1, \in1e, #2, #8
24	.else
25	ubfx		\reg0, \in0, #\shift, #8
26	ubfx		\reg1, \in1e, #\shift, #8
27	.endif
28
29	/*
30	 * AArch64 cannot do byte size indexed loads from a table containing
31	 * 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a
32	 * valid instruction. So perform the shift explicitly first for the
33	 * high bytes (the low byte is shifted implicitly by using ubfiz rather
34	 * than ubfx above)
35	 */
36	.ifnc		\op, b
37	ldr		\reg0, [tt, \reg0, uxtw #2]
38	ldr		\reg1, [tt, \reg1, uxtw #2]
39	.else
40	.if		\shift > 0
41	lsl		\reg0, \reg0, #2
42	lsl		\reg1, \reg1, #2
43	.endif
44	ldrb		\reg0, [tt, \reg0, uxtw]
45	ldrb		\reg1, [tt, \reg1, uxtw]
46	.endif
47	.endm
48
49	.macro		__pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift
50	ubfx		\reg0, \in0, #\shift, #8
51	ubfx		\reg1, \in1d, #\shift, #8
52	ldr\op		\reg0, [tt, \reg0, uxtw #\sz]
53	ldr\op		\reg1, [tt, \reg1, uxtw #\sz]
54	.endm
55
56	.macro		__hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op
57	ldp		\out0, \out1, [rk], #8
58
59	__pair\enc	\sz, \op, w12, w13, \in0, \in1, \in3, 0
60	__pair\enc	\sz, \op, w14, w15, \in1, \in2, \in0, 8
61	__pair\enc	\sz, \op, w16, w17, \in2, \in3, \in1, 16
62	__pair\enc	\sz, \op, \t0, \t1, \in3, \in0, \in2, 24
63
64	eor		\out0, \out0, w12
65	eor		\out1, \out1, w13
66	eor		\out0, \out0, w14, ror #24
67	eor		\out1, \out1, w15, ror #24
68	eor		\out0, \out0, w16, ror #16
69	eor		\out1, \out1, w17, ror #16
70	eor		\out0, \out0, \t0, ror #8
71	eor		\out1, \out1, \t1, ror #8
72	.endm
73
74	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
75	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
76	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
77	.endm
78
79	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
80	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
81	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
82	.endm
83
84	.macro		do_crypt, round, ttab, ltab, bsz
85	ldp		w4, w5, [in]
86	ldp		w6, w7, [in, #8]
87	ldp		w8, w9, [rk], #16
88	ldp		w10, w11, [rk, #-8]
89
90CPU_BE(	rev		w4, w4		)
91CPU_BE(	rev		w5, w5		)
92CPU_BE(	rev		w6, w6		)
93CPU_BE(	rev		w7, w7		)
94
95	eor		w4, w4, w8
96	eor		w5, w5, w9
97	eor		w6, w6, w10
98	eor		w7, w7, w11
99
100	adr_l		tt, \ttab
101
102	tbnz		rounds, #1, 1f
103
1040:	\round		w8, w9, w10, w11, w4, w5, w6, w7
105	\round		w4, w5, w6, w7, w8, w9, w10, w11
106
1071:	subs		rounds, rounds, #4
108	\round		w8, w9, w10, w11, w4, w5, w6, w7
109	b.ls		3f
1102:	\round		w4, w5, w6, w7, w8, w9, w10, w11
111	b		0b
1123:	adr_l		tt, \ltab
113	\round		w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b
114
115CPU_BE(	rev		w4, w4		)
116CPU_BE(	rev		w5, w5		)
117CPU_BE(	rev		w6, w6		)
118CPU_BE(	rev		w7, w7		)
119
120	stp		w4, w5, [out]
121	stp		w6, w7, [out, #8]
122	ret
123	.endm
124
/*
 * __aes_arm64_encrypt - process one 16-byte block with the fround macro
 *
 * In:	x0 = round keys, x1 = output block, x2 = input block,
 *	x3 = number of rounds
 *
 * All rounds but the last use 32-bit lookups in crypto_ft_tab. The last
 * round reads single bytes from the same table, at byte offset 1 within
 * each 4-byte entry (hence 'crypto_ft_tab + 1' with a stride of 1 << 2).
 * NOTE(review): that byte is presumably the plain S-box value; confirm
 * against the definition of crypto_ft_tab.
 *
 * Clobbers: see do_crypt.
 */
ENTRY(__aes_arm64_encrypt)
	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
ENDPROC(__aes_arm64_encrypt)
128
129	.align		5
130ENTRY(__aes_arm64_decrypt)
131	do_crypt	iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0
132ENDPROC(__aes_arm64_decrypt)
133
134	.section	".rodata", "a"
135	.align		L1_CACHE_SHIFT
136	.type		__aes_arm64_inverse_sbox, %object
137__aes_arm64_inverse_sbox:
138	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
139	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
140	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
141	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
142	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
143	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
144	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
145	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
146	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
147	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
148	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
149	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
150	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
151	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
152	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
153	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
154	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
155	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
156	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
157	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
158	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
159	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
160	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
161	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
162	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
163	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
164	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
165	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
166	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
167	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
168	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
169	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
170	.size		__aes_arm64_inverse_sbox, . - __aes_arm64_inverse_sbox
171