1/*
2 * Scalar AES core transform
3 *
4 * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/linkage.h>
12#include <asm/assembler.h>
13#include <asm/cache.h>
14
15	.text
16
17	rk		.req	x0
18	out		.req	x1
19	in		.req	x2
20	rounds		.req	x3
21	tt		.req	x2
22
23	.macro		__pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift
24	.ifc		\op\shift, b0
25	ubfiz		\reg0, \in0, #2, #8
26	ubfiz		\reg1, \in1e, #2, #8
27	.else
28	ubfx		\reg0, \in0, #\shift, #8
29	ubfx		\reg1, \in1e, #\shift, #8
30	.endif
31
32	/*
33	 * AArch64 cannot do byte size indexed loads from a table containing
34	 * 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a
35	 * valid instruction. So perform the shift explicitly first for the
36	 * high bytes (the low byte is shifted implicitly by using ubfiz rather
37	 * than ubfx above)
38	 */
39	.ifnc		\op, b
40	ldr		\reg0, [tt, \reg0, uxtw #2]
41	ldr		\reg1, [tt, \reg1, uxtw #2]
42	.else
43	.if		\shift > 0
44	lsl		\reg0, \reg0, #2
45	lsl		\reg1, \reg1, #2
46	.endif
47	ldrb		\reg0, [tt, \reg0, uxtw]
48	ldrb		\reg1, [tt, \reg1, uxtw]
49	.endif
50	.endm
51
52	.macro		__pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift
53	ubfx		\reg0, \in0, #\shift, #8
54	ubfx		\reg1, \in1d, #\shift, #8
55	ldr\op		\reg0, [tt, \reg0, uxtw #\sz]
56	ldr\op		\reg1, [tt, \reg1, uxtw #\sz]
57	.endm
58
59	.macro		__hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op
60	ldp		\out0, \out1, [rk], #8
61
62	__pair\enc	\sz, \op, w12, w13, \in0, \in1, \in3, 0
63	__pair\enc	\sz, \op, w14, w15, \in1, \in2, \in0, 8
64	__pair\enc	\sz, \op, w16, w17, \in2, \in3, \in1, 16
65	__pair\enc	\sz, \op, \t0, \t1, \in3, \in0, \in2, 24
66
67	eor		\out0, \out0, w12
68	eor		\out1, \out1, w13
69	eor		\out0, \out0, w14, ror #24
70	eor		\out1, \out1, w15, ror #24
71	eor		\out0, \out0, w16, ror #16
72	eor		\out1, \out1, w17, ror #16
73	eor		\out0, \out0, \t0, ror #8
74	eor		\out1, \out1, \t1, ror #8
75	.endm
76
77	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
78	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
79	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
80	.endm
81
82	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
83	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
84	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
85	.endm
86
87	.macro		do_crypt, round, ttab, ltab, bsz
88	ldp		w4, w5, [in]
89	ldp		w6, w7, [in, #8]
90	ldp		w8, w9, [rk], #16
91	ldp		w10, w11, [rk, #-8]
92
93CPU_BE(	rev		w4, w4		)
94CPU_BE(	rev		w5, w5		)
95CPU_BE(	rev		w6, w6		)
96CPU_BE(	rev		w7, w7		)
97
98	eor		w4, w4, w8
99	eor		w5, w5, w9
100	eor		w6, w6, w10
101	eor		w7, w7, w11
102
103	adr_l		tt, \ttab
104
105	tbnz		rounds, #1, 1f
106
1070:	\round		w8, w9, w10, w11, w4, w5, w6, w7
108	\round		w4, w5, w6, w7, w8, w9, w10, w11
109
1101:	subs		rounds, rounds, #4
111	\round		w8, w9, w10, w11, w4, w5, w6, w7
112	b.ls		3f
1132:	\round		w4, w5, w6, w7, w8, w9, w10, w11
114	b		0b
1153:	adr_l		tt, \ltab
116	\round		w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b
117
118CPU_BE(	rev		w4, w4		)
119CPU_BE(	rev		w5, w5		)
120CPU_BE(	rev		w6, w6		)
121CPU_BE(	rev		w7, w7		)
122
123	stp		w4, w5, [out]
124	stp		w6, w7, [out, #8]
125	ret
126	.endm
127
/*
 * __aes_arm64_encrypt
 *
 * Register interface, as consumed by do_crypt:
 *   x0 (rk)     - round key words
 *   x1 (out)    - 16 byte destination
 *   x2 (in)     - 16 byte source
 *   x3 (rounds) - round count
 *
 * All rounds use fround. The last one reads single bytes from
 * crypto_ft_tab + 1, i.e. the byte at offset 1 of each 4-byte table
 * entry. The trailing 2 is handed to fround as 'sz', but the __pair1
 * macro behind fround does not reference that argument.
 */
ENTRY(__aes_arm64_encrypt)
	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
ENDPROC(__aes_arm64_encrypt)
131
132	.align		5
133ENTRY(__aes_arm64_decrypt)
134	do_crypt	iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0
135ENDPROC(__aes_arm64_decrypt)
136
137	.section	".rodata", "a"
138	.align		L1_CACHE_SHIFT
139	.type		__aes_arm64_inverse_sbox, %object
140__aes_arm64_inverse_sbox:
141	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
142	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
143	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
144	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
145	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
146	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
147	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
148	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
149	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
150	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
151	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
152	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
153	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
154	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
155	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
156	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
157	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
158	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
159	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
160	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
161	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
162	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
163	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
164	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
165	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
166	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
167	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
168	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
169	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
170	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
171	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
172	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
173	.size		__aes_arm64_inverse_sbox, . - __aes_arm64_inverse_sbox
174