xref: /openbmc/linux/arch/sparc/crypto/aes_asm.S (revision 0bf49ffb)
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/visasm.h>

#include "opcodes.h"

/*
 * ENCRYPT_TWO_ROUNDS - two AES encryption rounds on one 128-bit block.
 *
 * Every argument is a floating-point register number (the callers in this
 * file pass even numbers, e.g. KEY_BASE=12, I0/I1=4/6, T0/T1=0/2):
 *   KEY_BASE  first of four consecutive 64-bit key registers
 *             (KEY_BASE+0 .. KEY_BASE+6, i.e. two 128-bit round keys)
 *   I0, I1    block state: read by the first round, overwritten with the
 *             output of the second round
 *   T0, T1    scratch: carry the state between the two rounds
 *
 * AES_EROUND01/AES_EROUND23 are provided by opcodes.h (not visible here);
 * presumably they produce the two 64-bit halves of one hardware AES round.
 */
#define ENCRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \
	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
	AES_EROUND23(KEY_BASE +  2, I0, I1, T1) \
	AES_EROUND01(KEY_BASE +  4, T0, T1, I0) \
	AES_EROUND23(KEY_BASE +  6, T0, T1, I1)

/*
 * ENCRYPT_TWO_ROUNDS_2 - two AES encryption rounds on two independent
 * blocks using the same round keys.
 *
 *   KEY_BASE        first of four consecutive 64-bit key registers
 *   I0/I1, I2/I3    state of block A and block B (updated in place)
 *   T0/T1, T2/T3    scratch for block A and block B
 *
 * The instructions of the two blocks are interleaved; each block follows
 * the same sequence as ENCRYPT_TWO_ROUNDS.
 */
#define ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
	AES_EROUND23(KEY_BASE +  2, I0, I1, T1) \
	AES_EROUND01(KEY_BASE +  0, I2, I3, T2) \
	AES_EROUND23(KEY_BASE +  2, I2, I3, T3) \
	AES_EROUND01(KEY_BASE +  4, T0, T1, I0) \
	AES_EROUND23(KEY_BASE +  6, T0, T1, I1) \
	AES_EROUND01(KEY_BASE +  4, T2, T3, I2) \
	AES_EROUND23(KEY_BASE +  6, T2, T3, I3)

/*
 * ENCRYPT_TWO_ROUNDS_LAST - the closing pair of encryption rounds for one
 * block.  Same register contract as ENCRYPT_TWO_ROUNDS, but the second
 * round is issued with the "_L" opcodes (presumably the AES final-round
 * form; see opcodes.h).  The result is left in I0/I1.
 */
#define ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \
	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
	AES_EROUND23(KEY_BASE +  2, I0, I1, T1) \
	AES_EROUND01_L(KEY_BASE +  4, T0, T1, I0) \
	AES_EROUND23_L(KEY_BASE +  6, T0, T1, I1)

/*
 * ENCRYPT_TWO_ROUNDS_LAST_2 - the closing pair of encryption rounds for two
 * blocks (I0/I1 and I2/I3).  Same register contract as
 * ENCRYPT_TWO_ROUNDS_2; the second round of each block uses the "_L"
 * opcodes (presumably the AES final-round form; see opcodes.h).
 */
#define ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
	AES_EROUND23(KEY_BASE +  2, I0, I1, T1) \
	AES_EROUND01(KEY_BASE +  0, I2, I3, T2) \
	AES_EROUND23(KEY_BASE +  2, I2, I3, T3) \
	AES_EROUND01_L(KEY_BASE +  4, T0, T1, I0) \
	AES_EROUND23_L(KEY_BASE +  6, T0, T1, I1) \
	AES_EROUND01_L(KEY_BASE +  4, T2, T3, I2) \
	AES_EROUND23_L(KEY_BASE +  6, T2, T3, I3)

39	/* 10 rounds */
40#define ENCRYPT_128(KEY_BASE, I0, I1, T0, T1) \
41	ENCRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
42	ENCRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
43	ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
44	ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
45	ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1)
46
/*
 * ENCRYPT_128_2 - rounds 1..10 of AES-128 encryption for two blocks
 * (I0/I1 and I2/I3) sharing key registers KEY_BASE+0 .. KEY_BASE+38.
 * T0..T3 are scratch.
 */
#define ENCRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
	ENCRYPT_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, T0, T1, T2, T3) \
	ENCRYPT_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, T0, T1, T2, T3) \
	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
	ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3)

54	/* 12 rounds */
55#define ENCRYPT_192(KEY_BASE, I0, I1, T0, T1) \
56	ENCRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
57	ENCRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
58	ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
59	ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
60	ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
61	ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1)
62
/*
 * ENCRYPT_192_2 - rounds 1..12 of AES-192 encryption for two blocks
 * (I0/I1 and I2/I3) sharing key registers KEY_BASE+0 .. KEY_BASE+46.
 * T0..T3 are scratch.
 */
#define ENCRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
	ENCRYPT_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, T0, T1, T2, T3) \
	ENCRYPT_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, T0, T1, T2, T3) \
	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \
	ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3)

71	/* 14 rounds */
72#define ENCRYPT_256(KEY_BASE, I0, I1, T0, T1) \
73	ENCRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
74	ENCRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
75	ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
76	ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
77	ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
78	ENCRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \
79	ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1)
80
/*
 * ENCRYPT_256_TWO_ROUNDS_2 - ENCRYPT_TWO_ROUNDS_2 with its four scratch
 * registers given as one base: TMP_BASE+0, +2, +4 and +6.
 */
#define ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \
	ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \
			     TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6)

/*
 * ENCRYPT_256_2 - rounds 1..14 of AES-256 encryption for two blocks
 * (I0/I1 and I2/I3).
 *
 * The 28 key registers KEY_BASE+0 .. KEY_BASE+54 leave no spare scratch
 * registers for a second block, so key registers are borrowed as
 * temporaries and reloaded from the key schedule at [%o0]:
 *   - the first step borrows KEY_BASE+48..+54, whose keys are not needed
 *     until the final rounds; they are reloaded from 0xd0..0xe8 right
 *     after;
 *   - all later steps borrow KEY_BASE+0..+6, whose keys were already
 *     consumed; they are reloaded from 0x10..0x28 at the end so the macro
 *     can be executed again in a loop.
 *
 * The reloads name %f56-%f62 and %f8-%f14 literally, so the macro is only
 * correct with KEY_BASE == 8 and %o0 pointing at the start of the schedule
 * (which is how aes_sparc64_ecb_encrypt_256 invokes it).
 */
#define ENCRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \
	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, KEY_BASE + 48) \
	ldd	[%o0 + 0xd0], %f56; \
	ldd	[%o0 + 0xd8], %f58; \
	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, KEY_BASE +  0) \
	ldd	[%o0 + 0xe0], %f60; \
	ldd	[%o0 + 0xe8], %f62; \
	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE +  0) \
	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE +  0) \
	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE +  0) \
	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE +  0) \
	AES_EROUND01(KEY_BASE +  48, I0, I1, KEY_BASE + 0) \
	AES_EROUND23(KEY_BASE +  50, I0, I1, KEY_BASE + 2) \
	AES_EROUND01(KEY_BASE +  48, I2, I3, KEY_BASE + 4) \
	AES_EROUND23(KEY_BASE +  50, I2, I3, KEY_BASE + 6) \
	AES_EROUND01_L(KEY_BASE +  52, KEY_BASE + 0, KEY_BASE + 2, I0) \
	AES_EROUND23_L(KEY_BASE +  54, KEY_BASE + 0, KEY_BASE + 2, I1) \
	ldd	[%o0 + 0x10], %f8; \
	ldd	[%o0 + 0x18], %f10; \
	AES_EROUND01_L(KEY_BASE +  52, KEY_BASE + 4, KEY_BASE + 6, I2) \
	AES_EROUND23_L(KEY_BASE +  54, KEY_BASE + 4, KEY_BASE + 6, I3) \
	ldd	[%o0 + 0x20], %f12; \
	ldd	[%o0 + 0x28], %f14;

/*
 * DECRYPT_TWO_ROUNDS - two AES decryption rounds on one 128-bit block.
 *
 *   KEY_BASE  first of four consecutive 64-bit key registers
 *   I0, I1    block state: read by the first round, overwritten with the
 *             output of the second round
 *   T0, T1    scratch: carry the state between the two rounds
 *
 * Unlike the encrypt variant, the "23" half is issued first and takes the
 * lower-numbered key register of each pair.  The decrypt routines in this
 * file load the schedule from descending addresses, which yields exactly
 * that register order.  AES_DROUND01/AES_DROUND23 come from opcodes.h.
 */
#define DECRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \
	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \
	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \
	AES_DROUND23(KEY_BASE +  4, T0, T1, I1) \
	AES_DROUND01(KEY_BASE +  6, T0, T1, I0)

/*
 * DECRYPT_TWO_ROUNDS_2 - two AES decryption rounds on two independent
 * blocks (I0/I1 and I2/I3) using the same round keys.  T0/T1 and T2/T3 are
 * the per-block scratch registers; each block follows the same sequence as
 * DECRYPT_TWO_ROUNDS, with the two blocks interleaved.
 */
#define DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \
	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \
	AES_DROUND23(KEY_BASE +  0, I2, I3, T3) \
	AES_DROUND01(KEY_BASE +  2, I2, I3, T2) \
	AES_DROUND23(KEY_BASE +  4, T0, T1, I1) \
	AES_DROUND01(KEY_BASE +  6, T0, T1, I0) \
	AES_DROUND23(KEY_BASE +  4, T2, T3, I3) \
	AES_DROUND01(KEY_BASE +  6, T2, T3, I2)

/*
 * DECRYPT_TWO_ROUNDS_LAST - the closing pair of decryption rounds for one
 * block.  Same register contract as DECRYPT_TWO_ROUNDS, but the second
 * round is issued with the "_L" opcodes (presumably the AES final-round
 * form; see opcodes.h).  The result is left in I0/I1.
 */
#define DECRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \
	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \
	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \
	AES_DROUND23_L(KEY_BASE +  4, T0, T1, I1) \
	AES_DROUND01_L(KEY_BASE +  6, T0, T1, I0)

/*
 * DECRYPT_TWO_ROUNDS_LAST_2 - the closing pair of decryption rounds for two
 * blocks (I0/I1 and I2/I3).  Same register contract as
 * DECRYPT_TWO_ROUNDS_2; the second round of each block uses the "_L"
 * opcodes (presumably the AES final-round form; see opcodes.h).
 */
#define DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \
	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \
	AES_DROUND23(KEY_BASE +  0, I2, I3, T3) \
	AES_DROUND01(KEY_BASE +  2, I2, I3, T2) \
	AES_DROUND23_L(KEY_BASE +  4, T0, T1, I1) \
	AES_DROUND01_L(KEY_BASE +  6, T0, T1, I0) \
	AES_DROUND23_L(KEY_BASE +  4, T2, T3, I3) \
	AES_DROUND01_L(KEY_BASE +  6, T2, T3, I2)

141	/* 10 rounds */
142#define DECRYPT_128(KEY_BASE, I0, I1, T0, T1) \
143	DECRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
144	DECRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
145	DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
146	DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
147	DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1)
148
/*
 * DECRYPT_128_2 - rounds 1..10 of AES-128 decryption for two blocks
 * (I0/I1 and I2/I3) sharing key registers KEY_BASE+0 .. KEY_BASE+38.
 * T0..T3 are scratch.
 */
#define DECRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
	DECRYPT_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, T0, T1, T2, T3) \
	DECRYPT_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, T0, T1, T2, T3) \
	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
	DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3)

156	/* 12 rounds */
157#define DECRYPT_192(KEY_BASE, I0, I1, T0, T1) \
158	DECRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
159	DECRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
160	DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
161	DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
162	DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
163	DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1)
164
/*
 * DECRYPT_192_2 - rounds 1..12 of AES-192 decryption for two blocks
 * (I0/I1 and I2/I3) sharing key registers KEY_BASE+0 .. KEY_BASE+46.
 * T0..T3 are scratch.
 */
#define DECRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
	DECRYPT_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, T0, T1, T2, T3) \
	DECRYPT_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, T0, T1, T2, T3) \
	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \
	DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3)

173	/* 14 rounds */
174#define DECRYPT_256(KEY_BASE, I0, I1, T0, T1) \
175	DECRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
176	DECRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
177	DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
178	DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
179	DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
180	DECRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \
181	DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1)
182
/*
 * DECRYPT_256_TWO_ROUNDS_2 - DECRYPT_TWO_ROUNDS_2 with its four scratch
 * registers given as one base: TMP_BASE+0, +2, +4 and +6.
 */
#define DECRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \
	DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \
			     TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6)

/*
 * DECRYPT_256_2 - rounds 1..14 of AES-256 decryption for two blocks
 * (I0/I1 and I2/I3).
 *
 * As in ENCRYPT_256_2, all 28 key registers are occupied, so key registers
 * double as temporaries and are reloaded from the schedule at [%o0]:
 *   - the first step borrows KEY_BASE+48..+54; they are reloaded from
 *     0x18, 0x10, 0x08 and 0x00 right after;
 *   - all later steps borrow KEY_BASE+0..+6, whose keys were already
 *     consumed; they are reloaded from 0xd8, 0xd0, 0xc8 and 0xc0 at the
 *     end so the macro can be executed again in a loop.
 *
 * The reloads name %f56-%f62 and %f8-%f14 literally, so the macro is only
 * correct with KEY_BASE == 8 and %o0 pointing at the start of the schedule.
 */
#define DECRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \
	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, KEY_BASE + 48) \
	ldd	[%o0 + 0x18], %f56; \
	ldd	[%o0 + 0x10], %f58; \
	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, KEY_BASE +  0) \
	ldd	[%o0 + 0x08], %f60; \
	ldd	[%o0 + 0x00], %f62; \
	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE +  0) \
	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE +  0) \
	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE +  0) \
	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE +  0) \
	AES_DROUND23(KEY_BASE +  48, I0, I1, KEY_BASE + 2) \
	AES_DROUND01(KEY_BASE +  50, I0, I1, KEY_BASE + 0) \
	AES_DROUND23(KEY_BASE +  48, I2, I3, KEY_BASE + 6) \
	AES_DROUND01(KEY_BASE +  50, I2, I3, KEY_BASE + 4) \
	AES_DROUND23_L(KEY_BASE +  52, KEY_BASE + 0, KEY_BASE + 2, I1) \
	AES_DROUND01_L(KEY_BASE +  54, KEY_BASE + 0, KEY_BASE + 2, I0) \
	ldd	[%o0 + 0xd8], %f8; \
	ldd	[%o0 + 0xd0], %f10; \
	AES_DROUND23_L(KEY_BASE +  52, KEY_BASE + 4, KEY_BASE + 6, I3) \
	AES_DROUND01_L(KEY_BASE +  54, KEY_BASE + 4, KEY_BASE + 6, I2) \
	ldd	[%o0 + 0xc8], %f12; \
	ldd	[%o0 + 0xc0], %f14;

/*
 * aes_sparc64_key_expand - expand a raw AES key into the full key schedule.
 *
 * In:   %o0 = input_key  (read with 32-bit "ld")
 *       %o1 = output_key (written with 64-bit "std", so it must be
 *             8-byte aligned; the register is advanced internally)
 *       %o2 = key_len in bytes: < 24 takes the 128-bit path, == 24 the
 *             192-bit path, anything larger the 256-bit path
 * Out:  schedule at output_key: the raw key first, followed by the
 *       expanded words - 176, 208 or 240 bytes in total for
 *       128/192/256-bit keys.
 * Uses: %f0-%f58 (between VISEntry and VISExit), %o1, condition codes.
 *
 * AES_KEXPAND0/1/2 are defined in opcodes.h (not visible here).  Their
 * operands are FP register numbers; the third operand of AES_KEXPAND1 is
 * presumably the round-constant index.
 */
	.align	32
ENTRY(aes_sparc64_key_expand)
	/* %o0=input_key, %o1=output_key, %o2=key_len */
	VISEntry
	ld	[%o0 + 0x00], %f0
	ld	[%o0 + 0x04], %f1
	ld	[%o0 + 0x08], %f2
	ld	[%o0 + 0x0c], %f3

	/* The first 16 key bytes are copied to the schedule unchanged. */
	std	%f0, [%o1 + 0x00]
	std	%f2, [%o1 + 0x08]
	add	%o1, 0x10, %o1

	/* Dispatch on key length: <24 -> 2f (128), ==24 -> 1f (192). */
	cmp	%o2, 24
	bl	2f
	 nop

	be	1f
	 nop

	/* 256-bit key expansion */
	ld	[%o0 + 0x10], %f4
	ld	[%o0 + 0x14], %f5
	ld	[%o0 + 0x18], %f6
	ld	[%o0 + 0x1c], %f7

	/* Second 16 raw key bytes are copied as well. */
	std	%f4, [%o1 + 0x00]
	std	%f6, [%o1 + 0x08]
	add	%o1, 0x10, %o1

	AES_KEXPAND1(0, 6, 0x0, 8)
	AES_KEXPAND2(2, 8, 10)
	AES_KEXPAND0(4, 10, 12)
	AES_KEXPAND2(6, 12, 14)
	AES_KEXPAND1(8, 14, 0x1, 16)
	AES_KEXPAND2(10, 16, 18)
	AES_KEXPAND0(12, 18, 20)
	AES_KEXPAND2(14, 20, 22)
	AES_KEXPAND1(16, 22, 0x2, 24)
	AES_KEXPAND2(18, 24, 26)
	AES_KEXPAND0(20, 26, 28)
	AES_KEXPAND2(22, 28, 30)
	AES_KEXPAND1(24, 30, 0x3, 32)
	AES_KEXPAND2(26, 32, 34)
	AES_KEXPAND0(28, 34, 36)
	AES_KEXPAND2(30, 36, 38)
	AES_KEXPAND1(32, 38, 0x4, 40)
	AES_KEXPAND2(34, 40, 42)
	AES_KEXPAND0(36, 42, 44)
	AES_KEXPAND2(38, 44, 46)
	AES_KEXPAND1(40, 46, 0x5, 48)
	AES_KEXPAND2(42, 48, 50)
	AES_KEXPAND0(44, 50, 52)
	AES_KEXPAND2(46, 52, 54)
	AES_KEXPAND1(48, 54, 0x6, 56)
	AES_KEXPAND2(50, 56, 58)

	/* Store the 26 expanded doublewords (%f8-%f58, 208 bytes). */
	std	%f8, [%o1 + 0x00]
	std	%f10, [%o1 + 0x08]
	std	%f12, [%o1 + 0x10]
	std	%f14, [%o1 + 0x18]
	std	%f16, [%o1 + 0x20]
	std	%f18, [%o1 + 0x28]
	std	%f20, [%o1 + 0x30]
	std	%f22, [%o1 + 0x38]
	std	%f24, [%o1 + 0x40]
	std	%f26, [%o1 + 0x48]
	std	%f28, [%o1 + 0x50]
	std	%f30, [%o1 + 0x58]
	std	%f32, [%o1 + 0x60]
	std	%f34, [%o1 + 0x68]
	std	%f36, [%o1 + 0x70]
	std	%f38, [%o1 + 0x78]
	std	%f40, [%o1 + 0x80]
	std	%f42, [%o1 + 0x88]
	std	%f44, [%o1 + 0x90]
	std	%f46, [%o1 + 0x98]
	std	%f48, [%o1 + 0xa0]
	std	%f50, [%o1 + 0xa8]
	std	%f52, [%o1 + 0xb0]
	std	%f54, [%o1 + 0xb8]
	std	%f56, [%o1 + 0xc0]
	ba,pt	%xcc, 80f
	 std	%f58, [%o1 + 0xc8]	/* final store sits in the delay slot */

1:
	/* 192-bit key expansion */
	ld	[%o0 + 0x10], %f4
	ld	[%o0 + 0x14], %f5

	/* Remaining 8 raw key bytes are copied unchanged. */
	std	%f4, [%o1 + 0x00]
	add	%o1, 0x08, %o1

	AES_KEXPAND1(0, 4, 0x0, 6)
	AES_KEXPAND2(2, 6, 8)
	AES_KEXPAND2(4, 8, 10)
	AES_KEXPAND1(6, 10, 0x1, 12)
	AES_KEXPAND2(8, 12, 14)
	AES_KEXPAND2(10, 14, 16)
	AES_KEXPAND1(12, 16, 0x2, 18)
	AES_KEXPAND2(14, 18, 20)
	AES_KEXPAND2(16, 20, 22)
	AES_KEXPAND1(18, 22, 0x3, 24)
	AES_KEXPAND2(20, 24, 26)
	AES_KEXPAND2(22, 26, 28)
	AES_KEXPAND1(24, 28, 0x4, 30)
	AES_KEXPAND2(26, 30, 32)
	AES_KEXPAND2(28, 32, 34)
	AES_KEXPAND1(30, 34, 0x5, 36)
	AES_KEXPAND2(32, 36, 38)
	AES_KEXPAND2(34, 38, 40)
	AES_KEXPAND1(36, 40, 0x6, 42)
	AES_KEXPAND2(38, 42, 44)
	AES_KEXPAND2(40, 44, 46)
	AES_KEXPAND1(42, 46, 0x7, 48)
	AES_KEXPAND2(44, 48, 50)

	/* Store the 23 expanded doublewords (%f6-%f50, 184 bytes). */
	std	%f6, [%o1 + 0x00]
	std	%f8, [%o1 + 0x08]
	std	%f10, [%o1 + 0x10]
	std	%f12, [%o1 + 0x18]
	std	%f14, [%o1 + 0x20]
	std	%f16, [%o1 + 0x28]
	std	%f18, [%o1 + 0x30]
	std	%f20, [%o1 + 0x38]
	std	%f22, [%o1 + 0x40]
	std	%f24, [%o1 + 0x48]
	std	%f26, [%o1 + 0x50]
	std	%f28, [%o1 + 0x58]
	std	%f30, [%o1 + 0x60]
	std	%f32, [%o1 + 0x68]
	std	%f34, [%o1 + 0x70]
	std	%f36, [%o1 + 0x78]
	std	%f38, [%o1 + 0x80]
	std	%f40, [%o1 + 0x88]
	std	%f42, [%o1 + 0x90]
	std	%f44, [%o1 + 0x98]
	std	%f46, [%o1 + 0xa0]
	std	%f48, [%o1 + 0xa8]
	ba,pt	%xcc, 80f
	 std	%f50, [%o1 + 0xb0]	/* final store sits in the delay slot */

2:
	/* 128-bit key expansion */
	AES_KEXPAND1(0, 2, 0x0, 4)
	AES_KEXPAND2(2, 4, 6)
	AES_KEXPAND1(4, 6, 0x1, 8)
	AES_KEXPAND2(6, 8, 10)
	AES_KEXPAND1(8, 10, 0x2, 12)
	AES_KEXPAND2(10, 12, 14)
	AES_KEXPAND1(12, 14, 0x3, 16)
	AES_KEXPAND2(14, 16, 18)
	AES_KEXPAND1(16, 18, 0x4, 20)
	AES_KEXPAND2(18, 20, 22)
	AES_KEXPAND1(20, 22, 0x5, 24)
	AES_KEXPAND2(22, 24, 26)
	AES_KEXPAND1(24, 26, 0x6, 28)
	AES_KEXPAND2(26, 28, 30)
	AES_KEXPAND1(28, 30, 0x7, 32)
	AES_KEXPAND2(30, 32, 34)
	AES_KEXPAND1(32, 34, 0x8, 36)
	AES_KEXPAND2(34, 36, 38)
	AES_KEXPAND1(36, 38, 0x9, 40)
	AES_KEXPAND2(38, 40, 42)

	/* Store the 20 expanded doublewords (%f4-%f42, 160 bytes). */
	std	%f4, [%o1 + 0x00]
	std	%f6, [%o1 + 0x08]
	std	%f8, [%o1 + 0x10]
	std	%f10, [%o1 + 0x18]
	std	%f12, [%o1 + 0x20]
	std	%f14, [%o1 + 0x28]
	std	%f16, [%o1 + 0x30]
	std	%f18, [%o1 + 0x38]
	std	%f20, [%o1 + 0x40]
	std	%f22, [%o1 + 0x48]
	std	%f24, [%o1 + 0x50]
	std	%f26, [%o1 + 0x58]
	std	%f28, [%o1 + 0x60]
	std	%f30, [%o1 + 0x68]
	std	%f32, [%o1 + 0x70]
	std	%f34, [%o1 + 0x78]
	std	%f36, [%o1 + 0x80]
	std	%f38, [%o1 + 0x88]
	std	%f40, [%o1 + 0x90]
	std	%f42, [%o1 + 0x98]
80:
	/* Common exit for all three key sizes. */
	retl
	 VISExit
ENDPROC(aes_sparc64_key_expand)

/*
 * aes_sparc64_encrypt_128 - encrypt one 16-byte block with a 128-bit key.
 *
 * In:   %o0 = key schedule (176 bytes, read with 64-bit "ldd")
 *       %o1 = input block  (read with 32-bit "ld")
 *       %o2 = output block (written with 32-bit "st")
 * Uses: %f0-%f50 between VISEntry and VISExit.
 */
	.align		32
ENTRY(aes_sparc64_encrypt_128)
	/* %o0=key, %o1=input, %o2=output */
	VISEntry
	ld		[%o1 + 0x00], %f4
	ld		[%o1 + 0x04], %f5
	ld		[%o1 + 0x08], %f6
	ld		[%o1 + 0x0c], %f7
	/* Whole schedule: round key 0 in %f8/%f10, rounds 1..10 in %f12-%f50. */
	ldd		[%o0 + 0x00], %f8
	ldd		[%o0 + 0x08], %f10
	ldd		[%o0 + 0x10], %f12
	ldd		[%o0 + 0x18], %f14
	ldd		[%o0 + 0x20], %f16
	ldd		[%o0 + 0x28], %f18
	ldd		[%o0 + 0x30], %f20
	ldd		[%o0 + 0x38], %f22
	ldd		[%o0 + 0x40], %f24
	ldd		[%o0 + 0x48], %f26
	ldd		[%o0 + 0x50], %f28
	ldd		[%o0 + 0x58], %f30
	ldd		[%o0 + 0x60], %f32
	ldd		[%o0 + 0x68], %f34
	ldd		[%o0 + 0x70], %f36
	ldd		[%o0 + 0x78], %f38
	ldd		[%o0 + 0x80], %f40
	ldd		[%o0 + 0x88], %f42
	ldd		[%o0 + 0x90], %f44
	ldd		[%o0 + 0x98], %f46
	ldd		[%o0 + 0xa0], %f48
	ldd		[%o0 + 0xa8], %f50
	/* Initial round-key XOR, then the ten rounds. */
	fxor		%f8, %f4, %f4
	fxor		%f10, %f6, %f6
	ENCRYPT_128(12, 4, 6, 0, 2)
	st		%f4, [%o2 + 0x00]
	st		%f5, [%o2 + 0x04]
	st		%f6, [%o2 + 0x08]
	st		%f7, [%o2 + 0x0c]
	retl
	 VISExit
ENDPROC(aes_sparc64_encrypt_128)

/*
 * aes_sparc64_encrypt_192 - encrypt one 16-byte block with a 192-bit key.
 *
 * In:   %o0 = key schedule (208 bytes are read; %o0 is advanced by 0x20)
 *       %o1 = input block  (read with 32-bit "ld")
 *       %o2 = output block (written with 32-bit "st")
 * Uses: %f0-%f50 between VISEntry and VISExit.
 *
 * The first two rounds run from %f8-%f14; %o0 is then bumped so the
 * remaining ten rounds reuse the ENCRYPT_128 register layout (%f12-%f50).
 */
	.align		32
ENTRY(aes_sparc64_encrypt_192)
	/* %o0=key, %o1=input, %o2=output */
	VISEntry
	ld		[%o1 + 0x00], %f4
	ld		[%o1 + 0x04], %f5
	ld		[%o1 + 0x08], %f6
	ld		[%o1 + 0x0c], %f7

	ldd		[%o0 + 0x00], %f8
	ldd		[%o0 + 0x08], %f10

	/* Initial round-key XOR. */
	fxor		%f8, %f4, %f4
	fxor		%f10, %f6, %f6

	/* Round keys 1 and 2. */
	ldd		[%o0 + 0x10], %f8
	ldd		[%o0 + 0x18], %f10
	ldd		[%o0 + 0x20], %f12
	ldd		[%o0 + 0x28], %f14
	add		%o0, 0x20, %o0

	ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)

	/* Round keys 3..12 (offsets are relative to the advanced %o0). */
	ldd		[%o0 + 0x10], %f12
	ldd		[%o0 + 0x18], %f14
	ldd		[%o0 + 0x20], %f16
	ldd		[%o0 + 0x28], %f18
	ldd		[%o0 + 0x30], %f20
	ldd		[%o0 + 0x38], %f22
	ldd		[%o0 + 0x40], %f24
	ldd		[%o0 + 0x48], %f26
	ldd		[%o0 + 0x50], %f28
	ldd		[%o0 + 0x58], %f30
	ldd		[%o0 + 0x60], %f32
	ldd		[%o0 + 0x68], %f34
	ldd		[%o0 + 0x70], %f36
	ldd		[%o0 + 0x78], %f38
	ldd		[%o0 + 0x80], %f40
	ldd		[%o0 + 0x88], %f42
	ldd		[%o0 + 0x90], %f44
	ldd		[%o0 + 0x98], %f46
	ldd		[%o0 + 0xa0], %f48
	ldd		[%o0 + 0xa8], %f50


	ENCRYPT_128(12, 4, 6, 0, 2)

	st		%f4, [%o2 + 0x00]
	st		%f5, [%o2 + 0x04]
	st		%f6, [%o2 + 0x08]
	st		%f7, [%o2 + 0x0c]

	retl
	 VISExit
ENDPROC(aes_sparc64_encrypt_192)

/*
 * aes_sparc64_encrypt_256 - encrypt one 16-byte block with a 256-bit key.
 *
 * In:   %o0 = key schedule (240 bytes are read; %o0 is advanced by 0x40)
 *       %o1 = input block  (read with 32-bit "ld")
 *       %o2 = output block (written with 32-bit "st")
 * Uses: %f0-%f50 between VISEntry and VISExit.
 *
 * Rounds 1-2 and 3-4 each run from %f8-%f14, with %o0 bumped by 0x20 after
 * each key load; the final ten rounds reuse the ENCRYPT_128 register
 * layout (%f12-%f50).
 */
	.align		32
ENTRY(aes_sparc64_encrypt_256)
	/* %o0=key, %o1=input, %o2=output */
	VISEntry
	ld		[%o1 + 0x00], %f4
	ld		[%o1 + 0x04], %f5
	ld		[%o1 + 0x08], %f6
	ld		[%o1 + 0x0c], %f7

	ldd		[%o0 + 0x00], %f8
	ldd		[%o0 + 0x08], %f10

	/* Initial round-key XOR. */
	fxor		%f8, %f4, %f4
	fxor		%f10, %f6, %f6

	/* Round keys 1 and 2. */
	ldd		[%o0 + 0x10], %f8

	ldd		[%o0 + 0x18], %f10
	ldd		[%o0 + 0x20], %f12
	ldd		[%o0 + 0x28], %f14
	add		%o0, 0x20, %o0

	ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)

	/* Round keys 3 and 4 (same offsets, %o0 already advanced). */
	ldd		[%o0 + 0x10], %f8

	ldd		[%o0 + 0x18], %f10
	ldd		[%o0 + 0x20], %f12
	ldd		[%o0 + 0x28], %f14
	add		%o0, 0x20, %o0

	ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)

	/* Round keys 5..14. */
	ldd		[%o0 + 0x10], %f12
	ldd		[%o0 + 0x18], %f14
	ldd		[%o0 + 0x20], %f16
	ldd		[%o0 + 0x28], %f18
	ldd		[%o0 + 0x30], %f20
	ldd		[%o0 + 0x38], %f22
	ldd		[%o0 + 0x40], %f24
	ldd		[%o0 + 0x48], %f26
	ldd		[%o0 + 0x50], %f28
	ldd		[%o0 + 0x58], %f30
	ldd		[%o0 + 0x60], %f32
	ldd		[%o0 + 0x68], %f34
	ldd		[%o0 + 0x70], %f36
	ldd		[%o0 + 0x78], %f38
	ldd		[%o0 + 0x80], %f40
	ldd		[%o0 + 0x88], %f42
	ldd		[%o0 + 0x90], %f44
	ldd		[%o0 + 0x98], %f46
	ldd		[%o0 + 0xa0], %f48
	ldd		[%o0 + 0xa8], %f50

	ENCRYPT_128(12, 4, 6, 0, 2)

	st		%f4, [%o2 + 0x00]
	st		%f5, [%o2 + 0x04]
	st		%f6, [%o2 + 0x08]
	st		%f7, [%o2 + 0x0c]

	retl
	 VISExit
ENDPROC(aes_sparc64_encrypt_256)

/*
 * aes_sparc64_decrypt_128 - decrypt one 16-byte block with a 128-bit key.
 *
 * In:   %o0 = key schedule (176 bytes, read with 64-bit "ldd")
 *       %o1 = input block  (read with 32-bit "ld")
 *       %o2 = output block (written with 32-bit "st")
 * Uses: %f0-%f50 between VISEntry and VISExit.
 *
 * The last round key (0xa0/0xa8) is XORed in first; the remaining keys are
 * loaded from descending addresses, the order the DECRYPT_* macros expect.
 */
	.align		32
ENTRY(aes_sparc64_decrypt_128)
	/* %o0=key, %o1=input, %o2=output */
	VISEntry
	ld		[%o1 + 0x00], %f4
	ld		[%o1 + 0x04], %f5
	ld		[%o1 + 0x08], %f6
	ld		[%o1 + 0x0c], %f7
	ldd		[%o0 + 0xa0], %f8
	ldd		[%o0 + 0xa8], %f10
	ldd		[%o0 + 0x98], %f12
	ldd		[%o0 + 0x90], %f14
	ldd		[%o0 + 0x88], %f16
	ldd		[%o0 + 0x80], %f18
	ldd		[%o0 + 0x78], %f20
	ldd		[%o0 + 0x70], %f22
	ldd		[%o0 + 0x68], %f24
	ldd		[%o0 + 0x60], %f26
	ldd		[%o0 + 0x58], %f28
	ldd		[%o0 + 0x50], %f30
	ldd		[%o0 + 0x48], %f32
	ldd		[%o0 + 0x40], %f34
	ldd		[%o0 + 0x38], %f36
	ldd		[%o0 + 0x30], %f38
	ldd		[%o0 + 0x28], %f40
	ldd		[%o0 + 0x20], %f42
	ldd		[%o0 + 0x18], %f44
	ldd		[%o0 + 0x10], %f46
	ldd		[%o0 + 0x08], %f48
	ldd		[%o0 + 0x00], %f50
	/* Initial XOR with the last round key, then the ten rounds. */
	fxor		%f8, %f4, %f4
	fxor		%f10, %f6, %f6
	DECRYPT_128(12, 4, 6, 0, 2)
	st		%f4, [%o2 + 0x00]
	st		%f5, [%o2 + 0x04]
	st		%f6, [%o2 + 0x08]
	st		%f7, [%o2 + 0x0c]
	retl
	 VISExit
ENDPROC(aes_sparc64_decrypt_128)

/*
 * aes_sparc64_decrypt_192 - decrypt one 16-byte block with a 192-bit key.
 *
 * In:   %o0 = key schedule (208 bytes, read with 64-bit "ldd")
 *       %o1 = input block  (read with 32-bit "ld")
 *       %o2 = output block (written with 32-bit "st")
 * Uses: %f0-%f58 between VISEntry and VISExit.
 *
 * The last round key (0xc0/0xc8) is XORed in first.  The first two rounds
 * use %f12-%f18, the remaining ten use %f20-%f58; key loads are interleaved
 * with the computation.
 */
	.align		32
ENTRY(aes_sparc64_decrypt_192)
	/* %o0=key, %o1=input, %o2=output */
	VISEntry
	ld		[%o1 + 0x00], %f4
	ld		[%o1 + 0x04], %f5
	ld		[%o1 + 0x08], %f6
	ld		[%o1 + 0x0c], %f7
	ldd		[%o0 + 0xc0], %f8
	ldd		[%o0 + 0xc8], %f10
	ldd		[%o0 + 0xb8], %f12
	ldd		[%o0 + 0xb0], %f14
	ldd		[%o0 + 0xa8], %f16
	ldd		[%o0 + 0xa0], %f18
	/* Initial XOR with the last round key. */
	fxor		%f8, %f4, %f4
	fxor		%f10, %f6, %f6
	ldd		[%o0 + 0x98], %f20
	ldd		[%o0 + 0x90], %f22
	ldd		[%o0 + 0x88], %f24
	ldd		[%o0 + 0x80], %f26
	DECRYPT_TWO_ROUNDS(12, 4, 6, 0, 2)
	ldd		[%o0 + 0x78], %f28
	ldd		[%o0 + 0x70], %f30
	ldd		[%o0 + 0x68], %f32
	ldd		[%o0 + 0x60], %f34
	ldd		[%o0 + 0x58], %f36
	ldd		[%o0 + 0x50], %f38
	ldd		[%o0 + 0x48], %f40
	ldd		[%o0 + 0x40], %f42
	ldd		[%o0 + 0x38], %f44
	ldd		[%o0 + 0x30], %f46
	ldd		[%o0 + 0x28], %f48
	ldd		[%o0 + 0x20], %f50
	ldd		[%o0 + 0x18], %f52
	ldd		[%o0 + 0x10], %f54
	ldd		[%o0 + 0x08], %f56
	ldd		[%o0 + 0x00], %f58
	DECRYPT_128(20, 4, 6, 0, 2)
	st		%f4, [%o2 + 0x00]
	st		%f5, [%o2 + 0x04]
	st		%f6, [%o2 + 0x08]
	st		%f7, [%o2 + 0x0c]
	retl
	 VISExit
ENDPROC(aes_sparc64_decrypt_192)

/*
 * aes_sparc64_decrypt_256 - decrypt one 16-byte block with a 256-bit key.
 *
 * In:   %o0 = key schedule (240 bytes, read with 64-bit "ldd")
 *       %o1 = input block  (read with 32-bit "ld")
 *       %o2 = output block (written with 32-bit "st")
 * Uses: %f0-%f50 between VISEntry and VISExit.
 *
 * The rounds are written out one opcode at a time so each key load can be
 * slotted between two round instructions.  %f12-%f26 are used twice: first
 * for the keys at 0xd8..0xa0, then - each reloaded after its first use -
 * for the keys at 0x98..0x60.  The state ping-pongs between %f4/%f6 and
 * %f0/%f2 and ends in %f4/%f6.
 */
	.align		32
ENTRY(aes_sparc64_decrypt_256)
	/* %o0=key, %o1=input, %o2=output */
	VISEntry
	ld		[%o1 + 0x00], %f4
	ld		[%o1 + 0x04], %f5
	ld		[%o1 + 0x08], %f6
	ld		[%o1 + 0x0c], %f7
	ldd		[%o0 + 0xe0], %f8
	ldd		[%o0 + 0xe8], %f10
	ldd		[%o0 + 0xd8], %f12
	ldd		[%o0 + 0xd0], %f14
	ldd		[%o0 + 0xc8], %f16
	/* Initial XOR with the last round key (0xe0/0xe8). */
	fxor		%f8, %f4, %f4
	ldd		[%o0 + 0xc0], %f18
	fxor		%f10, %f6, %f6
	ldd		[%o0 + 0xb8], %f20
	AES_DROUND23(12, 4, 6, 2)
	ldd		[%o0 + 0xb0], %f22
	AES_DROUND01(14, 4, 6, 0)
	ldd		[%o0 + 0xa8], %f24
	AES_DROUND23(16, 0, 2, 6)
	ldd		[%o0 + 0xa0], %f26
	AES_DROUND01(18, 0, 2, 4)
	/* From here %f12-%f26 are refilled for their second use. */
	ldd		[%o0 + 0x98], %f12
	AES_DROUND23(20, 4, 6, 2)
	ldd		[%o0 + 0x90], %f14
	AES_DROUND01(22, 4, 6, 0)
	ldd		[%o0 + 0x88], %f16
	AES_DROUND23(24, 0, 2, 6)
	ldd		[%o0 + 0x80], %f18
	AES_DROUND01(26, 0, 2, 4)
	ldd		[%o0 + 0x78], %f20
	AES_DROUND23(12, 4, 6, 2)
	ldd		[%o0 + 0x70], %f22
	AES_DROUND01(14, 4, 6, 0)
	ldd		[%o0 + 0x68], %f24
	AES_DROUND23(16, 0, 2, 6)
	ldd		[%o0 + 0x60], %f26
	AES_DROUND01(18, 0, 2, 4)
	ldd		[%o0 + 0x58], %f28
	AES_DROUND23(20, 4, 6, 2)
	ldd		[%o0 + 0x50], %f30
	AES_DROUND01(22, 4, 6, 0)
	ldd		[%o0 + 0x48], %f32
	AES_DROUND23(24, 0, 2, 6)
	ldd		[%o0 + 0x40], %f34
	AES_DROUND01(26, 0, 2, 4)
	ldd		[%o0 + 0x38], %f36
	AES_DROUND23(28, 4, 6, 2)
	ldd		[%o0 + 0x30], %f38
	AES_DROUND01(30, 4, 6, 0)
	ldd		[%o0 + 0x28], %f40
	AES_DROUND23(32, 0, 2, 6)
	ldd		[%o0 + 0x20], %f42
	AES_DROUND01(34, 0, 2, 4)
	ldd		[%o0 + 0x18], %f44
	AES_DROUND23(36, 4, 6, 2)
	ldd		[%o0 + 0x10], %f46
	AES_DROUND01(38, 4, 6, 0)
	ldd		[%o0 + 0x08], %f48
	AES_DROUND23(40, 0, 2, 6)
	ldd		[%o0 + 0x00], %f50
	AES_DROUND01(42, 0, 2, 4)
	AES_DROUND23(44, 4, 6, 2)
	AES_DROUND01(46, 4, 6, 0)
	/* Fourteenth round uses the "_L" opcodes. */
	AES_DROUND23_L(48, 0, 2, 6)
	AES_DROUND01_L(50, 0, 2, 4)
	st		%f4, [%o2 + 0x00]
	st		%f5, [%o2 + 0x04]
	st		%f6, [%o2 + 0x08]
	st		%f7, [%o2 + 0x0c]
	retl
	 VISExit
ENDPROC(aes_sparc64_decrypt_256)

/*
 * aes_sparc64_load_encrypt_keys_128 - preload the AES-128 encryption round
 * keys into FP registers.
 *
 * In:   %o0 = key schedule
 * Out:  %f8-%f46 = schedule bytes 0x10..0xaf (round keys 1..10)
 *
 * Round key 0 (0x00..0x0f) is not loaded; aes_sparc64_ecb_encrypt_128
 * fetches it into integer registers itself.  VISEntry is issued but there
 * is no matching VISExit here, so the registers stay live after return;
 * presumably the caller closes the FPU section - confirm in the C glue.
 */
	.align		32
ENTRY(aes_sparc64_load_encrypt_keys_128)
	/* %o0=key */
	VISEntry
	ldd		[%o0 + 0x10], %f8
	ldd		[%o0 + 0x18], %f10
	ldd		[%o0 + 0x20], %f12
	ldd		[%o0 + 0x28], %f14
	ldd		[%o0 + 0x30], %f16
	ldd		[%o0 + 0x38], %f18
	ldd		[%o0 + 0x40], %f20
	ldd		[%o0 + 0x48], %f22
	ldd		[%o0 + 0x50], %f24
	ldd		[%o0 + 0x58], %f26
	ldd		[%o0 + 0x60], %f28
	ldd		[%o0 + 0x68], %f30
	ldd		[%o0 + 0x70], %f32
	ldd		[%o0 + 0x78], %f34
	ldd		[%o0 + 0x80], %f36
	ldd		[%o0 + 0x88], %f38
	ldd		[%o0 + 0x90], %f40
	ldd		[%o0 + 0x98], %f42
	ldd		[%o0 + 0xa0], %f44
	retl
	 ldd		[%o0 + 0xa8], %f46	/* last load in the delay slot */
ENDPROC(aes_sparc64_load_encrypt_keys_128)

/*
 * aes_sparc64_load_encrypt_keys_192 - preload the AES-192 encryption round
 * keys into FP registers.
 *
 * In:   %o0 = key schedule
 * Out:  %f8-%f54 = schedule bytes 0x10..0xcf (round keys 1..12)
 *
 * Round key 0 is not loaded; aes_sparc64_ecb_encrypt_192 reads it into
 * integer registers.  VISEntry has no matching VISExit here, so the
 * registers stay live after return (presumably the caller ends the FPU
 * section - confirm in the C glue).
 */
	.align		32
ENTRY(aes_sparc64_load_encrypt_keys_192)
	/* %o0=key */
	VISEntry
	ldd		[%o0 + 0x10], %f8
	ldd		[%o0 + 0x18], %f10
	ldd		[%o0 + 0x20], %f12
	ldd		[%o0 + 0x28], %f14
	ldd		[%o0 + 0x30], %f16
	ldd		[%o0 + 0x38], %f18
	ldd		[%o0 + 0x40], %f20
	ldd		[%o0 + 0x48], %f22
	ldd		[%o0 + 0x50], %f24
	ldd		[%o0 + 0x58], %f26
	ldd		[%o0 + 0x60], %f28
	ldd		[%o0 + 0x68], %f30
	ldd		[%o0 + 0x70], %f32
	ldd		[%o0 + 0x78], %f34
	ldd		[%o0 + 0x80], %f36
	ldd		[%o0 + 0x88], %f38
	ldd		[%o0 + 0x90], %f40
	ldd		[%o0 + 0x98], %f42
	ldd		[%o0 + 0xa0], %f44
	ldd		[%o0 + 0xa8], %f46
	ldd		[%o0 + 0xb0], %f48
	ldd		[%o0 + 0xb8], %f50
	ldd		[%o0 + 0xc0], %f52
	retl
	 ldd		[%o0 + 0xc8], %f54	/* last load in the delay slot */
ENDPROC(aes_sparc64_load_encrypt_keys_192)

/*
 * aes_sparc64_load_encrypt_keys_256 - preload the AES-256 encryption round
 * keys into FP registers.
 *
 * In:   %o0 = key schedule
 * Out:  %f8-%f62 = schedule bytes 0x10..0xef (round keys 1..14)
 *
 * Round key 0 is not loaded; aes_sparc64_ecb_encrypt_256 reads it into
 * integer registers.  VISEntry has no matching VISExit here, so the
 * registers stay live after return (presumably the caller ends the FPU
 * section - confirm in the C glue).
 */
	.align		32
ENTRY(aes_sparc64_load_encrypt_keys_256)
	/* %o0=key */
	VISEntry
	ldd		[%o0 + 0x10], %f8
	ldd		[%o0 + 0x18], %f10
	ldd		[%o0 + 0x20], %f12
	ldd		[%o0 + 0x28], %f14
	ldd		[%o0 + 0x30], %f16
	ldd		[%o0 + 0x38], %f18
	ldd		[%o0 + 0x40], %f20
	ldd		[%o0 + 0x48], %f22
	ldd		[%o0 + 0x50], %f24
	ldd		[%o0 + 0x58], %f26
	ldd		[%o0 + 0x60], %f28
	ldd		[%o0 + 0x68], %f30
	ldd		[%o0 + 0x70], %f32
	ldd		[%o0 + 0x78], %f34
	ldd		[%o0 + 0x80], %f36
	ldd		[%o0 + 0x88], %f38
	ldd		[%o0 + 0x90], %f40
	ldd		[%o0 + 0x98], %f42
	ldd		[%o0 + 0xa0], %f44
	ldd		[%o0 + 0xa8], %f46
	ldd		[%o0 + 0xb0], %f48
	ldd		[%o0 + 0xb8], %f50
	ldd		[%o0 + 0xc0], %f52
	ldd		[%o0 + 0xc8], %f54
	ldd		[%o0 + 0xd0], %f56
	ldd		[%o0 + 0xd8], %f58
	ldd		[%o0 + 0xe0], %f60
	retl
	 ldd		[%o0 + 0xe8], %f62	/* last load in the delay slot */
ENDPROC(aes_sparc64_load_encrypt_keys_256)

/*
 * aes_sparc64_load_decrypt_keys_128 - preload the AES-128 decryption round
 * keys into FP registers.
 *
 * In:   %o0 = key schedule
 * Out:  %f8-%f46 = schedule doublewords from 0x98 down to 0x00, i.e. in
 *       descending address order as the DECRYPT_* macros consume them
 *
 * The last round key (0xa0..0xaf) is not loaded;
 * aes_sparc64_ecb_decrypt_128 reads it into integer registers.  VISEntry
 * has no matching VISExit here, so the registers stay live after return
 * (presumably the caller ends the FPU section - confirm in the C glue).
 */
	.align		32
ENTRY(aes_sparc64_load_decrypt_keys_128)
	/* %o0=key */
	VISEntry
	ldd		[%o0 + 0x98], %f8
	ldd		[%o0 + 0x90], %f10
	ldd		[%o0 + 0x88], %f12
	ldd		[%o0 + 0x80], %f14
	ldd		[%o0 + 0x78], %f16
	ldd		[%o0 + 0x70], %f18
	ldd		[%o0 + 0x68], %f20
	ldd		[%o0 + 0x60], %f22
	ldd		[%o0 + 0x58], %f24
	ldd		[%o0 + 0x50], %f26
	ldd		[%o0 + 0x48], %f28
	ldd		[%o0 + 0x40], %f30
	ldd		[%o0 + 0x38], %f32
	ldd		[%o0 + 0x30], %f34
	ldd		[%o0 + 0x28], %f36
	ldd		[%o0 + 0x20], %f38
	ldd		[%o0 + 0x18], %f40
	ldd		[%o0 + 0x10], %f42
	ldd		[%o0 + 0x08], %f44
	retl
	 ldd		[%o0 + 0x00], %f46	/* last load in the delay slot */
ENDPROC(aes_sparc64_load_decrypt_keys_128)

/*
 * aes_sparc64_load_decrypt_keys_192 - preload the AES-192 decryption round
 * keys into FP registers.
 *
 * In:   %o0 = key schedule
 * Out:  %f8-%f54 = schedule doublewords from 0xb8 down to 0x00
 *       (descending address order)
 *
 * The last round key (0xc0..0xcf) is not loaded;
 * aes_sparc64_ecb_decrypt_192 reads it into integer registers.  VISEntry
 * has no matching VISExit here, so the registers stay live after return
 * (presumably the caller ends the FPU section - confirm in the C glue).
 */
	.align		32
ENTRY(aes_sparc64_load_decrypt_keys_192)
	/* %o0=key */
	VISEntry
	ldd		[%o0 + 0xb8], %f8
	ldd		[%o0 + 0xb0], %f10
	ldd		[%o0 + 0xa8], %f12
	ldd		[%o0 + 0xa0], %f14
	ldd		[%o0 + 0x98], %f16
	ldd		[%o0 + 0x90], %f18
	ldd		[%o0 + 0x88], %f20
	ldd		[%o0 + 0x80], %f22
	ldd		[%o0 + 0x78], %f24
	ldd		[%o0 + 0x70], %f26
	ldd		[%o0 + 0x68], %f28
	ldd		[%o0 + 0x60], %f30
	ldd		[%o0 + 0x58], %f32
	ldd		[%o0 + 0x50], %f34
	ldd		[%o0 + 0x48], %f36
	ldd		[%o0 + 0x40], %f38
	ldd		[%o0 + 0x38], %f40
	ldd		[%o0 + 0x30], %f42
	ldd		[%o0 + 0x28], %f44
	ldd		[%o0 + 0x20], %f46
	ldd		[%o0 + 0x18], %f48
	ldd		[%o0 + 0x10], %f50
	ldd		[%o0 + 0x08], %f52
	retl
	 ldd		[%o0 + 0x00], %f54	/* last load in the delay slot */
ENDPROC(aes_sparc64_load_decrypt_keys_192)

/*
 * aes_sparc64_load_decrypt_keys_256 - preload the AES-256 decryption round
 * keys into FP registers.
 *
 * In:   %o0 = key schedule
 * Out:  %f8-%f62 = schedule doublewords from 0xd8 down to 0x00
 *       (descending address order)
 *
 * The last round key (0xe0..0xef) is not loaded by this routine.  VISEntry
 * has no matching VISExit here, so the registers stay live after return
 * (presumably the caller ends the FPU section - confirm in the C glue).
 */
	.align		32
ENTRY(aes_sparc64_load_decrypt_keys_256)
	/* %o0=key */
	VISEntry
	ldd		[%o0 + 0xd8], %f8
	ldd		[%o0 + 0xd0], %f10
	ldd		[%o0 + 0xc8], %f12
	ldd		[%o0 + 0xc0], %f14
	ldd		[%o0 + 0xb8], %f16
	ldd		[%o0 + 0xb0], %f18
	ldd		[%o0 + 0xa8], %f20
	ldd		[%o0 + 0xa0], %f22
	ldd		[%o0 + 0x98], %f24
	ldd		[%o0 + 0x90], %f26
	ldd		[%o0 + 0x88], %f28
	ldd		[%o0 + 0x80], %f30
	ldd		[%o0 + 0x78], %f32
	ldd		[%o0 + 0x70], %f34
	ldd		[%o0 + 0x68], %f36
	ldd		[%o0 + 0x60], %f38
	ldd		[%o0 + 0x58], %f40
	ldd		[%o0 + 0x50], %f42
	ldd		[%o0 + 0x48], %f44
	ldd		[%o0 + 0x40], %f46
	ldd		[%o0 + 0x38], %f48
	ldd		[%o0 + 0x30], %f50
	ldd		[%o0 + 0x28], %f52
	ldd		[%o0 + 0x20], %f54
	ldd		[%o0 + 0x18], %f56
	ldd		[%o0 + 0x10], %f58
	ldd		[%o0 + 0x08], %f60
	retl
	 ldd		[%o0 + 0x00], %f62	/* last load in the delay slot */
ENDPROC(aes_sparc64_load_decrypt_keys_256)

/*
 * aes_sparc64_ecb_encrypt_128 - ECB-encrypt a buffer with a 128-bit key.
 *
 * In:   %o0 = key schedule (only round key 0 at 0x00/0x08 is read here)
 *       %o1 = input, %o2 = output (accessed with 64-bit ldx/std)
 *       %o3 = len in bytes
 *
 * Round keys 1..10 must already be in %f8-%f46: this routine never loads
 * them and has no VISEntry/VISExit of its own.
 * aes_sparc64_load_encrypt_keys_128 produces exactly that layout.
 *
 * The main loop (label 1) handles two blocks per pass - block A in
 * %f4/%f6, block B in %f60/%f62, scratch %f0/%f2/%f56/%f58.  Label 10
 * handles a single remaining block.
 *
 * NOTE: len is expected to be a non-zero multiple of 16.  With len == 0
 * the initial "be" is not taken and the two-block loop runs once.
 *
 * Clobbers (as visible here): %g1-%g3, %g7, %o1-%o5, %f0-%f6, %f56-%f62,
 * condition codes.
 */
	.align		32
ENTRY(aes_sparc64_ecb_encrypt_128)
	/* %o0=key, %o1=input, %o2=output, %o3=len */
	ldx		[%o0 + 0x00], %g1
	subcc		%o3, 0x10, %o3		/* %o3 = len - 16 */
	be		10f			/* exactly one block */
	 ldx		[%o0 + 0x08], %g2
1:	ldx		[%o1 + 0x00], %g3
	ldx		[%o1 + 0x08], %g7
	ldx		[%o1 + 0x10], %o4
	ldx		[%o1 + 0x18], %o5
	/* Round-key-0 XOR is done in integer registers, then moved to FP. */
	xor		%g1, %g3, %g3
	xor		%g2, %g7, %g7
	MOVXTOD_G3_F4
	MOVXTOD_G7_F6
	xor		%g1, %o4, %g3
	xor		%g2, %o5, %g7
	MOVXTOD_G3_F60
	MOVXTOD_G7_F62
	ENCRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
	std		%f60, [%o2 + 0x10]
	std		%f62, [%o2 + 0x18]
	sub		%o3, 0x20, %o3
	add		%o1, 0x20, %o1
	brgz		%o3, 1b			/* more than one block left */
	 add		%o2, 0x20, %o2
	brlz,pt		%o3, 11f		/* nothing left */
	 nop
	/* %o3 == 0: exactly one block left, fall through. */
10:	ldx		[%o1 + 0x00], %g3
	ldx		[%o1 + 0x08], %g7
	xor		%g1, %g3, %g3
	xor		%g2, %g7, %g7
	MOVXTOD_G3_F4
	MOVXTOD_G7_F6
	ENCRYPT_128(8, 4, 6, 0, 2)
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
11:	retl
	 nop
ENDPROC(aes_sparc64_ecb_encrypt_128)

/*
 * aes_sparc64_ecb_encrypt_192 - ECB-encrypt a buffer with a 192-bit key.
 *
 * In:   %o0 = key schedule (only round key 0 at 0x00/0x08 is read here)
 *       %o1 = input, %o2 = output (accessed with 64-bit ldx/std)
 *       %o3 = len in bytes
 *
 * Round keys 1..12 must already be in %f8-%f54: this routine never loads
 * them and has no VISEntry/VISExit of its own.
 * aes_sparc64_load_encrypt_keys_192 produces exactly that layout.
 *
 * The main loop (label 1) handles two blocks per pass - block A in
 * %f4/%f6, block B in %f60/%f62, scratch %f0/%f2/%f56/%f58.  Label 10
 * handles a single remaining block.
 *
 * NOTE: len is expected to be a non-zero multiple of 16.  With len == 0
 * the initial "be" is not taken and the two-block loop runs once.
 *
 * Clobbers (as visible here): %g1-%g3, %g7, %o1-%o5, %f0-%f6, %f56-%f62,
 * condition codes.
 */
	.align		32
ENTRY(aes_sparc64_ecb_encrypt_192)
	/* %o0=key, %o1=input, %o2=output, %o3=len */
	ldx		[%o0 + 0x00], %g1
	subcc		%o3, 0x10, %o3		/* %o3 = len - 16 */
	be		10f			/* exactly one block */
	 ldx		[%o0 + 0x08], %g2
1:	ldx		[%o1 + 0x00], %g3
	ldx		[%o1 + 0x08], %g7
	ldx		[%o1 + 0x10], %o4
	ldx		[%o1 + 0x18], %o5
	/* Round-key-0 XOR is done in integer registers, then moved to FP. */
	xor		%g1, %g3, %g3
	xor		%g2, %g7, %g7
	MOVXTOD_G3_F4
	MOVXTOD_G7_F6
	xor		%g1, %o4, %g3
	xor		%g2, %o5, %g7
	MOVXTOD_G3_F60
	MOVXTOD_G7_F62
	ENCRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
	std		%f60, [%o2 + 0x10]
	std		%f62, [%o2 + 0x18]
	sub		%o3, 0x20, %o3
	add		%o1, 0x20, %o1
	brgz		%o3, 1b			/* more than one block left */
	 add		%o2, 0x20, %o2
	brlz,pt		%o3, 11f		/* nothing left */
	 nop
	/* %o3 == 0: exactly one block left, fall through. */
10:	ldx		[%o1 + 0x00], %g3
	ldx		[%o1 + 0x08], %g7
	xor		%g1, %g3, %g3
	xor		%g2, %g7, %g7
	MOVXTOD_G3_F4
	MOVXTOD_G7_F6
	ENCRYPT_192(8, 4, 6, 0, 2)
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
11:	retl
	 nop
ENDPROC(aes_sparc64_ecb_encrypt_192)

/*
 * aes_sparc64_ecb_encrypt_256 - ECB-encrypt a buffer with a 256-bit key.
 *
 * In:   %o0 = key schedule (round key 0 is read here; ENCRYPT_256_2 and
 *             the tail also reload individual round keys from it)
 *       %o1 = input, %o2 = output (accessed with 64-bit ldx/std)
 *       %o3 = len in bytes
 *
 * Round keys 1..14 must already be in %f8-%f62 (the layout produced by
 * aes_sparc64_load_encrypt_keys_256); there is no VISEntry/VISExit here.
 *
 * The main loop (label 1) handles two blocks per pass - block A in
 * %f4/%f6, block B in %f0/%f2.  With every key register occupied,
 * ENCRYPT_256_2 borrows key registers as scratch and reloads them from
 * [%o0].  Label 10 handles a single remaining block with %f0/%f2 as
 * scratch, after refreshing %f56-%f62 from the schedule.
 *
 * NOTE: len is expected to be a non-zero multiple of 16.  With len == 0
 * the initial "be" is not taken and the two-block loop runs once.
 *
 * Clobbers (as visible here): %g1-%g3, %g7, %o1-%o5, %f0-%f6, condition
 * codes; %f8-%f14 and %f56-%f62 are overwritten and reloaded.
 */
	.align		32
ENTRY(aes_sparc64_ecb_encrypt_256)
	/* %o0=key, %o1=input, %o2=output, %o3=len */
	ldx		[%o0 + 0x00], %g1
	subcc		%o3, 0x10, %o3		/* %o3 = len - 16 */
	be		10f			/* exactly one block */
	 ldx		[%o0 + 0x08], %g2
1:	ldx		[%o1 + 0x00], %g3
	ldx		[%o1 + 0x08], %g7
	ldx		[%o1 + 0x10], %o4
	ldx		[%o1 + 0x18], %o5
	/* Round-key-0 XOR is done in integer registers, then moved to FP. */
	xor		%g1, %g3, %g3
	xor		%g2, %g7, %g7
	MOVXTOD_G3_F4
	MOVXTOD_G7_F6
	xor		%g1, %o4, %g3
	xor		%g2, %o5, %g7
	MOVXTOD_G3_F0
	MOVXTOD_G7_F2
	ENCRYPT_256_2(8, 4, 6, 0, 2)
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
	std		%f0, [%o2 + 0x10]
	std		%f2, [%o2 + 0x18]
	sub		%o3, 0x20, %o3
	add		%o1, 0x20, %o1
	brgz		%o3, 1b			/* more than one block left */
	 add		%o2, 0x20, %o2
	brlz,pt		%o3, 11f		/* nothing left */
	 nop
	/* %o3 == 0: exactly one block left, fall through. */
10:	ldd		[%o0 + 0xd0], %f56
	ldd		[%o0 + 0xd8], %f58
	ldd		[%o0 + 0xe0], %f60
	ldd		[%o0 + 0xe8], %f62
	ldx		[%o1 + 0x00], %g3
	ldx		[%o1 + 0x08], %g7
	xor		%g1, %g3, %g3
	xor		%g2, %g7, %g7
	MOVXTOD_G3_F4
	MOVXTOD_G7_F6
	ENCRYPT_256(8, 4, 6, 0, 2)
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
11:	retl
	 nop
ENDPROC(aes_sparc64_ecb_encrypt_256)

/*
 * aes_sparc64_ecb_decrypt_128 - ECB-decrypt a buffer with a 128-bit key.
 *
 * In:   %o0 = &key[key_len], i.e. a pointer just past the key material
 *             being used; the last round key is read from %o0-0x10 and
 *             %o0-0x08
 *       %o1 = input, %o2 = output (accessed with 64-bit ldx/std)
 *       %o3 = len in bytes
 *
 * The remaining round keys must already be in %f8-%f46 in descending
 * address order (the layout produced by
 * aes_sparc64_load_decrypt_keys_128); there is no VISEntry/VISExit here.
 *
 * The main loop (label 1) handles two blocks per pass - block A in
 * %f4/%f6, block B in %f60/%f62, scratch %f0/%f2/%f56/%f58.  Label 10
 * handles a single remaining block.
 *
 * NOTE: len is expected to be a non-zero multiple of 16.  With len == 0
 * the initial "be" is not taken and the two-block loop runs once.
 *
 * Clobbers (as visible here): %g1-%g3, %g7, %o1-%o5, %f0-%f6, %f56-%f62,
 * condition codes.
 */
	.align		32
ENTRY(aes_sparc64_ecb_decrypt_128)
	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
	ldx		[%o0 - 0x10], %g1
	subcc		%o3, 0x10, %o3		/* %o3 = len - 16 */
	be		10f			/* exactly one block */
	 ldx		[%o0 - 0x08], %g2
1:	ldx		[%o1 + 0x00], %g3
	ldx		[%o1 + 0x08], %g7
	ldx		[%o1 + 0x10], %o4
	ldx		[%o1 + 0x18], %o5
	/* Last-round-key XOR is done in integer registers, then moved to FP. */
	xor		%g1, %g3, %g3
	xor		%g2, %g7, %g7
	MOVXTOD_G3_F4
	MOVXTOD_G7_F6
	xor		%g1, %o4, %g3
	xor		%g2, %o5, %g7
	MOVXTOD_G3_F60
	MOVXTOD_G7_F62
	DECRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
	std		%f60, [%o2 + 0x10]
	std		%f62, [%o2 + 0x18]
	sub		%o3, 0x20, %o3
	add		%o1, 0x20, %o1
	brgz,pt		%o3, 1b			/* more than one block left */
	 add		%o2, 0x20, %o2
	brlz,pt		%o3, 11f		/* nothing left */
	 nop
	/* %o3 == 0: exactly one block left, fall through. */
10:	ldx		[%o1 + 0x00], %g3
	ldx		[%o1 + 0x08], %g7
	xor		%g1, %g3, %g3
	xor		%g2, %g7, %g7
	MOVXTOD_G3_F4
	MOVXTOD_G7_F6
	DECRYPT_128(8, 4, 6, 0, 2)
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
11:	retl
	 nop
ENDPROC(aes_sparc64_ecb_decrypt_128)

1088	.align		32
/*
 * AES-192 ECB decryption of a run of 16-byte blocks.
 *
 * In: %o0 = &key[key_len], %o1 = input, %o2 = output, %o3 = len (bytes).
 *
 * The last 16 bytes of the key schedule (at %o0 - 0x10) are held in
 * %g1/%g2 and XORed into every input block with integer ops before the
 * block is handed to the FP registers. The main loop handles two blocks
 * per pass; a trailing single block is handled at label 10.
 *
 * NOTE(review): the MOVXTOD_* macros come from opcodes.h (not visible
 * here); by name they presumably move an integer register into the named
 * FP register -- confirm.
 * NOTE(review): the round keys are presumably already resident in the FP
 * registers from %f8 up (KEY_BASE = 8), loaded by a routine outside this
 * view -- confirm.
 * NOTE(review): len == 0 is not guarded; it would enter the two-block
 * loop once. Callers presumably pass a non-zero multiple of 16.
 */
ENTRY(aes_sparc64_ecb_decrypt_192)
	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
	ldx		[%o0 - 0x10], %g1
	subcc		%o3, 0x10, %o3		/* bias len by one block */
	be		10f			/* exactly one block in total */
	 ldx		[%o0 - 0x08], %g2	/* delay slot: runs on both paths */
	/*
	 * Two blocks per iteration: block 0 in %f4/%f6, block 1 in
	 * %f60/%f62; %f0/%f2/%f56/%f58 are passed as the macro's temporaries.
	 */
1:	ldx		[%o1 + 0x00], %g3
	ldx		[%o1 + 0x08], %g7
	ldx		[%o1 + 0x10], %o4
	ldx		[%o1 + 0x18], %o5
	xor		%g1, %g3, %g3
	xor		%g2, %g7, %g7
	MOVXTOD_G3_F4
	MOVXTOD_G7_F6
	xor		%g1, %o4, %g3
	xor		%g2, %o5, %g7
	MOVXTOD_G3_F60
	MOVXTOD_G7_F62
	DECRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
	std		%f60, [%o2 + 0x10]
	std		%f62, [%o2 + 0x18]
	sub		%o3, 0x20, %o3
	add		%o1, 0x20, %o1
	brgz,pt		%o3, 1b			/* at least two blocks still left */
	 add		%o2, 0x20, %o2
	brlz,pt		%o3, 11f		/* negative: nothing left */
	 nop
	/* Single-block path (%o3 == 0 when falling through from the loop). */
10:	ldx		[%o1 + 0x00], %g3
	ldx		[%o1 + 0x08], %g7
	xor		%g1, %g3, %g3
	xor		%g2, %g7, %g7
	MOVXTOD_G3_F4
	MOVXTOD_G7_F6
	DECRYPT_192(8, 4, 6, 0, 2)
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
11:	retl
	 nop
ENDPROC(aes_sparc64_ecb_decrypt_192)
1130
1131	.align		32
/*
 * AES-256 ECB decryption of a run of 16-byte blocks.
 *
 * In: %o0 = &key[key_len], %o1 = input, %o2 = output, %o3 = len (bytes).
 *
 * The last 16 bytes of the key schedule (at %o0 - 0x10) are held in
 * %g1/%g2 and XORed into every input block with integer ops before the
 * block is handed to the FP registers. %o0 is then moved down by 0xf0
 * bytes and used as the base for key reloads. The main loop handles two
 * blocks per pass; a trailing single block is handled at label 10.
 *
 * NOTE(review): the MOVXTOD_* macros come from opcodes.h (not visible
 * here); by name they presumably move an integer register into the named
 * FP register -- confirm.
 * NOTE(review): the round keys are presumably already resident in the FP
 * registers from %f8 up (KEY_BASE = 8), loaded by a routine outside this
 * view -- confirm.
 * NOTE(review): len == 0 is not guarded; it would enter the two-block
 * loop once. Callers presumably pass a non-zero multiple of 16.
 */
ENTRY(aes_sparc64_ecb_decrypt_256)
	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
	ldx		[%o0 - 0x10], %g1
	subcc		%o3, 0x10, %o3		/* bias len by one block */
	ldx		[%o0 - 0x08], %g2
	be		10f			/* exactly one block in total */
	/*
	 * Delay slot, runs on both paths.
	 * NOTE(review): 0xf0 is presumably the size of the AES-256 key
	 * schedule, making %o0 point at its start -- confirm.
	 */
	 sub		%o0, 0xf0, %o0
	/* Two blocks per iteration: block 0 in %f4/%f6, block 1 in %f0/%f2. */
1:	ldx		[%o1 + 0x00], %g3
	ldx		[%o1 + 0x08], %g7
	ldx		[%o1 + 0x10], %o4
	ldx		[%o1 + 0x18], %o5
	xor		%g1, %g3, %g3
	xor		%g2, %g7, %g7
	MOVXTOD_G3_F4
	MOVXTOD_G7_F6
	xor		%g1, %o4, %g3
	xor		%g2, %o5, %g7
	MOVXTOD_G3_F0
	MOVXTOD_G7_F2
	DECRYPT_256_2(8, 4, 6, 0, 2)
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
	std		%f0, [%o2 + 0x10]
	std		%f2, [%o2 + 0x18]
	sub		%o3, 0x20, %o3
	add		%o1, 0x20, %o1
	brgz,pt		%o3, 1b			/* at least two blocks still left */
	 add		%o2, 0x20, %o2
	brlz,pt		%o3, 11f		/* negative: nothing left */
	 nop
	/*
	 * Single-block path (%o3 == 0 when falling through from the loop).
	 * %f56-%f62 are reloaded from the four doublewords at the adjusted
	 * %o0, in descending address order.
	 * NOTE(review): presumably because DECRYPT_256_2 (defined outside
	 * this view) uses them as scratch -- confirm.
	 */
10:	ldd		[%o0 + 0x18], %f56
	ldd		[%o0 + 0x10], %f58
	ldd		[%o0 + 0x08], %f60
	ldd		[%o0 + 0x00], %f62
	ldx		[%o1 + 0x00], %g3
	ldx		[%o1 + 0x08], %g7
	xor		%g1, %g3, %g3
	xor		%g2, %g7, %g7
	MOVXTOD_G3_F4
	MOVXTOD_G7_F6
	DECRYPT_256(8, 4, 6, 0, 2)
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
11:	retl
	 nop
ENDPROC(aes_sparc64_ecb_decrypt_256)
1178
1179	.align		32
/*
 * AES-128 CBC encryption, one 16-byte block per loop iteration.
 *
 * In: %o0 = key, %o1 = input, %o2 = output, %o3 = len (bytes),
 *     %o4 = IV (read on entry, overwritten on exit).
 *
 * %f4/%f6 carry the chaining value: the IV at first, then each ciphertext
 * block just produced. The first 16 key bytes sit in %g1/%g2 and are
 * XORed into the plaintext with integer ops, so the fxor pair yields
 * chain ^ plaintext ^ key[0..15] as the input to ENCRYPT_128.
 *
 * NOTE(review): the MOVXTOD_* macros come from opcodes.h (not visible
 * here); by name they presumably move an integer register into the named
 * FP register -- confirm.
 * NOTE(review): the round keys are presumably already resident in the FP
 * registers from %f8 up (KEY_BASE = 8), loaded by a routine outside this
 * view -- confirm.
 * NOTE(review): the loop body runs before len is tested and only exits
 * when len reaches exactly 0, so len must be a non-zero multiple of 16.
 */
ENTRY(aes_sparc64_cbc_encrypt_128)
	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
	ldd		[%o4 + 0x00], %f4
	ldd		[%o4 + 0x08], %f6
	ldx		[%o0 + 0x00], %g1
	ldx		[%o0 + 0x08], %g2
1:	ldx		[%o1 + 0x00], %g3
	ldx		[%o1 + 0x08], %g7
	add		%o1, 0x10, %o1
	xor		%g1, %g3, %g3
	xor		%g2, %g7, %g7
	MOVXTOD_G3_F0
	MOVXTOD_G7_F2
	fxor		%f4, %f0, %f4		/* fold in the chaining value */
	fxor		%f6, %f2, %f6
	ENCRYPT_128(8, 4, 6, 0, 2)
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
	subcc		%o3, 0x10, %o3
	bne,pt		%xcc, 1b		/* loop until len hits zero */
	 add		%o2, 0x10, %o2
	std		%f4, [%o4 + 0x00]	/* last ciphertext block -> IV */
	std		%f6, [%o4 + 0x08]
	retl
	 nop
ENDPROC(aes_sparc64_cbc_encrypt_128)
1206
1207	.align		32
/*
 * AES-192 CBC encryption, one 16-byte block per loop iteration.
 *
 * In: %o0 = key, %o1 = input, %o2 = output, %o3 = len (bytes),
 *     %o4 = IV (read on entry, overwritten on exit).
 *
 * %f4/%f6 carry the chaining value: the IV at first, then each ciphertext
 * block just produced. The first 16 key bytes sit in %g1/%g2 and are
 * XORed into the plaintext with integer ops, so the fxor pair yields
 * chain ^ plaintext ^ key[0..15] as the input to ENCRYPT_192.
 *
 * NOTE(review): the MOVXTOD_* macros come from opcodes.h (not visible
 * here); by name they presumably move an integer register into the named
 * FP register -- confirm.
 * NOTE(review): the round keys are presumably already resident in the FP
 * registers from %f8 up (KEY_BASE = 8), loaded by a routine outside this
 * view -- confirm.
 * NOTE(review): the loop body runs before len is tested and only exits
 * when len reaches exactly 0, so len must be a non-zero multiple of 16.
 */
ENTRY(aes_sparc64_cbc_encrypt_192)
	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
	ldd		[%o4 + 0x00], %f4
	ldd		[%o4 + 0x08], %f6
	ldx		[%o0 + 0x00], %g1
	ldx		[%o0 + 0x08], %g2
1:	ldx		[%o1 + 0x00], %g3
	ldx		[%o1 + 0x08], %g7
	add		%o1, 0x10, %o1
	xor		%g1, %g3, %g3
	xor		%g2, %g7, %g7
	MOVXTOD_G3_F0
	MOVXTOD_G7_F2
	fxor		%f4, %f0, %f4		/* fold in the chaining value */
	fxor		%f6, %f2, %f6
	ENCRYPT_192(8, 4, 6, 0, 2)
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
	subcc		%o3, 0x10, %o3
	bne,pt		%xcc, 1b		/* loop until len hits zero */
	 add		%o2, 0x10, %o2
	std		%f4, [%o4 + 0x00]	/* last ciphertext block -> IV */
	std		%f6, [%o4 + 0x08]
	retl
	 nop
ENDPROC(aes_sparc64_cbc_encrypt_192)
1234
1235	.align		32
/*
 * AES-256 CBC encryption, one 16-byte block per loop iteration.
 *
 * In: %o0 = key, %o1 = input, %o2 = output, %o3 = len (bytes),
 *     %o4 = IV (read on entry, overwritten on exit).
 *
 * %f4/%f6 carry the chaining value: the IV at first, then each ciphertext
 * block just produced. The first 16 key bytes sit in %g1/%g2 and are
 * XORed into the plaintext with integer ops, so the fxor pair yields
 * chain ^ plaintext ^ key[0..15] as the input to ENCRYPT_256.
 *
 * NOTE(review): the MOVXTOD_* macros come from opcodes.h (not visible
 * here); by name they presumably move an integer register into the named
 * FP register -- confirm.
 * NOTE(review): the round keys are presumably already resident in the FP
 * registers from %f8 up (KEY_BASE = 8), loaded by a routine outside this
 * view -- confirm.
 * NOTE(review): the loop body runs before len is tested and only exits
 * when len reaches exactly 0, so len must be a non-zero multiple of 16.
 */
ENTRY(aes_sparc64_cbc_encrypt_256)
	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
	ldd		[%o4 + 0x00], %f4
	ldd		[%o4 + 0x08], %f6
	ldx		[%o0 + 0x00], %g1
	ldx		[%o0 + 0x08], %g2
1:	ldx		[%o1 + 0x00], %g3
	ldx		[%o1 + 0x08], %g7
	add		%o1, 0x10, %o1
	xor		%g1, %g3, %g3
	xor		%g2, %g7, %g7
	MOVXTOD_G3_F0
	MOVXTOD_G7_F2
	fxor		%f4, %f0, %f4		/* fold in the chaining value */
	fxor		%f6, %f2, %f6
	ENCRYPT_256(8, 4, 6, 0, 2)
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
	subcc		%o3, 0x10, %o3
	bne,pt		%xcc, 1b		/* loop until len hits zero */
	 add		%o2, 0x10, %o2
	std		%f4, [%o4 + 0x00]	/* last ciphertext block -> IV */
	std		%f6, [%o4 + 0x08]
	retl
	 nop
ENDPROC(aes_sparc64_cbc_encrypt_256)
1262
1263	.align		32
/*
 * AES-128 CBC decryption, one 16-byte block per loop iteration.
 *
 * In: %o0 = &key[key_len], %o1 = input, %o2 = output, %o3 = len (bytes),
 *     %o4 = iv (read on entry, overwritten on exit).
 *
 * The last 16 bytes of the key schedule (at %o0 - 0x10) are held in
 * %g1/%g2. Once they are loaded %o0 is no longer needed as a pointer, so
 * %o0/%o5 are reused to carry the chaining value (the IV at first, then
 * the previous ciphertext block).
 *
 * NOTE(review): the MOVXTOD_* macros come from opcodes.h (not visible
 * here); by name they presumably move an integer register into the named
 * FP register -- confirm.
 * NOTE(review): the round keys are presumably already resident in the FP
 * registers from %f8 up (KEY_BASE = 8), loaded by a routine outside this
 * view -- confirm.
 * NOTE(review): the loop body runs before len is tested and only exits
 * when len reaches exactly 0, so len must be a non-zero multiple of 16.
 */
ENTRY(aes_sparc64_cbc_decrypt_128)
	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */
	ldx		[%o0 - 0x10], %g1
	ldx		[%o0 - 0x08], %g2
	ldx		[%o4 + 0x00], %o0	/* %o0/%o5 = chaining value */
	ldx		[%o4 + 0x08], %o5
1:	ldx		[%o1 + 0x00], %g3
	ldx		[%o1 + 0x08], %g7
	add		%o1, 0x10, %o1
	xor		%g1, %g3, %g3
	xor		%g2, %g7, %g7
	MOVXTOD_G3_F4
	MOVXTOD_G7_F6
	DECRYPT_128(8, 4, 6, 0, 2)
	MOVXTOD_O0_F0
	MOVXTOD_O5_F2
	/*
	 * %g3/%g7 still hold key ^ ciphertext, so XORing with %g1/%g2 again
	 * recovers the ciphertext words as the next chaining value.
	 */
	xor		%g1, %g3, %o0
	xor		%g2, %g7, %o5
	fxor		%f4, %f0, %f4		/* decrypted block ^ previous chain */
	fxor		%f6, %f2, %f6
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
	subcc		%o3, 0x10, %o3
	bne,pt		%xcc, 1b		/* loop until len hits zero */
	 add		%o2, 0x10, %o2
	stx		%o0, [%o4 + 0x00]	/* last ciphertext block -> iv */
	stx		%o5, [%o4 + 0x08]
	retl
	 nop
ENDPROC(aes_sparc64_cbc_decrypt_128)
1294
1295	.align		32
/*
 * AES-192 CBC decryption, one 16-byte block per loop iteration.
 *
 * In: %o0 = &key[key_len], %o1 = input, %o2 = output, %o3 = len (bytes),
 *     %o4 = iv (read on entry, overwritten on exit).
 *
 * The last 16 bytes of the key schedule (at %o0 - 0x10) are held in
 * %g1/%g2. Once they are loaded %o0 is no longer needed as a pointer, so
 * %o0/%o5 are reused to carry the chaining value (the IV at first, then
 * the previous ciphertext block).
 *
 * NOTE(review): the MOVXTOD_* macros come from opcodes.h (not visible
 * here); by name they presumably move an integer register into the named
 * FP register -- confirm.
 * NOTE(review): the round keys are presumably already resident in the FP
 * registers from %f8 up (KEY_BASE = 8), loaded by a routine outside this
 * view -- confirm.
 * NOTE(review): the loop body runs before len is tested and only exits
 * when len reaches exactly 0, so len must be a non-zero multiple of 16.
 */
ENTRY(aes_sparc64_cbc_decrypt_192)
	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */
	ldx		[%o0 - 0x10], %g1
	ldx		[%o0 - 0x08], %g2
	ldx		[%o4 + 0x00], %o0	/* %o0/%o5 = chaining value */
	ldx		[%o4 + 0x08], %o5
1:	ldx		[%o1 + 0x00], %g3
	ldx		[%o1 + 0x08], %g7
	add		%o1, 0x10, %o1
	xor		%g1, %g3, %g3
	xor		%g2, %g7, %g7
	MOVXTOD_G3_F4
	MOVXTOD_G7_F6
	DECRYPT_192(8, 4, 6, 0, 2)
	MOVXTOD_O0_F0
	MOVXTOD_O5_F2
	/*
	 * %g3/%g7 still hold key ^ ciphertext, so XORing with %g1/%g2 again
	 * recovers the ciphertext words as the next chaining value.
	 */
	xor		%g1, %g3, %o0
	xor		%g2, %g7, %o5
	fxor		%f4, %f0, %f4		/* decrypted block ^ previous chain */
	fxor		%f6, %f2, %f6
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
	subcc		%o3, 0x10, %o3
	bne,pt		%xcc, 1b		/* loop until len hits zero */
	 add		%o2, 0x10, %o2
	stx		%o0, [%o4 + 0x00]	/* last ciphertext block -> iv */
	stx		%o5, [%o4 + 0x08]
	retl
	 nop
ENDPROC(aes_sparc64_cbc_decrypt_192)
1326
1327	.align		32
/*
 * AES-256 CBC decryption, one 16-byte block per loop iteration.
 *
 * In: %o0 = &key[key_len], %o1 = input, %o2 = output, %o3 = len (bytes),
 *     %o4 = iv (read on entry, overwritten on exit).
 *
 * The last 16 bytes of the key schedule (at %o0 - 0x10) are held in
 * %g1/%g2. Once they are loaded %o0 is no longer needed as a pointer, so
 * %o0/%o5 are reused to carry the chaining value (the IV at first, then
 * the previous ciphertext block).
 *
 * NOTE(review): the MOVXTOD_* macros come from opcodes.h (not visible
 * here); by name they presumably move an integer register into the named
 * FP register -- confirm.
 * NOTE(review): the round keys are presumably already resident in the FP
 * registers from %f8 up (KEY_BASE = 8), loaded by a routine outside this
 * view -- confirm.
 * NOTE(review): the loop body runs before len is tested and only exits
 * when len reaches exactly 0, so len must be a non-zero multiple of 16.
 */
ENTRY(aes_sparc64_cbc_decrypt_256)
	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */
	ldx		[%o0 - 0x10], %g1
	ldx		[%o0 - 0x08], %g2
	ldx		[%o4 + 0x00], %o0	/* %o0/%o5 = chaining value */
	ldx		[%o4 + 0x08], %o5
1:	ldx		[%o1 + 0x00], %g3
	ldx		[%o1 + 0x08], %g7
	add		%o1, 0x10, %o1
	xor		%g1, %g3, %g3
	xor		%g2, %g7, %g7
	MOVXTOD_G3_F4
	MOVXTOD_G7_F6
	DECRYPT_256(8, 4, 6, 0, 2)
	MOVXTOD_O0_F0
	MOVXTOD_O5_F2
	/*
	 * %g3/%g7 still hold key ^ ciphertext, so XORing with %g1/%g2 again
	 * recovers the ciphertext words as the next chaining value.
	 */
	xor		%g1, %g3, %o0
	xor		%g2, %g7, %o5
	fxor		%f4, %f0, %f4		/* decrypted block ^ previous chain */
	fxor		%f6, %f2, %f6
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
	subcc		%o3, 0x10, %o3
	bne,pt		%xcc, 1b		/* loop until len hits zero */
	 add		%o2, 0x10, %o2
	stx		%o0, [%o4 + 0x00]	/* last ciphertext block -> iv */
	stx		%o5, [%o4 + 0x08]
	retl
	 nop
ENDPROC(aes_sparc64_cbc_decrypt_256)
1358
1359	.align		32
/*
 * AES-128 CTR mode: encrypt successive counter values and XOR the result
 * into the input stream.
 *
 * In: %o0 = key, %o1 = input, %o2 = output, %o3 = len (bytes),
 *     %o4 = IV / counter block (read on entry, updated on exit).
 *
 * %g3 (doubleword at IV + 0) and %g7 (doubleword at IV + 8) form a
 * 128-bit counter with %g7 as the low half. Each increment adds 1 to %g7
 * and, when %g7 wraps to zero, carries into %g3 via movrz. %g1/%g2 hold
 * the first 16 key bytes and are XORed into the counter with integer ops
 * before it is handed to the FP registers. The main loop handles two
 * blocks per pass; a trailing single block is handled at label 10.
 *
 * NOTE(review): the MOVXTOD_* macros come from opcodes.h (not visible
 * here); by name they presumably move an integer register into the named
 * FP register -- confirm.
 * NOTE(review): the round keys are presumably already resident in the FP
 * registers from %f8 up (KEY_BASE = 8), loaded by a routine outside this
 * view -- confirm.
 * NOTE(review): len == 0 is not guarded; it would enter the two-block
 * loop once. Callers presumably pass a non-zero multiple of 16.
 */
ENTRY(aes_sparc64_ctr_crypt_128)
	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
	ldx		[%o4 + 0x00], %g3
	ldx		[%o4 + 0x08], %g7
	subcc		%o3, 0x10, %o3		/* bias len by one block */
	ldx		[%o0 + 0x00], %g1
	be		10f			/* exactly one block in total */
	 ldx		[%o0 + 0x08], %g2	/* delay slot: runs on both paths */
	/* Two counter blocks per iteration: %f0/%f2 and %f4/%f6. */
1:	xor		%g1, %g3, %o5
	MOVXTOD_O5_F0
	xor		%g2, %g7, %o5
	MOVXTOD_O5_F2
	add		%g7, 1, %g7		/* bump low counter half */
	add		%g3, 1, %o5		/* high half + 1, used only on carry */
	movrz		%g7, %o5, %g3		/* low half wrapped: take the carry */
	xor		%g1, %g3, %o5
	MOVXTOD_O5_F4
	xor		%g2, %g7, %o5
	MOVXTOD_O5_F6
	add		%g7, 1, %g7
	add		%g3, 1, %o5
	movrz		%g7, %o5, %g3
	ENCRYPT_128_2(8, 0, 2, 4, 6, 56, 58, 60, 62)
	/* %f56-%f62 (macro temporaries above) now receive the input data. */
	ldd		[%o1 + 0x00], %f56
	ldd		[%o1 + 0x08], %f58
	ldd		[%o1 + 0x10], %f60
	ldd		[%o1 + 0x18], %f62
	fxor		%f56, %f0, %f56		/* input ^ keystream */
	fxor		%f58, %f2, %f58
	fxor		%f60, %f4, %f60
	fxor		%f62, %f6, %f62
	std		%f56, [%o2 + 0x00]
	std		%f58, [%o2 + 0x08]
	std		%f60, [%o2 + 0x10]
	std		%f62, [%o2 + 0x18]
	subcc		%o3, 0x20, %o3
	add		%o1, 0x20, %o1
	brgz		%o3, 1b			/* at least two blocks still left */
	 add		%o2, 0x20, %o2
	brlz,pt		%o3, 11f		/* negative: nothing left */
	 nop
	/* Single-block path (%o3 == 0 when falling through from the loop). */
10:	xor		%g1, %g3, %o5
	MOVXTOD_O5_F0
	xor		%g2, %g7, %o5
	MOVXTOD_O5_F2
	add		%g7, 1, %g7
	add		%g3, 1, %o5
	movrz		%g7, %o5, %g3
	ENCRYPT_128(8, 0, 2, 4, 6)
	ldd		[%o1 + 0x00], %f4
	ldd		[%o1 + 0x08], %f6
	fxor		%f4, %f0, %f4		/* input ^ keystream */
	fxor		%f6, %f2, %f6
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
	/* Write the advanced counter back; second store sits in the delay slot. */
11:	stx		%g3, [%o4 + 0x00]
	retl
	 stx		%g7, [%o4 + 0x08]
ENDPROC(aes_sparc64_ctr_crypt_128)
1419
1420	.align		32
/*
 * AES-192 CTR mode: encrypt successive counter values and XOR the result
 * into the input stream.
 *
 * In: %o0 = key, %o1 = input, %o2 = output, %o3 = len (bytes),
 *     %o4 = IV / counter block (read on entry, updated on exit).
 *
 * %g3 (doubleword at IV + 0) and %g7 (doubleword at IV + 8) form a
 * 128-bit counter with %g7 as the low half. Each increment adds 1 to %g7
 * and, when %g7 wraps to zero, carries into %g3 via movrz. %g1/%g2 hold
 * the first 16 key bytes and are XORed into the counter with integer ops
 * before it is handed to the FP registers. The main loop handles two
 * blocks per pass; a trailing single block is handled at label 10.
 *
 * NOTE(review): the MOVXTOD_* macros come from opcodes.h (not visible
 * here); by name they presumably move an integer register into the named
 * FP register -- confirm.
 * NOTE(review): the round keys are presumably already resident in the FP
 * registers from %f8 up (KEY_BASE = 8), loaded by a routine outside this
 * view -- confirm.
 * NOTE(review): len == 0 is not guarded; it would enter the two-block
 * loop once. Callers presumably pass a non-zero multiple of 16.
 */
ENTRY(aes_sparc64_ctr_crypt_192)
	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
	ldx		[%o4 + 0x00], %g3
	ldx		[%o4 + 0x08], %g7
	subcc		%o3, 0x10, %o3		/* bias len by one block */
	ldx		[%o0 + 0x00], %g1
	be		10f			/* exactly one block in total */
	 ldx		[%o0 + 0x08], %g2	/* delay slot: runs on both paths */
	/* Two counter blocks per iteration: %f0/%f2 and %f4/%f6. */
1:	xor		%g1, %g3, %o5
	MOVXTOD_O5_F0
	xor		%g2, %g7, %o5
	MOVXTOD_O5_F2
	add		%g7, 1, %g7		/* bump low counter half */
	add		%g3, 1, %o5		/* high half + 1, used only on carry */
	movrz		%g7, %o5, %g3		/* low half wrapped: take the carry */
	xor		%g1, %g3, %o5
	MOVXTOD_O5_F4
	xor		%g2, %g7, %o5
	MOVXTOD_O5_F6
	add		%g7, 1, %g7
	add		%g3, 1, %o5
	movrz		%g7, %o5, %g3
	ENCRYPT_192_2(8, 0, 2, 4, 6, 56, 58, 60, 62)
	/* %f56-%f62 (macro temporaries above) now receive the input data. */
	ldd		[%o1 + 0x00], %f56
	ldd		[%o1 + 0x08], %f58
	ldd		[%o1 + 0x10], %f60
	ldd		[%o1 + 0x18], %f62
	fxor		%f56, %f0, %f56		/* input ^ keystream */
	fxor		%f58, %f2, %f58
	fxor		%f60, %f4, %f60
	fxor		%f62, %f6, %f62
	std		%f56, [%o2 + 0x00]
	std		%f58, [%o2 + 0x08]
	std		%f60, [%o2 + 0x10]
	std		%f62, [%o2 + 0x18]
	subcc		%o3, 0x20, %o3
	add		%o1, 0x20, %o1
	brgz		%o3, 1b			/* at least two blocks still left */
	 add		%o2, 0x20, %o2
	brlz,pt		%o3, 11f		/* negative: nothing left */
	 nop
	/* Single-block path (%o3 == 0 when falling through from the loop). */
10:	xor		%g1, %g3, %o5
	MOVXTOD_O5_F0
	xor		%g2, %g7, %o5
	MOVXTOD_O5_F2
	add		%g7, 1, %g7
	add		%g3, 1, %o5
	movrz		%g7, %o5, %g3
	ENCRYPT_192(8, 0, 2, 4, 6)
	ldd		[%o1 + 0x00], %f4
	ldd		[%o1 + 0x08], %f6
	fxor		%f4, %f0, %f4		/* input ^ keystream */
	fxor		%f6, %f2, %f6
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
	/* Write the advanced counter back; second store sits in the delay slot. */
11:	stx		%g3, [%o4 + 0x00]
	retl
	 stx		%g7, [%o4 + 0x08]
ENDPROC(aes_sparc64_ctr_crypt_192)
1480
1481	.align		32
/*
 * AES-256 CTR mode: encrypt successive counter values and XOR the result
 * into the input stream.
 *
 * In: %o0 = key, %o1 = input, %o2 = output, %o3 = len (bytes),
 *     %o4 = IV / counter block (read on entry, updated on exit).
 *
 * %g3 (doubleword at IV + 0) and %g7 (doubleword at IV + 8) form a
 * 128-bit counter with %g7 as the low half. Each increment adds 1 to %g7
 * and, when %g7 wraps to zero, carries into %g3 via movrz. %g1/%g2 hold
 * the first 16 key bytes and are XORed into the counter with integer ops
 * before it is handed to the FP registers. The main loop handles two
 * blocks per pass; a trailing single block is handled at label 10.
 *
 * NOTE(review): the MOVXTOD_* macros come from opcodes.h (not visible
 * here); by name they presumably move an integer register into the named
 * FP register -- confirm.
 * NOTE(review): the round keys are presumably already resident in the FP
 * registers from %f8 up (KEY_BASE = 8), loaded by a routine outside this
 * view -- confirm.
 * NOTE(review): len == 0 is not guarded; it would enter the two-block
 * loop once. Callers presumably pass a non-zero multiple of 16.
 */
ENTRY(aes_sparc64_ctr_crypt_256)
	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
	ldx		[%o4 + 0x00], %g3
	ldx		[%o4 + 0x08], %g7
	subcc		%o3, 0x10, %o3		/* bias len by one block */
	ldx		[%o0 + 0x00], %g1
	be		10f			/* exactly one block in total */
	 ldx		[%o0 + 0x08], %g2	/* delay slot: runs on both paths */
	/* Two counter blocks per iteration: %f0/%f2 and %f4/%f6. */
1:	xor		%g1, %g3, %o5
	MOVXTOD_O5_F0
	xor		%g2, %g7, %o5
	MOVXTOD_O5_F2
	add		%g7, 1, %g7		/* bump low counter half */
	add		%g3, 1, %o5		/* high half + 1, used only on carry */
	movrz		%g7, %o5, %g3		/* low half wrapped: take the carry */
	xor		%g1, %g3, %o5
	MOVXTOD_O5_F4
	xor		%g2, %g7, %o5
	MOVXTOD_O5_F6
	add		%g7, 1, %g7
	add		%g3, 1, %o5
	movrz		%g7, %o5, %g3
	ENCRYPT_256_2(8, 0, 2, 4, 6)
	/* %f56-%f62 are overwritten with input data from here on. */
	ldd		[%o1 + 0x00], %f56
	ldd		[%o1 + 0x08], %f58
	ldd		[%o1 + 0x10], %f60
	ldd		[%o1 + 0x18], %f62
	fxor		%f56, %f0, %f56		/* input ^ keystream */
	fxor		%f58, %f2, %f58
	fxor		%f60, %f4, %f60
	fxor		%f62, %f6, %f62
	std		%f56, [%o2 + 0x00]
	std		%f58, [%o2 + 0x08]
	std		%f60, [%o2 + 0x10]
	std		%f62, [%o2 + 0x18]
	subcc		%o3, 0x20, %o3
	add		%o1, 0x20, %o1
	brgz		%o3, 1b			/* at least two blocks still left */
	 add		%o2, 0x20, %o2
	brlz,pt		%o3, 11f		/* negative: nothing left */
	 nop
	/*
	 * Single-block path (%o3 == 0 when falling through from the loop).
	 * %f56-%f62 are reloaded from key bytes 0xd0-0xef first; the loop
	 * above leaves input data in them.
	 * NOTE(review): ENCRYPT_256 (defined outside this view) presumably
	 * expects round keys in those registers -- confirm.
	 */
10:	ldd		[%o0 + 0xd0], %f56
	ldd		[%o0 + 0xd8], %f58
	ldd		[%o0 + 0xe0], %f60
	ldd		[%o0 + 0xe8], %f62
	xor		%g1, %g3, %o5
	MOVXTOD_O5_F0
	xor		%g2, %g7, %o5
	MOVXTOD_O5_F2
	add		%g7, 1, %g7
	add		%g3, 1, %o5
	movrz		%g7, %o5, %g3
	ENCRYPT_256(8, 0, 2, 4, 6)
	ldd		[%o1 + 0x00], %f4
	ldd		[%o1 + 0x08], %f6
	fxor		%f4, %f0, %f4		/* input ^ keystream */
	fxor		%f6, %f2, %f6
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
	/* Write the advanced counter back; second store sits in the delay slot. */
11:	stx		%g3, [%o4 + 0x00]
	retl
	 stx		%g7, [%o4 + 0x08]
ENDPROC(aes_sparc64_ctr_crypt_256)
1545