xref: /openbmc/linux/arch/sparc/crypto/aes_asm.S (revision eb3fcf00)
1#include <linux/linkage.h>
2#include <asm/visasm.h>
3
4#include "opcodes.h"
5
/*
 * ENCRYPT_TWO_ROUNDS: two encryption rounds on one block.
 *
 * Expands to four AES_EROUND* opcode macros (defined outside this
 * section, presumably in opcodes.h).  The first pair reads I0/I1 and
 * writes T0/T1 using the keys at KEY_BASE+0 and KEY_BASE+2; the second
 * pair reads T0/T1 and writes the result back into I0/I1 using the keys
 * at KEY_BASE+4 and KEY_BASE+6.
 *
 * All arguments are register numbers; callers in this file pass the
 * numbers of the %f registers they have just loaded (e.g. 8, 4, 6, 0, 2).
 */
6#define ENCRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \
7	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
8	AES_EROUND23(KEY_BASE +  2, I0, I1, T1) \
9	AES_EROUND01(KEY_BASE +  4, T0, T1, I0) \
10	AES_EROUND23(KEY_BASE +  6, T0, T1, I1)
11
/*
 * ENCRYPT_TWO_ROUNDS_2: two encryption rounds on two independent blocks.
 *
 * Block A lives in I0/I1 (temporaries T0/T1), block B in I2/I3
 * (temporaries T2/T3).  Both blocks use the same four key registers
 * KEY_BASE+0..+6.  The operations of the two blocks are interleaved
 * pairwise; each block ends up back in its own I registers.
 */
12#define ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
13	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
14	AES_EROUND23(KEY_BASE +  2, I0, I1, T1) \
15	AES_EROUND01(KEY_BASE +  0, I2, I3, T2) \
16	AES_EROUND23(KEY_BASE +  2, I2, I3, T3) \
17	AES_EROUND01(KEY_BASE +  4, T0, T1, I0) \
18	AES_EROUND23(KEY_BASE +  6, T0, T1, I1) \
19	AES_EROUND01(KEY_BASE +  4, T2, T3, I2) \
20	AES_EROUND23(KEY_BASE +  6, T2, T3, I3)
21
/*
 * ENCRYPT_TWO_ROUNDS_LAST: same shape as ENCRYPT_TWO_ROUNDS, but the
 * second round is emitted with the "_L" opcode variants
 * (AES_EROUND01_L / AES_EROUND23_L).  Every ENCRYPT_<bits> macro in this
 * file uses it as its final step, so the _L forms are presumably the
 * final-round instructions -- confirm against opcodes.h.
 */
22#define ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \
23	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
24	AES_EROUND23(KEY_BASE +  2, I0, I1, T1) \
25	AES_EROUND01_L(KEY_BASE +  4, T0, T1, I0) \
26	AES_EROUND23_L(KEY_BASE +  6, T0, T1, I1)
27
/*
 * ENCRYPT_TWO_ROUNDS_LAST_2: two-block counterpart of
 * ENCRYPT_TWO_ROUNDS_LAST.  Block A is I0/I1 with temporaries T0/T1,
 * block B is I2/I3 with temporaries T2/T3; the second round of each
 * block uses the "_L" opcode variants.
 */
28#define ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
29	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
30	AES_EROUND23(KEY_BASE +  2, I0, I1, T1) \
31	AES_EROUND01(KEY_BASE +  0, I2, I3, T2) \
32	AES_EROUND23(KEY_BASE +  2, I2, I3, T3) \
33	AES_EROUND01_L(KEY_BASE +  4, T0, T1, I0) \
34	AES_EROUND23_L(KEY_BASE +  6, T0, T1, I1) \
35	AES_EROUND01_L(KEY_BASE +  4, T2, T3, I2) \
36	AES_EROUND23_L(KEY_BASE +  6, T2, T3, I3)
37
38	/* 10 rounds */
/*
 * ENCRYPT_128: ten rounds on one block (4 x ENCRYPT_TWO_ROUNDS followed
 * by ENCRYPT_TWO_ROUNDS_LAST).  Consumes the 20 key registers
 * KEY_BASE+0 .. KEY_BASE+38 in ascending order; the block is read from
 * and left in I0/I1, T0/T1 are scratch.  The initial key XOR is not
 * part of this macro; callers perform it beforehand.
 */
39#define ENCRYPT_128(KEY_BASE, I0, I1, T0, T1) \
40	ENCRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
41	ENCRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
42	ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
43	ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
44	ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1)
45
/*
 * ENCRYPT_128_2: ten rounds on two blocks at once.  Same key register
 * range as ENCRYPT_128 (KEY_BASE+0 .. KEY_BASE+38); block A is I0/I1,
 * block B is I2/I3, T0..T3 are scratch.
 */
46#define ENCRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
47	ENCRYPT_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, T0, T1, T2, T3) \
48	ENCRYPT_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, T0, T1, T2, T3) \
49	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
50	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
51	ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3)
52
53	/* 12 rounds */
/*
 * ENCRYPT_192: twelve rounds on one block (5 x ENCRYPT_TWO_ROUNDS plus
 * ENCRYPT_TWO_ROUNDS_LAST).  Consumes the 24 key registers
 * KEY_BASE+0 .. KEY_BASE+46; the block stays in I0/I1, T0/T1 are scratch.
 */
54#define ENCRYPT_192(KEY_BASE, I0, I1, T0, T1) \
55	ENCRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
56	ENCRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
57	ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
58	ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
59	ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
60	ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1)
61
/*
 * ENCRYPT_192_2: twelve rounds on two blocks at once.  Same key register
 * range as ENCRYPT_192 (KEY_BASE+0 .. KEY_BASE+46); block A is I0/I1,
 * block B is I2/I3, T0..T3 are scratch.
 */
62#define ENCRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
63	ENCRYPT_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, T0, T1, T2, T3) \
64	ENCRYPT_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, T0, T1, T2, T3) \
65	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
66	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
67	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \
68	ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3)
69
70	/* 14 rounds */
/*
 * ENCRYPT_256: fourteen rounds on one block (6 x ENCRYPT_TWO_ROUNDS plus
 * ENCRYPT_TWO_ROUNDS_LAST).  Consumes the 28 key registers
 * KEY_BASE+0 .. KEY_BASE+54; the block stays in I0/I1, T0/T1 are scratch.
 */
71#define ENCRYPT_256(KEY_BASE, I0, I1, T0, T1) \
72	ENCRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
73	ENCRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
74	ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
75	ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
76	ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
77	ENCRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \
78	ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1)
79
/*
 * ENCRYPT_256_TWO_ROUNDS_2: wrapper around ENCRYPT_TWO_ROUNDS_2 whose
 * four temporaries are the consecutive register pairs TMP_BASE+0, +2,
 * +4 and +6.  ENCRYPT_256_2 passes key registers as TMP_BASE, so those
 * four registers are overwritten.
 */
80#define ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \
81	ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \
82			     TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6)
83
/*
 * ENCRYPT_256_2: fourteen rounds on two blocks (I0/I1 and I2/I3).
 *
 * The macro takes no temporary-register arguments; it borrows key
 * registers as scratch and reloads them from the key schedule at %o0:
 *
 *  - rounds 1-2 use KEY_BASE+48..+54 as temporaries; %f56/%f58 and later
 *    %f60/%f62 are then reloaded from offsets 0xd0..0xe8;
 *  - every later step uses KEY_BASE+0..+6 as temporaries (those keys
 *    have already been consumed by rounds 1-2); %f8..%f14 are reloaded
 *    from offsets 0x10..0x28 at the end so the macro can be run again.
 *
 * The reload destinations are hard-coded %f registers, so they only
 * line up with KEY_BASE+0 / KEY_BASE+48 when KEY_BASE is 8, and %o0 must
 * point at the start of the key schedule.
 */
84#define ENCRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \
85	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, KEY_BASE + 48) \
86	ldd	[%o0 + 0xd0], %f56; \
87	ldd	[%o0 + 0xd8], %f58; \
88	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, KEY_BASE +  0) \
89	ldd	[%o0 + 0xe0], %f60; \
90	ldd	[%o0 + 0xe8], %f62; \
91	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE +  0) \
92	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE +  0) \
93	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE +  0) \
94	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE +  0) \
95	AES_EROUND01(KEY_BASE +  48, I0, I1, KEY_BASE + 0) \
96	AES_EROUND23(KEY_BASE +  50, I0, I1, KEY_BASE + 2) \
97	AES_EROUND01(KEY_BASE +  48, I2, I3, KEY_BASE + 4) \
98	AES_EROUND23(KEY_BASE +  50, I2, I3, KEY_BASE + 6) \
99	AES_EROUND01_L(KEY_BASE +  52, KEY_BASE + 0, KEY_BASE + 2, I0) \
100	AES_EROUND23_L(KEY_BASE +  54, KEY_BASE + 0, KEY_BASE + 2, I1) \
101	ldd	[%o0 + 0x10], %f8; \
102	ldd	[%o0 + 0x18], %f10; \
103	AES_EROUND01_L(KEY_BASE +  52, KEY_BASE + 4, KEY_BASE + 6, I2) \
104	AES_EROUND23_L(KEY_BASE +  54, KEY_BASE + 4, KEY_BASE + 6, I3) \
105	ldd	[%o0 + 0x20], %f12; \
106	ldd	[%o0 + 0x28], %f14;
107
/*
 * DECRYPT_TWO_ROUNDS: two decryption rounds on one block.
 *
 * Mirror image of ENCRYPT_TWO_ROUNDS: within each round the "23"
 * operation is issued before the "01" operation, so KEY_BASE+0 feeds
 * AES_DROUND23 and KEY_BASE+2 feeds AES_DROUND01.  Round one reads
 * I0/I1 and writes T0/T1; round two reads T0/T1 and writes I0/I1.
 * Callers in this file load the key schedule into the key registers in
 * descending memory order to match.
 */
108#define DECRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \
109	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \
110	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \
111	AES_DROUND23(KEY_BASE +  4, T0, T1, I1) \
112	AES_DROUND01(KEY_BASE +  6, T0, T1, I0)
113
/*
 * DECRYPT_TWO_ROUNDS_2: two decryption rounds on two independent blocks.
 * Block A is I0/I1 with temporaries T0/T1, block B is I2/I3 with
 * temporaries T2/T3; both share key registers KEY_BASE+0..+6.
 */
114#define DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
115	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \
116	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \
117	AES_DROUND23(KEY_BASE +  0, I2, I3, T3) \
118	AES_DROUND01(KEY_BASE +  2, I2, I3, T2) \
119	AES_DROUND23(KEY_BASE +  4, T0, T1, I1) \
120	AES_DROUND01(KEY_BASE +  6, T0, T1, I0) \
121	AES_DROUND23(KEY_BASE +  4, T2, T3, I3) \
122	AES_DROUND01(KEY_BASE +  6, T2, T3, I2)
123
/*
 * DECRYPT_TWO_ROUNDS_LAST: same shape as DECRYPT_TWO_ROUNDS, but the
 * second round uses the "_L" opcode variants (AES_DROUND23_L /
 * AES_DROUND01_L).  Every DECRYPT_<bits> macro in this file ends with
 * it, so the _L forms are presumably the final-round instructions --
 * confirm against opcodes.h.
 */
124#define DECRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \
125	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \
126	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \
127	AES_DROUND23_L(KEY_BASE +  4, T0, T1, I1) \
128	AES_DROUND01_L(KEY_BASE +  6, T0, T1, I0)
129
/*
 * DECRYPT_TWO_ROUNDS_LAST_2: two-block counterpart of
 * DECRYPT_TWO_ROUNDS_LAST.  Block A is I0/I1 with temporaries T0/T1,
 * block B is I2/I3 with temporaries T2/T3; the second round of each
 * block uses the "_L" opcode variants.
 */
130#define DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
131	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \
132	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \
133	AES_DROUND23(KEY_BASE +  0, I2, I3, T3) \
134	AES_DROUND01(KEY_BASE +  2, I2, I3, T2) \
135	AES_DROUND23_L(KEY_BASE +  4, T0, T1, I1) \
136	AES_DROUND01_L(KEY_BASE +  6, T0, T1, I0) \
137	AES_DROUND23_L(KEY_BASE +  4, T2, T3, I3) \
138	AES_DROUND01_L(KEY_BASE +  6, T2, T3, I2)
139
140	/* 10 rounds */
/*
 * DECRYPT_128: ten decryption rounds on one block (4 x DECRYPT_TWO_ROUNDS
 * plus DECRYPT_TWO_ROUNDS_LAST).  Consumes key registers
 * KEY_BASE+0 .. KEY_BASE+38; the block stays in I0/I1, T0/T1 are
 * scratch.  The initial key XOR is done by the caller.
 */
141#define DECRYPT_128(KEY_BASE, I0, I1, T0, T1) \
142	DECRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
143	DECRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
144	DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
145	DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
146	DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1)
147
/*
 * DECRYPT_128_2: ten decryption rounds on two blocks at once.  Same key
 * register range as DECRYPT_128; block A is I0/I1, block B is I2/I3,
 * T0..T3 are scratch.
 */
148#define DECRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
149	DECRYPT_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, T0, T1, T2, T3) \
150	DECRYPT_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, T0, T1, T2, T3) \
151	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
152	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
153	DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3)
154
155	/* 12 rounds */
/*
 * DECRYPT_192: twelve decryption rounds on one block (5 x
 * DECRYPT_TWO_ROUNDS plus DECRYPT_TWO_ROUNDS_LAST).  Consumes key
 * registers KEY_BASE+0 .. KEY_BASE+46; the block stays in I0/I1.
 */
156#define DECRYPT_192(KEY_BASE, I0, I1, T0, T1) \
157	DECRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
158	DECRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
159	DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
160	DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
161	DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
162	DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1)
163
/*
 * DECRYPT_192_2: twelve decryption rounds on two blocks at once.  Same
 * key register range as DECRYPT_192; block A is I0/I1, block B is
 * I2/I3, T0..T3 are scratch.
 */
164#define DECRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
165	DECRYPT_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, T0, T1, T2, T3) \
166	DECRYPT_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, T0, T1, T2, T3) \
167	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
168	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
169	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \
170	DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3)
171
172	/* 14 rounds */
/*
 * DECRYPT_256: fourteen decryption rounds on one block (6 x
 * DECRYPT_TWO_ROUNDS plus DECRYPT_TWO_ROUNDS_LAST).  Consumes key
 * registers KEY_BASE+0 .. KEY_BASE+54; the block stays in I0/I1.
 */
173#define DECRYPT_256(KEY_BASE, I0, I1, T0, T1) \
174	DECRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
175	DECRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
176	DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
177	DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
178	DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
179	DECRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \
180	DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1)
181
/*
 * DECRYPT_256_TWO_ROUNDS_2: wrapper around DECRYPT_TWO_ROUNDS_2 whose
 * four temporaries are the consecutive register pairs TMP_BASE+0, +2,
 * +4 and +6.  DECRYPT_256_2 passes key registers as TMP_BASE, so those
 * four registers are overwritten.
 */
182#define DECRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \
183	DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \
184			     TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6)
185
/*
 * DECRYPT_256_2: fourteen decryption rounds on two blocks (I0/I1 and
 * I2/I3).
 *
 * Like ENCRYPT_256_2 it borrows key registers as scratch and reloads
 * them from the key schedule at %o0, here in descending memory order:
 *
 *  - rounds 1-2 use KEY_BASE+48..+54 as temporaries; %f56/%f58 and later
 *    %f60/%f62 are then reloaded from offsets 0x18, 0x10, 0x08, 0x00;
 *  - every later step uses KEY_BASE+0..+6 as temporaries; %f8..%f14 are
 *    reloaded from offsets 0xd8, 0xd0, 0xc8, 0xc0 at the end so the
 *    macro can be run again.
 *
 * The reload destinations are hard-coded %f registers, so they only
 * line up with KEY_BASE+0 / KEY_BASE+48 when KEY_BASE is 8, and %o0 must
 * point at the start of the key schedule.
 */
186#define DECRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \
187	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, KEY_BASE + 48) \
188	ldd	[%o0 + 0x18], %f56; \
189	ldd	[%o0 + 0x10], %f58; \
190	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, KEY_BASE +  0) \
191	ldd	[%o0 + 0x08], %f60; \
192	ldd	[%o0 + 0x00], %f62; \
193	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE +  0) \
194	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE +  0) \
195	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE +  0) \
196	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE +  0) \
197	AES_DROUND23(KEY_BASE +  48, I0, I1, KEY_BASE + 2) \
198	AES_DROUND01(KEY_BASE +  50, I0, I1, KEY_BASE + 0) \
199	AES_DROUND23(KEY_BASE +  48, I2, I3, KEY_BASE + 6) \
200	AES_DROUND01(KEY_BASE +  50, I2, I3, KEY_BASE + 4) \
201	AES_DROUND23_L(KEY_BASE +  52, KEY_BASE + 0, KEY_BASE + 2, I1) \
202	AES_DROUND01_L(KEY_BASE +  54, KEY_BASE + 0, KEY_BASE + 2, I0) \
203	ldd	[%o0 + 0xd8], %f8; \
204	ldd	[%o0 + 0xd0], %f10; \
205	AES_DROUND23_L(KEY_BASE +  52, KEY_BASE + 4, KEY_BASE + 6, I3) \
206	AES_DROUND01_L(KEY_BASE +  54, KEY_BASE + 4, KEY_BASE + 6, I2) \
207	ldd	[%o0 + 0xc8], %f12; \
208	ldd	[%o0 + 0xc0], %f14;
209
210	.align	32
/*
 * aes_sparc64_key_expand - expand a raw key into the full key schedule.
 *
 *   %o0 = input_key   raw key bytes (read with 32-bit loads)
 *   %o1 = output_key  destination of the schedule (written with 64-bit
 *                     stores)
 *   %o2 = key_len     compared against 24: less selects the 128-bit
 *                     path, equal the 192-bit path, greater the 256-bit
 *                     path
 *
 * The raw key is copied to the start of output_key, the remaining words
 * are generated in %f registers with the AES_KEXPAND0/1/2 opcode macros
 * (defined outside this section) and then stored in one run.  Total
 * bytes written: 176 (128-bit), 208 (192-bit), 240 (256-bit).
 *
 * The body is bracketed by VISEntry/VISExit (from <asm/visasm.h>, not
 * shown here; presumably they save and release the FPU state).
 */
211ENTRY(aes_sparc64_key_expand)
212	/* %o0=input_key, %o1=output_key, %o2=key_len */
213	VISEntry
	/* First 16 key bytes are common to all key sizes. */
214	ld	[%o0 + 0x00], %f0
215	ld	[%o0 + 0x04], %f1
216	ld	[%o0 + 0x08], %f2
217	ld	[%o0 + 0x0c], %f3
218
	/* Copy them to the output and step past them. */
219	std	%f0, [%o1 + 0x00]
220	std	%f2, [%o1 + 0x08]
221	add	%o1, 0x10, %o1
222
	/* key_len < 24 -> 128-bit path (2:), == 24 -> 192-bit path (1:). */
223	cmp	%o2, 24
224	bl	2f
225	 nop
226
227	be	1f
228	 nop
229
230	/* 256-bit key expansion */
	/* Remaining 16 raw key bytes go to %f4-%f7 and to the output. */
231	ld	[%o0 + 0x10], %f4
232	ld	[%o0 + 0x14], %f5
233	ld	[%o0 + 0x18], %f6
234	ld	[%o0 + 0x1c], %f7
235
236	std	%f4, [%o1 + 0x00]
237	std	%f6, [%o1 + 0x08]
238	add	%o1, 0x10, %o1
239
	/*
	 * Each step derives one new 64-bit schedule word (last argument)
	 * from earlier ones; the third argument of AES_KEXPAND1 counts up
	 * 0x0..0x6, presumably the round-constant index.
	 */
240	AES_KEXPAND1(0, 6, 0x0, 8)
241	AES_KEXPAND2(2, 8, 10)
242	AES_KEXPAND0(4, 10, 12)
243	AES_KEXPAND2(6, 12, 14)
244	AES_KEXPAND1(8, 14, 0x1, 16)
245	AES_KEXPAND2(10, 16, 18)
246	AES_KEXPAND0(12, 18, 20)
247	AES_KEXPAND2(14, 20, 22)
248	AES_KEXPAND1(16, 22, 0x2, 24)
249	AES_KEXPAND2(18, 24, 26)
250	AES_KEXPAND0(20, 26, 28)
251	AES_KEXPAND2(22, 28, 30)
252	AES_KEXPAND1(24, 30, 0x3, 32)
253	AES_KEXPAND2(26, 32, 34)
254	AES_KEXPAND0(28, 34, 36)
255	AES_KEXPAND2(30, 36, 38)
256	AES_KEXPAND1(32, 38, 0x4, 40)
257	AES_KEXPAND2(34, 40, 42)
258	AES_KEXPAND0(36, 42, 44)
259	AES_KEXPAND2(38, 44, 46)
260	AES_KEXPAND1(40, 46, 0x5, 48)
261	AES_KEXPAND2(42, 48, 50)
262	AES_KEXPAND0(44, 50, 52)
263	AES_KEXPAND2(46, 52, 54)
264	AES_KEXPAND1(48, 54, 0x6, 56)
265	AES_KEXPAND2(50, 56, 58)
266
	/* Store the 26 generated words %f8..%f58 after the raw key. */
267	std	%f8, [%o1 + 0x00]
268	std	%f10, [%o1 + 0x08]
269	std	%f12, [%o1 + 0x10]
270	std	%f14, [%o1 + 0x18]
271	std	%f16, [%o1 + 0x20]
272	std	%f18, [%o1 + 0x28]
273	std	%f20, [%o1 + 0x30]
274	std	%f22, [%o1 + 0x38]
275	std	%f24, [%o1 + 0x40]
276	std	%f26, [%o1 + 0x48]
277	std	%f28, [%o1 + 0x50]
278	std	%f30, [%o1 + 0x58]
279	std	%f32, [%o1 + 0x60]
280	std	%f34, [%o1 + 0x68]
281	std	%f36, [%o1 + 0x70]
282	std	%f38, [%o1 + 0x78]
283	std	%f40, [%o1 + 0x80]
284	std	%f42, [%o1 + 0x88]
285	std	%f44, [%o1 + 0x90]
286	std	%f46, [%o1 + 0x98]
287	std	%f48, [%o1 + 0xa0]
288	std	%f50, [%o1 + 0xa8]
289	std	%f52, [%o1 + 0xb0]
290	std	%f54, [%o1 + 0xb8]
291	std	%f56, [%o1 + 0xc0]
	/* Last store sits in the branch delay slot. */
292	ba,pt	%xcc, 80f
293	 std	%f58, [%o1 + 0xc8]
294
2951:
296	/* 192-bit key expansion */
	/* Remaining 8 raw key bytes go to %f4/%f5 and to the output. */
297	ld	[%o0 + 0x10], %f4
298	ld	[%o0 + 0x14], %f5
299
300	std	%f4, [%o1 + 0x00]
301	add	%o1, 0x08, %o1
302
	/* Repeating KEXPAND1, KEXPAND2, KEXPAND2 pattern; constants 0x0..0x7. */
303	AES_KEXPAND1(0, 4, 0x0, 6)
304	AES_KEXPAND2(2, 6, 8)
305	AES_KEXPAND2(4, 8, 10)
306	AES_KEXPAND1(6, 10, 0x1, 12)
307	AES_KEXPAND2(8, 12, 14)
308	AES_KEXPAND2(10, 14, 16)
309	AES_KEXPAND1(12, 16, 0x2, 18)
310	AES_KEXPAND2(14, 18, 20)
311	AES_KEXPAND2(16, 20, 22)
312	AES_KEXPAND1(18, 22, 0x3, 24)
313	AES_KEXPAND2(20, 24, 26)
314	AES_KEXPAND2(22, 26, 28)
315	AES_KEXPAND1(24, 28, 0x4, 30)
316	AES_KEXPAND2(26, 30, 32)
317	AES_KEXPAND2(28, 32, 34)
318	AES_KEXPAND1(30, 34, 0x5, 36)
319	AES_KEXPAND2(32, 36, 38)
320	AES_KEXPAND2(34, 38, 40)
321	AES_KEXPAND1(36, 40, 0x6, 42)
322	AES_KEXPAND2(38, 42, 44)
323	AES_KEXPAND2(40, 44, 46)
324	AES_KEXPAND1(42, 46, 0x7, 48)
325	AES_KEXPAND2(44, 48, 50)
326
	/* Store the 23 generated words %f6..%f50 after the raw key. */
327	std	%f6, [%o1 + 0x00]
328	std	%f8, [%o1 + 0x08]
329	std	%f10, [%o1 + 0x10]
330	std	%f12, [%o1 + 0x18]
331	std	%f14, [%o1 + 0x20]
332	std	%f16, [%o1 + 0x28]
333	std	%f18, [%o1 + 0x30]
334	std	%f20, [%o1 + 0x38]
335	std	%f22, [%o1 + 0x40]
336	std	%f24, [%o1 + 0x48]
337	std	%f26, [%o1 + 0x50]
338	std	%f28, [%o1 + 0x58]
339	std	%f30, [%o1 + 0x60]
340	std	%f32, [%o1 + 0x68]
341	std	%f34, [%o1 + 0x70]
342	std	%f36, [%o1 + 0x78]
343	std	%f38, [%o1 + 0x80]
344	std	%f40, [%o1 + 0x88]
345	std	%f42, [%o1 + 0x90]
346	std	%f44, [%o1 + 0x98]
347	std	%f46, [%o1 + 0xa0]
348	std	%f48, [%o1 + 0xa8]
	/* Last store sits in the branch delay slot. */
349	ba,pt	%xcc, 80f
350	 std	%f50, [%o1 + 0xb0]
351
3522:
353	/* 128-bit key expansion */
	/* Alternating KEXPAND1/KEXPAND2 pairs; constants 0x0..0x9. */
354	AES_KEXPAND1(0, 2, 0x0, 4)
355	AES_KEXPAND2(2, 4, 6)
356	AES_KEXPAND1(4, 6, 0x1, 8)
357	AES_KEXPAND2(6, 8, 10)
358	AES_KEXPAND1(8, 10, 0x2, 12)
359	AES_KEXPAND2(10, 12, 14)
360	AES_KEXPAND1(12, 14, 0x3, 16)
361	AES_KEXPAND2(14, 16, 18)
362	AES_KEXPAND1(16, 18, 0x4, 20)
363	AES_KEXPAND2(18, 20, 22)
364	AES_KEXPAND1(20, 22, 0x5, 24)
365	AES_KEXPAND2(22, 24, 26)
366	AES_KEXPAND1(24, 26, 0x6, 28)
367	AES_KEXPAND2(26, 28, 30)
368	AES_KEXPAND1(28, 30, 0x7, 32)
369	AES_KEXPAND2(30, 32, 34)
370	AES_KEXPAND1(32, 34, 0x8, 36)
371	AES_KEXPAND2(34, 36, 38)
372	AES_KEXPAND1(36, 38, 0x9, 40)
373	AES_KEXPAND2(38, 40, 42)
374
	/* Store the 20 generated words %f4..%f42, then fall through to 80. */
375	std	%f4, [%o1 + 0x00]
376	std	%f6, [%o1 + 0x08]
377	std	%f8, [%o1 + 0x10]
378	std	%f10, [%o1 + 0x18]
379	std	%f12, [%o1 + 0x20]
380	std	%f14, [%o1 + 0x28]
381	std	%f16, [%o1 + 0x30]
382	std	%f18, [%o1 + 0x38]
383	std	%f20, [%o1 + 0x40]
384	std	%f22, [%o1 + 0x48]
385	std	%f24, [%o1 + 0x50]
386	std	%f26, [%o1 + 0x58]
387	std	%f28, [%o1 + 0x60]
388	std	%f30, [%o1 + 0x68]
389	std	%f32, [%o1 + 0x70]
390	std	%f34, [%o1 + 0x78]
391	std	%f36, [%o1 + 0x80]
392	std	%f38, [%o1 + 0x88]
393	std	%f40, [%o1 + 0x90]
394	std	%f42, [%o1 + 0x98]
	/* Common exit for all three key sizes; VISExit is in the delay slot. */
39580:
396	retl
397	 VISExit
398ENDPROC(aes_sparc64_key_expand)
399
400	.align		32
/*
 * aes_sparc64_encrypt_128 - encrypt one 16-byte block with a 128-bit key.
 *
 *   %o0 = key     expanded key schedule; offsets 0x00..0xa8 are read
 *   %o1 = input   16 bytes, read with 32-bit loads
 *   %o2 = output  16 bytes, written with 32-bit stores
 *
 * The whole schedule is loaded into %f8..%f50, the first two words are
 * XORed into the block, and ENCRYPT_128 runs the ten rounds with the
 * keys in %f12..%f50.  %f0/%f2 are scratch.  (The 32-bit accesses
 * presumably allow buffers that are only 4-byte aligned -- confirm.)
 */
401ENTRY(aes_sparc64_encrypt_128)
402	/* %o0=key, %o1=input, %o2=output */
403	VISEntry
	/* Block -> %f4-%f7 (viewed as doubles %f4 and %f6). */
404	ld		[%o1 + 0x00], %f4
405	ld		[%o1 + 0x04], %f5
406	ld		[%o1 + 0x08], %f6
407	ld		[%o1 + 0x0c], %f7
	/* Full key schedule -> %f8..%f50. */
408	ldd		[%o0 + 0x00], %f8
409	ldd		[%o0 + 0x08], %f10
410	ldd		[%o0 + 0x10], %f12
411	ldd		[%o0 + 0x18], %f14
412	ldd		[%o0 + 0x20], %f16
413	ldd		[%o0 + 0x28], %f18
414	ldd		[%o0 + 0x30], %f20
415	ldd		[%o0 + 0x38], %f22
416	ldd		[%o0 + 0x40], %f24
417	ldd		[%o0 + 0x48], %f26
418	ldd		[%o0 + 0x50], %f28
419	ldd		[%o0 + 0x58], %f30
420	ldd		[%o0 + 0x60], %f32
421	ldd		[%o0 + 0x68], %f34
422	ldd		[%o0 + 0x70], %f36
423	ldd		[%o0 + 0x78], %f38
424	ldd		[%o0 + 0x80], %f40
425	ldd		[%o0 + 0x88], %f42
426	ldd		[%o0 + 0x90], %f44
427	ldd		[%o0 + 0x98], %f46
428	ldd		[%o0 + 0xa0], %f48
429	ldd		[%o0 + 0xa8], %f50
	/* Initial key XOR with the first 16 bytes of the schedule. */
430	fxor		%f8, %f4, %f4
431	fxor		%f10, %f6, %f6
432	ENCRYPT_128(12, 4, 6, 0, 2)
	/* Result is back in %f4-%f7. */
433	st		%f4, [%o2 + 0x00]
434	st		%f5, [%o2 + 0x04]
435	st		%f6, [%o2 + 0x08]
436	st		%f7, [%o2 + 0x0c]
437	retl
438	 VISExit
439ENDPROC(aes_sparc64_encrypt_128)
440
441	.align		32
/*
 * aes_sparc64_encrypt_192 - encrypt one 16-byte block with a 192-bit key.
 *
 *   %o0 = key     expanded key schedule; offsets 0x00..0xc8 are read.
 *                 %o0 is advanced by 0x20 inside the routine.
 *   %o1 = input   16 bytes, read with 32-bit loads
 *   %o2 = output  16 bytes, written with 32-bit stores
 *
 * Twelve rounds are done as two rounds with keys in %f8..%f14 followed
 * by ENCRYPT_128 (ten rounds) with keys in %f12..%f50.
 */
442ENTRY(aes_sparc64_encrypt_192)
443	/* %o0=key, %o1=input, %o2=output */
444	VISEntry
445	ld		[%o1 + 0x00], %f4
446	ld		[%o1 + 0x04], %f5
447	ld		[%o1 + 0x08], %f6
448	ld		[%o1 + 0x0c], %f7
449
	/* Initial key XOR with schedule words 0 and 1. */
450	ldd		[%o0 + 0x00], %f8
451	ldd		[%o0 + 0x08], %f10
452
453	fxor		%f8, %f4, %f4
454	fxor		%f10, %f6, %f6
455
	/* Keys for the first two rounds; %f8/%f10 are reused. */
456	ldd		[%o0 + 0x10], %f8
457	ldd		[%o0 + 0x18], %f10
458	ldd		[%o0 + 0x20], %f12
459	ldd		[%o0 + 0x28], %f14
	/* Rebase so the offsets below match the 128-bit layout. */
460	add		%o0, 0x20, %o0
461
462	ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
463
	/* Remaining ten rounds of keys: original offsets 0x30..0xc8. */
464	ldd		[%o0 + 0x10], %f12
465	ldd		[%o0 + 0x18], %f14
466	ldd		[%o0 + 0x20], %f16
467	ldd		[%o0 + 0x28], %f18
468	ldd		[%o0 + 0x30], %f20
469	ldd		[%o0 + 0x38], %f22
470	ldd		[%o0 + 0x40], %f24
471	ldd		[%o0 + 0x48], %f26
472	ldd		[%o0 + 0x50], %f28
473	ldd		[%o0 + 0x58], %f30
474	ldd		[%o0 + 0x60], %f32
475	ldd		[%o0 + 0x68], %f34
476	ldd		[%o0 + 0x70], %f36
477	ldd		[%o0 + 0x78], %f38
478	ldd		[%o0 + 0x80], %f40
479	ldd		[%o0 + 0x88], %f42
480	ldd		[%o0 + 0x90], %f44
481	ldd		[%o0 + 0x98], %f46
482	ldd		[%o0 + 0xa0], %f48
483	ldd		[%o0 + 0xa8], %f50
484
485
486	ENCRYPT_128(12, 4, 6, 0, 2)
487
488	st		%f4, [%o2 + 0x00]
489	st		%f5, [%o2 + 0x04]
490	st		%f6, [%o2 + 0x08]
491	st		%f7, [%o2 + 0x0c]
492
493	retl
494	 VISExit
495ENDPROC(aes_sparc64_encrypt_192)
496
497	.align		32
/*
 * aes_sparc64_encrypt_256 - encrypt one 16-byte block with a 256-bit key.
 *
 *   %o0 = key     expanded key schedule; offsets 0x00..0xe8 are read.
 *                 %o0 is advanced by 0x20 twice inside the routine.
 *   %o1 = input   16 bytes, read with 32-bit loads
 *   %o2 = output  16 bytes, written with 32-bit stores
 *
 * Fourteen rounds are done as 2 + 2 rounds with keys in %f8..%f14
 * (reloaded in between) followed by ENCRYPT_128 (ten rounds) with keys
 * in %f12..%f50.
 */
498ENTRY(aes_sparc64_encrypt_256)
499	/* %o0=key, %o1=input, %o2=output */
500	VISEntry
501	ld		[%o1 + 0x00], %f4
502	ld		[%o1 + 0x04], %f5
503	ld		[%o1 + 0x08], %f6
504	ld		[%o1 + 0x0c], %f7
505
	/* Initial key XOR with schedule words 0 and 1. */
506	ldd		[%o0 + 0x00], %f8
507	ldd		[%o0 + 0x08], %f10
508
509	fxor		%f8, %f4, %f4
510	fxor		%f10, %f6, %f6
511
	/* Rounds 1-2: keys at original offsets 0x10..0x28. */
512	ldd		[%o0 + 0x10], %f8
513
514	ldd		[%o0 + 0x18], %f10
515	ldd		[%o0 + 0x20], %f12
516	ldd		[%o0 + 0x28], %f14
517	add		%o0, 0x20, %o0
518
519	ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
520
	/* Rounds 3-4: keys at original offsets 0x30..0x48. */
521	ldd		[%o0 + 0x10], %f8
522
523	ldd		[%o0 + 0x18], %f10
524	ldd		[%o0 + 0x20], %f12
525	ldd		[%o0 + 0x28], %f14
526	add		%o0, 0x20, %o0
527
528	ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
529
	/* Remaining ten rounds of keys: original offsets 0x50..0xe8. */
530	ldd		[%o0 + 0x10], %f12
531	ldd		[%o0 + 0x18], %f14
532	ldd		[%o0 + 0x20], %f16
533	ldd		[%o0 + 0x28], %f18
534	ldd		[%o0 + 0x30], %f20
535	ldd		[%o0 + 0x38], %f22
536	ldd		[%o0 + 0x40], %f24
537	ldd		[%o0 + 0x48], %f26
538	ldd		[%o0 + 0x50], %f28
539	ldd		[%o0 + 0x58], %f30
540	ldd		[%o0 + 0x60], %f32
541	ldd		[%o0 + 0x68], %f34
542	ldd		[%o0 + 0x70], %f36
543	ldd		[%o0 + 0x78], %f38
544	ldd		[%o0 + 0x80], %f40
545	ldd		[%o0 + 0x88], %f42
546	ldd		[%o0 + 0x90], %f44
547	ldd		[%o0 + 0x98], %f46
548	ldd		[%o0 + 0xa0], %f48
549	ldd		[%o0 + 0xa8], %f50
550
551	ENCRYPT_128(12, 4, 6, 0, 2)
552
553	st		%f4, [%o2 + 0x00]
554	st		%f5, [%o2 + 0x04]
555	st		%f6, [%o2 + 0x08]
556	st		%f7, [%o2 + 0x0c]
557
558	retl
559	 VISExit
560ENDPROC(aes_sparc64_encrypt_256)
561
562	.align		32
/*
 * aes_sparc64_decrypt_128 - decrypt one 16-byte block with a 128-bit key.
 *
 *   %o0 = key     expanded key schedule; offsets 0x00..0xa8 are read
 *   %o1 = input   16 bytes, read with 32-bit loads
 *   %o2 = output  16 bytes, written with 32-bit stores
 *
 * The last 16 bytes of the schedule (0xa0/0xa8) are XORed into the
 * block first; the rest is loaded into %f12..%f50 in descending memory
 * order so DECRYPT_128 walks the schedule backwards.
 */
563ENTRY(aes_sparc64_decrypt_128)
564	/* %o0=key, %o1=input, %o2=output */
565	VISEntry
566	ld		[%o1 + 0x00], %f4
567	ld		[%o1 + 0x04], %f5
568	ld		[%o1 + 0x08], %f6
569	ld		[%o1 + 0x0c], %f7
	/* Words for the initial XOR (kept in ascending order). */
570	ldd		[%o0 + 0xa0], %f8
571	ldd		[%o0 + 0xa8], %f10
	/* Round keys, highest offset first. */
572	ldd		[%o0 + 0x98], %f12
573	ldd		[%o0 + 0x90], %f14
574	ldd		[%o0 + 0x88], %f16
575	ldd		[%o0 + 0x80], %f18
576	ldd		[%o0 + 0x78], %f20
577	ldd		[%o0 + 0x70], %f22
578	ldd		[%o0 + 0x68], %f24
579	ldd		[%o0 + 0x60], %f26
580	ldd		[%o0 + 0x58], %f28
581	ldd		[%o0 + 0x50], %f30
582	ldd		[%o0 + 0x48], %f32
583	ldd		[%o0 + 0x40], %f34
584	ldd		[%o0 + 0x38], %f36
585	ldd		[%o0 + 0x30], %f38
586	ldd		[%o0 + 0x28], %f40
587	ldd		[%o0 + 0x20], %f42
588	ldd		[%o0 + 0x18], %f44
589	ldd		[%o0 + 0x10], %f46
590	ldd		[%o0 + 0x08], %f48
591	ldd		[%o0 + 0x00], %f50
592	fxor		%f8, %f4, %f4
593	fxor		%f10, %f6, %f6
594	DECRYPT_128(12, 4, 6, 0, 2)
595	st		%f4, [%o2 + 0x00]
596	st		%f5, [%o2 + 0x04]
597	st		%f6, [%o2 + 0x08]
598	st		%f7, [%o2 + 0x0c]
599	retl
600	 VISExit
601ENDPROC(aes_sparc64_decrypt_128)
602
603	.align		32
/*
 * aes_sparc64_decrypt_192 - decrypt one 16-byte block with a 192-bit key.
 *
 *   %o0 = key     expanded key schedule; offsets 0x00..0xc8 are read
 *   %o1 = input   16 bytes, read with 32-bit loads
 *   %o2 = output  16 bytes, written with 32-bit stores
 *
 * The last 16 bytes of the schedule (0xc0/0xc8) are XORed in first.
 * Twelve rounds follow: two with keys in %f12..%f18, then DECRYPT_128
 * (ten rounds) with keys in %f20..%f58.  All keys are loaded in
 * descending memory order, and the loads are interleaved with the
 * computation.
 */
604ENTRY(aes_sparc64_decrypt_192)
605	/* %o0=key, %o1=input, %o2=output */
606	VISEntry
607	ld		[%o1 + 0x00], %f4
608	ld		[%o1 + 0x04], %f5
609	ld		[%o1 + 0x08], %f6
610	ld		[%o1 + 0x0c], %f7
611	ldd		[%o0 + 0xc0], %f8
612	ldd		[%o0 + 0xc8], %f10
613	ldd		[%o0 + 0xb8], %f12
614	ldd		[%o0 + 0xb0], %f14
615	ldd		[%o0 + 0xa8], %f16
616	ldd		[%o0 + 0xa0], %f18
	/* Initial key XOR. */
617	fxor		%f8, %f4, %f4
618	fxor		%f10, %f6, %f6
619	ldd		[%o0 + 0x98], %f20
620	ldd		[%o0 + 0x90], %f22
621	ldd		[%o0 + 0x88], %f24
622	ldd		[%o0 + 0x80], %f26
	/* Rounds 1-2 with keys %f12..%f18 (offsets 0xb8..0xa0). */
623	DECRYPT_TWO_ROUNDS(12, 4, 6, 0, 2)
624	ldd		[%o0 + 0x78], %f28
625	ldd		[%o0 + 0x70], %f30
626	ldd		[%o0 + 0x68], %f32
627	ldd		[%o0 + 0x60], %f34
628	ldd		[%o0 + 0x58], %f36
629	ldd		[%o0 + 0x50], %f38
630	ldd		[%o0 + 0x48], %f40
631	ldd		[%o0 + 0x40], %f42
632	ldd		[%o0 + 0x38], %f44
633	ldd		[%o0 + 0x30], %f46
634	ldd		[%o0 + 0x28], %f48
635	ldd		[%o0 + 0x20], %f50
636	ldd		[%o0 + 0x18], %f52
637	ldd		[%o0 + 0x10], %f54
638	ldd		[%o0 + 0x08], %f56
639	ldd		[%o0 + 0x00], %f58
	/* Rounds 3-12 with keys %f20..%f58 (offsets 0x98..0x00). */
640	DECRYPT_128(20, 4, 6, 0, 2)
641	st		%f4, [%o2 + 0x00]
642	st		%f5, [%o2 + 0x04]
643	st		%f6, [%o2 + 0x08]
644	st		%f7, [%o2 + 0x0c]
645	retl
646	 VISExit
647ENDPROC(aes_sparc64_decrypt_192)
648
649	.align		32
/*
 * aes_sparc64_decrypt_256 - decrypt one 16-byte block with a 256-bit key.
 *
 *   %o0 = key     expanded key schedule; offsets 0x00..0xe8 are read
 *   %o1 = input   16 bytes, read with 32-bit loads
 *   %o2 = output  16 bytes, written with 32-bit stores
 *
 * The last 16 bytes of the schedule (0xe0/0xe8) are XORed in first,
 * then fourteen rounds are issued as individual AES_DROUND* operations
 * with the key loads interleaved one-for-one.  The block ping-pongs
 * between %f4/%f6 and %f0/%f2.
 *
 * Key registers %f12..%f26 are used twice: first with offsets
 * 0xd8..0xa0 (rounds 1-4), then, each one reloaded only after its first
 * use, with offsets 0x98..0x60 (rounds 5-8).  %f28..%f50 hold offsets
 * 0x58..0x00 for rounds 9-14.  The order of loads and round operations
 * therefore must not be changed.
 */
650ENTRY(aes_sparc64_decrypt_256)
651	/* %o0=key, %o1=input, %o2=output */
652	VISEntry
653	ld		[%o1 + 0x00], %f4
654	ld		[%o1 + 0x04], %f5
655	ld		[%o1 + 0x08], %f6
656	ld		[%o1 + 0x0c], %f7
657	ldd		[%o0 + 0xe0], %f8
658	ldd		[%o0 + 0xe8], %f10
659	ldd		[%o0 + 0xd8], %f12
660	ldd		[%o0 + 0xd0], %f14
661	ldd		[%o0 + 0xc8], %f16
	/* Initial key XOR, interleaved with further key loads. */
662	fxor		%f8, %f4, %f4
663	ldd		[%o0 + 0xc0], %f18
664	fxor		%f10, %f6, %f6
665	ldd		[%o0 + 0xb8], %f20
	/* Rounds 1-4: keys %f12..%f26 from offsets 0xd8..0xa0. */
666	AES_DROUND23(12, 4, 6, 2)
667	ldd		[%o0 + 0xb0], %f22
668	AES_DROUND01(14, 4, 6, 0)
669	ldd		[%o0 + 0xa8], %f24
670	AES_DROUND23(16, 0, 2, 6)
671	ldd		[%o0 + 0xa0], %f26
672	AES_DROUND01(18, 0, 2, 4)
	/* From here each load refills a key register that was just consumed. */
673	ldd		[%o0 + 0x98], %f12
674	AES_DROUND23(20, 4, 6, 2)
675	ldd		[%o0 + 0x90], %f14
676	AES_DROUND01(22, 4, 6, 0)
677	ldd		[%o0 + 0x88], %f16
678	AES_DROUND23(24, 0, 2, 6)
679	ldd		[%o0 + 0x80], %f18
680	AES_DROUND01(26, 0, 2, 4)
681	ldd		[%o0 + 0x78], %f20
	/* Rounds 5-8: second pass over %f12..%f26 (offsets 0x98..0x60). */
682	AES_DROUND23(12, 4, 6, 2)
683	ldd		[%o0 + 0x70], %f22
684	AES_DROUND01(14, 4, 6, 0)
685	ldd		[%o0 + 0x68], %f24
686	AES_DROUND23(16, 0, 2, 6)
687	ldd		[%o0 + 0x60], %f26
688	AES_DROUND01(18, 0, 2, 4)
689	ldd		[%o0 + 0x58], %f28
690	AES_DROUND23(20, 4, 6, 2)
691	ldd		[%o0 + 0x50], %f30
692	AES_DROUND01(22, 4, 6, 0)
693	ldd		[%o0 + 0x48], %f32
694	AES_DROUND23(24, 0, 2, 6)
695	ldd		[%o0 + 0x40], %f34
696	AES_DROUND01(26, 0, 2, 4)
697	ldd		[%o0 + 0x38], %f36
	/* Rounds 9-14: keys %f28..%f50 from offsets 0x58..0x00. */
698	AES_DROUND23(28, 4, 6, 2)
699	ldd		[%o0 + 0x30], %f38
700	AES_DROUND01(30, 4, 6, 0)
701	ldd		[%o0 + 0x28], %f40
702	AES_DROUND23(32, 0, 2, 6)
703	ldd		[%o0 + 0x20], %f42
704	AES_DROUND01(34, 0, 2, 4)
705	ldd		[%o0 + 0x18], %f44
706	AES_DROUND23(36, 4, 6, 2)
707	ldd		[%o0 + 0x10], %f46
708	AES_DROUND01(38, 4, 6, 0)
709	ldd		[%o0 + 0x08], %f48
710	AES_DROUND23(40, 0, 2, 6)
711	ldd		[%o0 + 0x00], %f50
712	AES_DROUND01(42, 0, 2, 4)
713	AES_DROUND23(44, 4, 6, 2)
714	AES_DROUND01(46, 4, 6, 0)
	/* Final round uses the _L variants and leaves the block in %f4/%f6. */
715	AES_DROUND23_L(48, 0, 2, 6)
716	AES_DROUND01_L(50, 0, 2, 4)
717	st		%f4, [%o2 + 0x00]
718	st		%f5, [%o2 + 0x04]
719	st		%f6, [%o2 + 0x08]
720	st		%f7, [%o2 + 0x0c]
721	retl
722	 VISExit
723ENDPROC(aes_sparc64_decrypt_256)
724
725	.align		32
/*
 * aes_sparc64_load_encrypt_keys_128 - preload 128-bit encryption round
 * keys into FP registers.
 *
 *   %o0 = key  expanded key schedule
 *
 * Loads schedule offsets 0x10..0xa8 into %f8..%f46, the register range
 * the ECB encrypt routine in this file passes to ENCRYPT_128 /
 * ENCRYPT_128_2 (KEY_BASE 8).  Offsets 0x00/0x08 are not loaded here;
 * the ECB routine reads them into integer registers itself.
 *
 * NOTE(review): VISEntry is issued without a matching VISExit, so the
 * keys stay live in the FP registers on return; presumably the caller
 * ends the VIS section after the bulk operation -- confirm.
 */
726ENTRY(aes_sparc64_load_encrypt_keys_128)
727	/* %o0=key */
728	VISEntry
729	ldd		[%o0 + 0x10], %f8
730	ldd		[%o0 + 0x18], %f10
731	ldd		[%o0 + 0x20], %f12
732	ldd		[%o0 + 0x28], %f14
733	ldd		[%o0 + 0x30], %f16
734	ldd		[%o0 + 0x38], %f18
735	ldd		[%o0 + 0x40], %f20
736	ldd		[%o0 + 0x48], %f22
737	ldd		[%o0 + 0x50], %f24
738	ldd		[%o0 + 0x58], %f26
739	ldd		[%o0 + 0x60], %f28
740	ldd		[%o0 + 0x68], %f30
741	ldd		[%o0 + 0x70], %f32
742	ldd		[%o0 + 0x78], %f34
743	ldd		[%o0 + 0x80], %f36
744	ldd		[%o0 + 0x88], %f38
745	ldd		[%o0 + 0x90], %f40
746	ldd		[%o0 + 0x98], %f42
747	ldd		[%o0 + 0xa0], %f44
	/* Last load sits in the return delay slot. */
748	retl
749	 ldd		[%o0 + 0xa8], %f46
750ENDPROC(aes_sparc64_load_encrypt_keys_128)
751
752	.align		32
/*
 * aes_sparc64_load_encrypt_keys_192 - preload 192-bit encryption round
 * keys into FP registers.
 *
 *   %o0 = key  expanded key schedule
 *
 * Loads schedule offsets 0x10..0xc8 into %f8..%f54, the register range
 * the ECB encrypt routine in this file passes to ENCRYPT_192 /
 * ENCRYPT_192_2 (KEY_BASE 8).  Offsets 0x00/0x08 are not loaded here.
 *
 * NOTE(review): VISEntry is issued without a matching VISExit;
 * presumably the caller ends the VIS section afterwards -- confirm.
 */
753ENTRY(aes_sparc64_load_encrypt_keys_192)
754	/* %o0=key */
755	VISEntry
756	ldd		[%o0 + 0x10], %f8
757	ldd		[%o0 + 0x18], %f10
758	ldd		[%o0 + 0x20], %f12
759	ldd		[%o0 + 0x28], %f14
760	ldd		[%o0 + 0x30], %f16
761	ldd		[%o0 + 0x38], %f18
762	ldd		[%o0 + 0x40], %f20
763	ldd		[%o0 + 0x48], %f22
764	ldd		[%o0 + 0x50], %f24
765	ldd		[%o0 + 0x58], %f26
766	ldd		[%o0 + 0x60], %f28
767	ldd		[%o0 + 0x68], %f30
768	ldd		[%o0 + 0x70], %f32
769	ldd		[%o0 + 0x78], %f34
770	ldd		[%o0 + 0x80], %f36
771	ldd		[%o0 + 0x88], %f38
772	ldd		[%o0 + 0x90], %f40
773	ldd		[%o0 + 0x98], %f42
774	ldd		[%o0 + 0xa0], %f44
775	ldd		[%o0 + 0xa8], %f46
776	ldd		[%o0 + 0xb0], %f48
777	ldd		[%o0 + 0xb8], %f50
778	ldd		[%o0 + 0xc0], %f52
	/* Last load sits in the return delay slot. */
779	retl
780	 ldd		[%o0 + 0xc8], %f54
781ENDPROC(aes_sparc64_load_encrypt_keys_192)
782
783	.align		32
/*
 * aes_sparc64_load_encrypt_keys_256 - preload 256-bit encryption round
 * keys into FP registers.
 *
 *   %o0 = key  expanded key schedule
 *
 * Loads schedule offsets 0x10..0xe8 into %f8..%f62, the register range
 * the ECB encrypt routine in this file passes to ENCRYPT_256 /
 * ENCRYPT_256_2 (KEY_BASE 8).  Offsets 0x00/0x08 are not loaded here.
 *
 * NOTE(review): VISEntry is issued without a matching VISExit;
 * presumably the caller ends the VIS section afterwards -- confirm.
 */
784ENTRY(aes_sparc64_load_encrypt_keys_256)
785	/* %o0=key */
786	VISEntry
787	ldd		[%o0 + 0x10], %f8
788	ldd		[%o0 + 0x18], %f10
789	ldd		[%o0 + 0x20], %f12
790	ldd		[%o0 + 0x28], %f14
791	ldd		[%o0 + 0x30], %f16
792	ldd		[%o0 + 0x38], %f18
793	ldd		[%o0 + 0x40], %f20
794	ldd		[%o0 + 0x48], %f22
795	ldd		[%o0 + 0x50], %f24
796	ldd		[%o0 + 0x58], %f26
797	ldd		[%o0 + 0x60], %f28
798	ldd		[%o0 + 0x68], %f30
799	ldd		[%o0 + 0x70], %f32
800	ldd		[%o0 + 0x78], %f34
801	ldd		[%o0 + 0x80], %f36
802	ldd		[%o0 + 0x88], %f38
803	ldd		[%o0 + 0x90], %f40
804	ldd		[%o0 + 0x98], %f42
805	ldd		[%o0 + 0xa0], %f44
806	ldd		[%o0 + 0xa8], %f46
807	ldd		[%o0 + 0xb0], %f48
808	ldd		[%o0 + 0xb8], %f50
809	ldd		[%o0 + 0xc0], %f52
810	ldd		[%o0 + 0xc8], %f54
811	ldd		[%o0 + 0xd0], %f56
812	ldd		[%o0 + 0xd8], %f58
813	ldd		[%o0 + 0xe0], %f60
	/* Last load sits in the return delay slot. */
814	retl
815	 ldd		[%o0 + 0xe8], %f62
816ENDPROC(aes_sparc64_load_encrypt_keys_256)
817
818	.align		32
/*
 * aes_sparc64_load_decrypt_keys_128 - preload 128-bit decryption round
 * keys into FP registers.
 *
 *   %o0 = key  expanded key schedule
 *
 * Loads schedule offsets 0x98 down to 0x00 into %f8..%f46, i.e. in
 * descending memory order, matching the register range the ECB decrypt
 * routine passes to DECRYPT_128 / DECRYPT_128_2 (KEY_BASE 8).  Offsets
 * 0xa0/0xa8 are not loaded here.
 *
 * NOTE(review): VISEntry is issued without a matching VISExit;
 * presumably the caller ends the VIS section afterwards -- confirm.
 */
819ENTRY(aes_sparc64_load_decrypt_keys_128)
820	/* %o0=key */
821	VISEntry
822	ldd		[%o0 + 0x98], %f8
823	ldd		[%o0 + 0x90], %f10
824	ldd		[%o0 + 0x88], %f12
825	ldd		[%o0 + 0x80], %f14
826	ldd		[%o0 + 0x78], %f16
827	ldd		[%o0 + 0x70], %f18
828	ldd		[%o0 + 0x68], %f20
829	ldd		[%o0 + 0x60], %f22
830	ldd		[%o0 + 0x58], %f24
831	ldd		[%o0 + 0x50], %f26
832	ldd		[%o0 + 0x48], %f28
833	ldd		[%o0 + 0x40], %f30
834	ldd		[%o0 + 0x38], %f32
835	ldd		[%o0 + 0x30], %f34
836	ldd		[%o0 + 0x28], %f36
837	ldd		[%o0 + 0x20], %f38
838	ldd		[%o0 + 0x18], %f40
839	ldd		[%o0 + 0x10], %f42
840	ldd		[%o0 + 0x08], %f44
	/* Last load sits in the return delay slot. */
841	retl
842	 ldd		[%o0 + 0x00], %f46
843ENDPROC(aes_sparc64_load_decrypt_keys_128)
844
845	.align		32
/*
 * aes_sparc64_load_decrypt_keys_192 - preload 192-bit decryption round
 * keys into FP registers.
 *
 *   %o0 = key  expanded key schedule
 *
 * Loads schedule offsets 0xb8 down to 0x00 into %f8..%f54 (descending
 * memory order), matching the register range the ECB decrypt routine
 * passes to DECRYPT_192 / DECRYPT_192_2 (KEY_BASE 8).  Offsets
 * 0xc0/0xc8 are not loaded here.
 *
 * NOTE(review): VISEntry is issued without a matching VISExit;
 * presumably the caller ends the VIS section afterwards -- confirm.
 */
846ENTRY(aes_sparc64_load_decrypt_keys_192)
847	/* %o0=key */
848	VISEntry
849	ldd		[%o0 + 0xb8], %f8
850	ldd		[%o0 + 0xb0], %f10
851	ldd		[%o0 + 0xa8], %f12
852	ldd		[%o0 + 0xa0], %f14
853	ldd		[%o0 + 0x98], %f16
854	ldd		[%o0 + 0x90], %f18
855	ldd		[%o0 + 0x88], %f20
856	ldd		[%o0 + 0x80], %f22
857	ldd		[%o0 + 0x78], %f24
858	ldd		[%o0 + 0x70], %f26
859	ldd		[%o0 + 0x68], %f28
860	ldd		[%o0 + 0x60], %f30
861	ldd		[%o0 + 0x58], %f32
862	ldd		[%o0 + 0x50], %f34
863	ldd		[%o0 + 0x48], %f36
864	ldd		[%o0 + 0x40], %f38
865	ldd		[%o0 + 0x38], %f40
866	ldd		[%o0 + 0x30], %f42
867	ldd		[%o0 + 0x28], %f44
868	ldd		[%o0 + 0x20], %f46
869	ldd		[%o0 + 0x18], %f48
870	ldd		[%o0 + 0x10], %f50
871	ldd		[%o0 + 0x08], %f52
	/* Last load sits in the return delay slot. */
872	retl
873	 ldd		[%o0 + 0x00], %f54
874ENDPROC(aes_sparc64_load_decrypt_keys_192)
875
876	.align		32
/*
 * aes_sparc64_load_decrypt_keys_256 - preload 256-bit decryption round
 * keys into FP registers.
 *
 *   %o0 = key  expanded key schedule
 *
 * Loads schedule offsets 0xd8 down to 0x00 into %f8..%f62 (descending
 * memory order); this is the same offset-to-register mapping that
 * DECRYPT_256_2 restores with its ldd reloads.  Offsets 0xe0/0xe8 are
 * not loaded here.
 *
 * NOTE(review): VISEntry is issued without a matching VISExit;
 * presumably the caller ends the VIS section afterwards -- confirm.
 */
877ENTRY(aes_sparc64_load_decrypt_keys_256)
878	/* %o0=key */
879	VISEntry
880	ldd		[%o0 + 0xd8], %f8
881	ldd		[%o0 + 0xd0], %f10
882	ldd		[%o0 + 0xc8], %f12
883	ldd		[%o0 + 0xc0], %f14
884	ldd		[%o0 + 0xb8], %f16
885	ldd		[%o0 + 0xb0], %f18
886	ldd		[%o0 + 0xa8], %f20
887	ldd		[%o0 + 0xa0], %f22
888	ldd		[%o0 + 0x98], %f24
889	ldd		[%o0 + 0x90], %f26
890	ldd		[%o0 + 0x88], %f28
891	ldd		[%o0 + 0x80], %f30
892	ldd		[%o0 + 0x78], %f32
893	ldd		[%o0 + 0x70], %f34
894	ldd		[%o0 + 0x68], %f36
895	ldd		[%o0 + 0x60], %f38
896	ldd		[%o0 + 0x58], %f40
897	ldd		[%o0 + 0x50], %f42
898	ldd		[%o0 + 0x48], %f44
899	ldd		[%o0 + 0x40], %f46
900	ldd		[%o0 + 0x38], %f48
901	ldd		[%o0 + 0x30], %f50
902	ldd		[%o0 + 0x28], %f52
903	ldd		[%o0 + 0x20], %f54
904	ldd		[%o0 + 0x18], %f56
905	ldd		[%o0 + 0x10], %f58
906	ldd		[%o0 + 0x08], %f60
	/* Last load sits in the return delay slot. */
907	retl
908	 ldd		[%o0 + 0x00], %f62
909ENDPROC(aes_sparc64_load_decrypt_keys_256)
910
911	.align		32
/*
 * aes_sparc64_ecb_encrypt_128 - ECB-encrypt a buffer with a 128-bit key.
 *
 *   %o0 = key     expanded key schedule (only offsets 0x00/0x08 are read
 *                 here, into %g1/%g2 for the initial key XOR)
 *   %o1 = input   source buffer, read with 64-bit loads
 *   %o2 = output  destination buffer, written with 64-bit stores
 *   %o3 = len     byte count
 *
 * The round keys are expected to be in %f8..%f46 already; this routine
 * does not load them and issues no VISEntry (presumably
 * aes_sparc64_load_encrypt_keys_128 is called first -- confirm).
 *
 * %o3 is kept as "bytes remaining - 16".  The main loop handles two
 * blocks (32 bytes) per pass; a single trailing block is handled at 10:.
 * len is expected to be a non-zero multiple of 16: a len of 0 is not
 * special-cased and would enter the two-block loop.
 *
 * Clobbers %g1-%g3, %g7, %o1-%o5, %f0-%f6, %f56-%f62.
 */
912ENTRY(aes_sparc64_ecb_encrypt_128)
913	/* %o0=key, %o1=input, %o2=output, %o3=len */
914	ldx		[%o0 + 0x00], %g1
915	subcc		%o3, 0x10, %o3
	/* Exactly one block: skip the loop (second key word loads in the delay slot). */
916	be		10f
917	 ldx		[%o0 + 0x08], %g2
	/* Two-block loop: block A -> %f4/%f6, block B -> %f60/%f62. */
9181:	ldx		[%o1 + 0x00], %g3
919	ldx		[%o1 + 0x08], %g7
920	ldx		[%o1 + 0x10], %o4
921	ldx		[%o1 + 0x18], %o5
	/* Initial key XOR is done in integer registers, then moved to FP. */
922	xor		%g1, %g3, %g3
923	xor		%g2, %g7, %g7
924	MOVXTOD_G3_F4
925	MOVXTOD_G7_F6
926	xor		%g1, %o4, %g3
927	xor		%g2, %o5, %g7
928	MOVXTOD_G3_F60
929	MOVXTOD_G7_F62
930	ENCRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
931	std		%f4, [%o2 + 0x00]
932	std		%f6, [%o2 + 0x08]
933	std		%f60, [%o2 + 0x10]
934	std		%f62, [%o2 + 0x18]
935	sub		%o3, 0x20, %o3
936	add		%o1, 0x20, %o1
	/* > 0: at least two more blocks; < 0: done; == 0: one block left. */
937	brgz		%o3, 1b
938	 add		%o2, 0x20, %o2
939	brlz,pt		%o3, 11f
940	 nop
	/* Single-block tail. */
94110:	ldx		[%o1 + 0x00], %g3
942	ldx		[%o1 + 0x08], %g7
943	xor		%g1, %g3, %g3
944	xor		%g2, %g7, %g7
945	MOVXTOD_G3_F4
946	MOVXTOD_G7_F6
947	ENCRYPT_128(8, 4, 6, 0, 2)
948	std		%f4, [%o2 + 0x00]
949	std		%f6, [%o2 + 0x08]
95011:	retl
951	 nop
952ENDPROC(aes_sparc64_ecb_encrypt_128)
953
954	.align		32
/*
 * aes_sparc64_ecb_encrypt_192 - ECB-encrypt a buffer with a 192-bit key.
 *
 *   %o0 = key     expanded key schedule (only offsets 0x00/0x08 are read
 *                 here, into %g1/%g2 for the initial key XOR)
 *   %o1 = input   source buffer, read with 64-bit loads
 *   %o2 = output  destination buffer, written with 64-bit stores
 *   %o3 = len     byte count
 *
 * The round keys are expected to be in %f8..%f54 already; this routine
 * does not load them and issues no VISEntry (presumably
 * aes_sparc64_load_encrypt_keys_192 is called first -- confirm).
 *
 * %o3 is kept as "bytes remaining - 16".  The main loop handles two
 * blocks per pass; a single trailing block is handled at 10:.  len is
 * expected to be a non-zero multiple of 16: a len of 0 is not
 * special-cased and would enter the two-block loop.
 *
 * Clobbers %g1-%g3, %g7, %o1-%o5, %f0-%f6, %f56-%f62.
 */
955ENTRY(aes_sparc64_ecb_encrypt_192)
956	/* %o0=key, %o1=input, %o2=output, %o3=len */
957	ldx		[%o0 + 0x00], %g1
958	subcc		%o3, 0x10, %o3
	/* Exactly one block: skip the loop (second key word loads in the delay slot). */
959	be		10f
960	 ldx		[%o0 + 0x08], %g2
	/* Two-block loop: block A -> %f4/%f6, block B -> %f60/%f62. */
9611:	ldx		[%o1 + 0x00], %g3
962	ldx		[%o1 + 0x08], %g7
963	ldx		[%o1 + 0x10], %o4
964	ldx		[%o1 + 0x18], %o5
	/* Initial key XOR is done in integer registers, then moved to FP. */
965	xor		%g1, %g3, %g3
966	xor		%g2, %g7, %g7
967	MOVXTOD_G3_F4
968	MOVXTOD_G7_F6
969	xor		%g1, %o4, %g3
970	xor		%g2, %o5, %g7
971	MOVXTOD_G3_F60
972	MOVXTOD_G7_F62
973	ENCRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
974	std		%f4, [%o2 + 0x00]
975	std		%f6, [%o2 + 0x08]
976	std		%f60, [%o2 + 0x10]
977	std		%f62, [%o2 + 0x18]
978	sub		%o3, 0x20, %o3
979	add		%o1, 0x20, %o1
	/* > 0: at least two more blocks; < 0: done; == 0: one block left. */
980	brgz		%o3, 1b
981	 add		%o2, 0x20, %o2
982	brlz,pt		%o3, 11f
983	 nop
	/* Single-block tail. */
98410:	ldx		[%o1 + 0x00], %g3
985	ldx		[%o1 + 0x08], %g7
986	xor		%g1, %g3, %g3
987	xor		%g2, %g7, %g7
988	MOVXTOD_G3_F4
989	MOVXTOD_G7_F6
990	ENCRYPT_192(8, 4, 6, 0, 2)
991	std		%f4, [%o2 + 0x00]
992	std		%f6, [%o2 + 0x08]
99311:	retl
994	 nop
995ENDPROC(aes_sparc64_ecb_encrypt_192)
996
997	.align		32
/*
 * aes_sparc64_ecb_encrypt_256 - ECB-encrypt a buffer with a 256-bit key.
 *
 *   %o0 = key     expanded key schedule.  Offsets 0x00/0x08 are read
 *                 into %g1/%g2; ENCRYPT_256_2 and the tail also reload
 *                 round keys through %o0, so it must stay valid.
 *   %o1 = input   source buffer, read with 64-bit loads
 *   %o2 = output  destination buffer, written with 64-bit stores
 *   %o3 = len     byte count
 *
 * The round keys are expected to be in %f8..%f62 already (presumably via
 * aes_sparc64_load_encrypt_keys_256 -- confirm).  With all of
 * %f8..%f62 holding keys, the second block of the two-block loop lives
 * in %f0/%f2 and ENCRYPT_256_2 borrows key registers as temporaries.
 *
 * %o3 is kept as "bytes remaining - 16".  len is expected to be a
 * non-zero multiple of 16: a len of 0 is not special-cased and would
 * enter the two-block loop.
 *
 * Clobbers %g1-%g3, %g7, %o1-%o5, %f0-%f6; key registers are reloaded
 * as noted.
 */
998ENTRY(aes_sparc64_ecb_encrypt_256)
999	/* %o0=key, %o1=input, %o2=output, %o3=len */
1000	ldx		[%o0 + 0x00], %g1
1001	subcc		%o3, 0x10, %o3
	/* Exactly one block: skip the loop (second key word loads in the delay slot). */
1002	be		10f
1003	 ldx		[%o0 + 0x08], %g2
	/* Two-block loop: block A -> %f4/%f6, block B -> %f0/%f2. */
10041:	ldx		[%o1 + 0x00], %g3
1005	ldx		[%o1 + 0x08], %g7
1006	ldx		[%o1 + 0x10], %o4
1007	ldx		[%o1 + 0x18], %o5
1008	xor		%g1, %g3, %g3
1009	xor		%g2, %g7, %g7
1010	MOVXTOD_G3_F4
1011	MOVXTOD_G7_F6
1012	xor		%g1, %o4, %g3
1013	xor		%g2, %o5, %g7
1014	MOVXTOD_G3_F0
1015	MOVXTOD_G7_F2
1016	ENCRYPT_256_2(8, 4, 6, 0, 2)
1017	std		%f4, [%o2 + 0x00]
1018	std		%f6, [%o2 + 0x08]
1019	std		%f0, [%o2 + 0x10]
1020	std		%f2, [%o2 + 0x18]
1021	sub		%o3, 0x20, %o3
1022	add		%o1, 0x20, %o1
	/* > 0: at least two more blocks; < 0: done; == 0: one block left. */
1023	brgz		%o3, 1b
1024	 add		%o2, 0x20, %o2
1025	brlz,pt		%o3, 11f
1026	 nop
	/* Single-block tail: (re)load the last four round keys, then encrypt. */
102710:	ldd		[%o0 + 0xd0], %f56
1028	ldd		[%o0 + 0xd8], %f58
1029	ldd		[%o0 + 0xe0], %f60
1030	ldd		[%o0 + 0xe8], %f62
1031	ldx		[%o1 + 0x00], %g3
1032	ldx		[%o1 + 0x08], %g7
1033	xor		%g1, %g3, %g3
1034	xor		%g2, %g7, %g7
1035	MOVXTOD_G3_F4
1036	MOVXTOD_G7_F6
1037	ENCRYPT_256(8, 4, 6, 0, 2)
1038	std		%f4, [%o2 + 0x00]
1039	std		%f6, [%o2 + 0x08]
104011:	retl
1041	 nop
1042ENDPROC(aes_sparc64_ecb_encrypt_256)
1043
1044	.align		32
/*
 * aes_sparc64_ecb_decrypt_128 - ECB-decrypt a buffer with a 128-bit key.
 *
 *   %o0 = &key[key_len]  pointer just past the end of the key schedule;
 *                        the last two 64-bit words (%o0-0x10, %o0-0x08)
 *                        are read into %g1/%g2 for the initial key XOR
 *   %o1 = input          source buffer, read with 64-bit loads
 *   %o2 = output         destination buffer, written with 64-bit stores
 *   %o3 = len            byte count
 *
 * The round keys are expected to be in %f8..%f46 already, in the order
 * DECRYPT_128 consumes them (presumably via
 * aes_sparc64_load_decrypt_keys_128 -- confirm).
 *
 * %o3 is kept as "bytes remaining - 16".  The main loop handles two
 * blocks per pass; a single trailing block is handled at 10:.  len is
 * expected to be a non-zero multiple of 16: a len of 0 is not
 * special-cased and would enter the two-block loop.
 *
 * Clobbers %g1-%g3, %g7, %o1-%o5, %f0-%f6, %f56-%f62.
 */
1045ENTRY(aes_sparc64_ecb_decrypt_128)
1046	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
1047	ldx		[%o0 - 0x10], %g1
1048	subcc		%o3, 0x10, %o3
	/* Exactly one block: skip the loop (second key word loads in the delay slot). */
1049	be		10f
1050	 ldx		[%o0 - 0x08], %g2
	/* Two-block loop: block A -> %f4/%f6, block B -> %f60/%f62. */
10511:	ldx		[%o1 + 0x00], %g3
1052	ldx		[%o1 + 0x08], %g7
1053	ldx		[%o1 + 0x10], %o4
1054	ldx		[%o1 + 0x18], %o5
	/* Initial key XOR is done in integer registers, then moved to FP. */
1055	xor		%g1, %g3, %g3
1056	xor		%g2, %g7, %g7
1057	MOVXTOD_G3_F4
1058	MOVXTOD_G7_F6
1059	xor		%g1, %o4, %g3
1060	xor		%g2, %o5, %g7
1061	MOVXTOD_G3_F60
1062	MOVXTOD_G7_F62
1063	DECRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
1064	std		%f4, [%o2 + 0x00]
1065	std		%f6, [%o2 + 0x08]
1066	std		%f60, [%o2 + 0x10]
1067	std		%f62, [%o2 + 0x18]
1068	sub		%o3, 0x20, %o3
1069	add		%o1, 0x20, %o1
	/* > 0: at least two more blocks; < 0: done; == 0: one block left. */
1070	brgz,pt		%o3, 1b
1071	 add		%o2, 0x20, %o2
1072	brlz,pt		%o3, 11f
1073	 nop
	/* Single-block tail. */
107410:	ldx		[%o1 + 0x00], %g3
1075	ldx		[%o1 + 0x08], %g7
1076	xor		%g1, %g3, %g3
1077	xor		%g2, %g7, %g7
1078	MOVXTOD_G3_F4
1079	MOVXTOD_G7_F6
1080	DECRYPT_128(8, 4, 6, 0, 2)
1081	std		%f4, [%o2 + 0x00]
1082	std		%f6, [%o2 + 0x08]
108311:	retl
1084	 nop
1085ENDPROC(aes_sparc64_ecb_decrypt_128)
1086
1087	.align		32
1088ENTRY(aes_sparc64_ecb_decrypt_192)
1089	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
	/*
	 * ECB-decrypts the data at %o1 into %o2, two 16-byte blocks per loop
	 * pass, with a single-block tail at label 10.
	 *
	 * %o0 points just past the end of the key material; the initial
	 * whitening words are the 16 bytes immediately below it.
	 *
	 * len is assumed to be a nonzero multiple of 16 (presumably enforced
	 * by the caller): at least one block is always processed and there is
	 * no partial-block handling.
	 *
	 * The round macros are given KEY_BASE 8, so the round keys are
	 * presumably already resident in the FP registers from %f8 upward,
	 * loaded by code outside this function -- TODO confirm.
	 * MOVXTOD_<src>_<dst> comes from opcodes.h (not shown here); by its
	 * name it presumably copies integer register <src> into FP double
	 * register <dst>.
	 */
1090	ldx		[%o0 - 0x10], %g1	/* %g1:%g2 = last 16 bytes before %o0 */
1091	subcc		%o3, 0x10, %o3	/* %o3 = len - 16 */
1092	be		10f	/* exactly one block: go straight to the tail */
1093	 ldx		[%o0 - 0x08], %g2	/* delay slot: executed on both paths */
	/* Two-block loop: block A in %f4:%f6, block B in %f60:%f62. */
10941:	ldx		[%o1 + 0x00], %g3
1095	ldx		[%o1 + 0x08], %g7
1096	ldx		[%o1 + 0x10], %o4
1097	ldx		[%o1 + 0x18], %o5
1098	xor		%g1, %g3, %g3	/* block A ^ %g1:%g2 */
1099	xor		%g2, %g7, %g7
1100	MOVXTOD_G3_F4
1101	MOVXTOD_G7_F6
1102	xor		%g1, %o4, %g3	/* block B ^ %g1:%g2 (%g3/%g7 reused) */
1103	xor		%g2, %o5, %g7
1104	MOVXTOD_G3_F60
1105	MOVXTOD_G7_F62
	/*
	 * NOTE(review): the trailing arguments (0, 2, 56, 58) are presumably
	 * the scratch FP registers -- confirm against the DECRYPT_192_2 macro.
	 */
1106	DECRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
1107	std		%f4, [%o2 + 0x00]
1108	std		%f6, [%o2 + 0x08]
1109	std		%f60, [%o2 + 0x10]
1110	std		%f62, [%o2 + 0x18]
1111	sub		%o3, 0x20, %o3	/* 32 bytes consumed */
1112	add		%o1, 0x20, %o1
1113	brgz,pt		%o3, 1b	/* more than one block left: loop again */
1114	 add		%o2, 0x20, %o2	/* delay slot: advance output pointer */
1115	brlz,pt		%o3, 11f	/* negative: nothing left, return */
1116	 nop
	/* Tail: %o3 == 0 here, i.e. exactly one block remains. */
111710:	ldx		[%o1 + 0x00], %g3
1118	ldx		[%o1 + 0x08], %g7
1119	xor		%g1, %g3, %g3
1120	xor		%g2, %g7, %g7
1121	MOVXTOD_G3_F4
1122	MOVXTOD_G7_F6
1123	DECRYPT_192(8, 4, 6, 0, 2)
1124	std		%f4, [%o2 + 0x00]
1125	std		%f6, [%o2 + 0x08]
112611:	retl
1127	 nop
1128ENDPROC(aes_sparc64_ecb_decrypt_192)
1129
1130	.align		32
1131ENTRY(aes_sparc64_ecb_decrypt_256)
1132	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
	/*
	 * ECB-decrypts the data at %o1 into %o2, two 16-byte blocks per loop
	 * pass, with a single-block tail at label 10.
	 *
	 * On entry %o0 points just past the end of the key material; the
	 * initial whitening words are the 16 bytes immediately below it.
	 * %o0 is then moved back by 0xf0 bytes and used as the base for the
	 * key reloads at label 10.
	 *
	 * len is assumed to be a nonzero multiple of 16 (presumably enforced
	 * by the caller): at least one block is always processed and there is
	 * no partial-block handling.
	 *
	 * The round macros are given KEY_BASE 8, so the round keys are
	 * presumably already resident in the FP registers from %f8 upward,
	 * loaded by code outside this function -- TODO confirm.
	 * MOVXTOD_<src>_<dst> comes from opcodes.h (not shown here); by its
	 * name it presumably copies integer register <src> into FP double
	 * register <dst>.
	 */
1133	ldx		[%o0 - 0x10], %g1	/* %g1:%g2 = last 16 bytes before %o0 */
1134	subcc		%o3, 0x10, %o3	/* %o3 = len - 16 */
1135	ldx		[%o0 - 0x08], %g2
1136	be		10f	/* exactly one block: go straight to the tail */
	/*
	 * Delay slot (executed on both paths): rewind %o0 by 0xf0 = 240 bytes.
	 * NOTE(review): presumably the size of the expanded 256-bit key, so
	 * %o0 now addresses its first byte -- confirm.
	 */
1137	 sub		%o0, 0xf0, %o0
	/* Two-block loop: block A in %f4:%f6, block B in %f0:%f2. */
11381:	ldx		[%o1 + 0x00], %g3
1139	ldx		[%o1 + 0x08], %g7
1140	ldx		[%o1 + 0x10], %o4
1141	ldx		[%o1 + 0x18], %o5
1142	xor		%g1, %g3, %g3	/* block A ^ %g1:%g2 */
1143	xor		%g2, %g7, %g7
1144	MOVXTOD_G3_F4
1145	MOVXTOD_G7_F6
1146	xor		%g1, %o4, %g3	/* block B ^ %g1:%g2 (%g3/%g7 reused) */
1147	xor		%g2, %o5, %g7
1148	MOVXTOD_G3_F0
1149	MOVXTOD_G7_F2
1150	DECRYPT_256_2(8, 4, 6, 0, 2)
1151	std		%f4, [%o2 + 0x00]
1152	std		%f6, [%o2 + 0x08]
1153	std		%f0, [%o2 + 0x10]
1154	std		%f2, [%o2 + 0x18]
1155	sub		%o3, 0x20, %o3	/* 32 bytes consumed */
1156	add		%o1, 0x20, %o1
1157	brgz,pt		%o3, 1b	/* more than one block left: loop again */
1158	 add		%o2, 0x20, %o2	/* delay slot: advance output pointer */
1159	brlz,pt		%o3, 11f	/* negative: nothing left, return */
1160	 nop
	/*
	 * Tail: %o3 == 0 here, i.e. exactly one block remains.
	 * Reload %f56-%f62 from the first 32 bytes at the rewound %o0, in
	 * descending address order (0x18 -> %f56 ... 0x00 -> %f62).
	 * NOTE(review): presumably required because DECRYPT_256_2 uses these
	 * registers as scratch -- confirm against the macro definition.
	 */
116110:	ldd		[%o0 + 0x18], %f56
1162	ldd		[%o0 + 0x10], %f58
1163	ldd		[%o0 + 0x08], %f60
1164	ldd		[%o0 + 0x00], %f62
1165	ldx		[%o1 + 0x00], %g3
1166	ldx		[%o1 + 0x08], %g7
1167	xor		%g1, %g3, %g3
1168	xor		%g2, %g7, %g7
1169	MOVXTOD_G3_F4
1170	MOVXTOD_G7_F6
1171	DECRYPT_256(8, 4, 6, 0, 2)
1172	std		%f4, [%o2 + 0x00]
1173	std		%f6, [%o2 + 0x08]
117411:	retl
1175	 nop
1176ENDPROC(aes_sparc64_ecb_decrypt_256)
1177
1178	.align		32
1179ENTRY(aes_sparc64_cbc_encrypt_128)
1180	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
	/*
	 * CBC encryption, one 16-byte block per iteration.
	 *
	 * %f4:%f6 carry the chaining value: initially the 16 bytes at %o4,
	 * afterwards the ciphertext block just produced.  The last ciphertext
	 * block is written back to [%o4] before returning.
	 *
	 * The loop tests its condition at the bottom and exits only when %o3
	 * reaches exactly zero, so len must be a nonzero multiple of 16
	 * (presumably enforced by the caller).
	 *
	 * ENCRYPT_128 is given KEY_BASE 8, so the round keys are presumably
	 * already resident in the FP registers from %f8 upward -- TODO
	 * confirm.  MOVXTOD_<src>_<dst> comes from opcodes.h (not shown); by
	 * its name it presumably copies integer register <src> into FP
	 * double register <dst>.
	 */
1181	ldd		[%o4 + 0x00], %f4	/* chaining value = IV */
1182	ldd		[%o4 + 0x08], %f6
1183	ldx		[%o0 + 0x00], %g1	/* %g1:%g2 = first 16 bytes at key */
1184	ldx		[%o0 + 0x08], %g2
11851:	ldx		[%o1 + 0x00], %g3
1186	ldx		[%o1 + 0x08], %g7
1187	add		%o1, 0x10, %o1
1188	xor		%g1, %g3, %g3	/* input block ^ first 16 key bytes */
1189	xor		%g2, %g7, %g7
1190	MOVXTOD_G3_F0
1191	MOVXTOD_G7_F2
1192	fxor		%f4, %f0, %f4	/* fold in the chaining value */
1193	fxor		%f6, %f2, %f6
1194	ENCRYPT_128(8, 4, 6, 0, 2)	/* state %f4/%f6, temporaries %f0/%f2 */
1195	std		%f4, [%o2 + 0x00]
1196	std		%f6, [%o2 + 0x08]
1197	subcc		%o3, 0x10, %o3
1198	bne,pt		%xcc, 1b	/* loop until the byte count hits zero */
1199	 add		%o2, 0x10, %o2	/* delay slot: advance output pointer */
1200	std		%f4, [%o4 + 0x00]	/* save last ciphertext block as new IV */
1201	std		%f6, [%o4 + 0x08]
1202	retl
1203	 nop
1204ENDPROC(aes_sparc64_cbc_encrypt_128)
1205
1206	.align		32
1207ENTRY(aes_sparc64_cbc_encrypt_192)
1208	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
	/*
	 * CBC encryption, one 16-byte block per iteration.
	 *
	 * %f4:%f6 carry the chaining value: initially the 16 bytes at %o4,
	 * afterwards the ciphertext block just produced.  The last ciphertext
	 * block is written back to [%o4] before returning.
	 *
	 * The loop tests its condition at the bottom and exits only when %o3
	 * reaches exactly zero, so len must be a nonzero multiple of 16
	 * (presumably enforced by the caller).
	 *
	 * ENCRYPT_192 is given KEY_BASE 8, so the round keys are presumably
	 * already resident in the FP registers from %f8 upward -- TODO
	 * confirm.  MOVXTOD_<src>_<dst> comes from opcodes.h (not shown); by
	 * its name it presumably copies integer register <src> into FP
	 * double register <dst>.
	 */
1209	ldd		[%o4 + 0x00], %f4	/* chaining value = IV */
1210	ldd		[%o4 + 0x08], %f6
1211	ldx		[%o0 + 0x00], %g1	/* %g1:%g2 = first 16 bytes at key */
1212	ldx		[%o0 + 0x08], %g2
12131:	ldx		[%o1 + 0x00], %g3
1214	ldx		[%o1 + 0x08], %g7
1215	add		%o1, 0x10, %o1
1216	xor		%g1, %g3, %g3	/* input block ^ first 16 key bytes */
1217	xor		%g2, %g7, %g7
1218	MOVXTOD_G3_F0
1219	MOVXTOD_G7_F2
1220	fxor		%f4, %f0, %f4	/* fold in the chaining value */
1221	fxor		%f6, %f2, %f6
1222	ENCRYPT_192(8, 4, 6, 0, 2)	/* result is stored from %f4/%f6 below */
1223	std		%f4, [%o2 + 0x00]
1224	std		%f6, [%o2 + 0x08]
1225	subcc		%o3, 0x10, %o3
1226	bne,pt		%xcc, 1b	/* loop until the byte count hits zero */
1227	 add		%o2, 0x10, %o2	/* delay slot: advance output pointer */
1228	std		%f4, [%o4 + 0x00]	/* save last ciphertext block as new IV */
1229	std		%f6, [%o4 + 0x08]
1230	retl
1231	 nop
1232ENDPROC(aes_sparc64_cbc_encrypt_192)
1233
1234	.align		32
1235ENTRY(aes_sparc64_cbc_encrypt_256)
1236	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
	/*
	 * CBC encryption, one 16-byte block per iteration.
	 *
	 * %f4:%f6 carry the chaining value: initially the 16 bytes at %o4,
	 * afterwards the ciphertext block just produced.  The last ciphertext
	 * block is written back to [%o4] before returning.
	 *
	 * The loop tests its condition at the bottom and exits only when %o3
	 * reaches exactly zero, so len must be a nonzero multiple of 16
	 * (presumably enforced by the caller).
	 *
	 * ENCRYPT_256 is given KEY_BASE 8, so the round keys are presumably
	 * already resident in the FP registers from %f8 upward -- TODO
	 * confirm.  MOVXTOD_<src>_<dst> comes from opcodes.h (not shown); by
	 * its name it presumably copies integer register <src> into FP
	 * double register <dst>.
	 */
1237	ldd		[%o4 + 0x00], %f4	/* chaining value = IV */
1238	ldd		[%o4 + 0x08], %f6
1239	ldx		[%o0 + 0x00], %g1	/* %g1:%g2 = first 16 bytes at key */
1240	ldx		[%o0 + 0x08], %g2
12411:	ldx		[%o1 + 0x00], %g3
1242	ldx		[%o1 + 0x08], %g7
1243	add		%o1, 0x10, %o1
1244	xor		%g1, %g3, %g3	/* input block ^ first 16 key bytes */
1245	xor		%g2, %g7, %g7
1246	MOVXTOD_G3_F0
1247	MOVXTOD_G7_F2
1248	fxor		%f4, %f0, %f4	/* fold in the chaining value */
1249	fxor		%f6, %f2, %f6
1250	ENCRYPT_256(8, 4, 6, 0, 2)	/* result is stored from %f4/%f6 below */
1251	std		%f4, [%o2 + 0x00]
1252	std		%f6, [%o2 + 0x08]
1253	subcc		%o3, 0x10, %o3
1254	bne,pt		%xcc, 1b	/* loop until the byte count hits zero */
1255	 add		%o2, 0x10, %o2	/* delay slot: advance output pointer */
1256	std		%f4, [%o4 + 0x00]	/* save last ciphertext block as new IV */
1257	std		%f6, [%o4 + 0x08]
1258	retl
1259	 nop
1260ENDPROC(aes_sparc64_cbc_encrypt_256)
1261
1262	.align		32
1263ENTRY(aes_sparc64_cbc_decrypt_128)
1264	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */
	/*
	 * CBC decryption, one 16-byte block per iteration.
	 *
	 * %g1:%g2 hold the 16 bytes just below %o0.  After they are loaded
	 * %o0 is no longer used as a pointer: %o0:%o5 become the chaining
	 * value (first the IV from [%o4], then the previous ciphertext
	 * block), which is written back to [%o4] on exit.
	 *
	 * The loop tests its condition at the bottom and exits only when %o3
	 * reaches exactly zero, so len must be a nonzero multiple of 16
	 * (presumably enforced by the caller).
	 *
	 * DECRYPT_128 is given KEY_BASE 8, so the round keys are presumably
	 * already resident in the FP registers from %f8 upward -- TODO
	 * confirm.  MOVXTOD_<src>_<dst> comes from opcodes.h (not shown); by
	 * its name it presumably copies integer register <src> into FP
	 * double register <dst> without modifying <src>.
	 */
1265	ldx		[%o0 - 0x10], %g1
1266	ldx		[%o0 - 0x08], %g2
1267	ldx		[%o4 + 0x00], %o0	/* chaining value = IV (overwrites key pointer) */
1268	ldx		[%o4 + 0x08], %o5
12691:	ldx		[%o1 + 0x00], %g3
1270	ldx		[%o1 + 0x08], %g7
1271	add		%o1, 0x10, %o1
1272	xor		%g1, %g3, %g3	/* ciphertext ^ %g1:%g2 */
1273	xor		%g2, %g7, %g7
1274	MOVXTOD_G3_F4
1275	MOVXTOD_G7_F6
1276	DECRYPT_128(8, 4, 6, 0, 2)
1277	MOVXTOD_O0_F0
1278	MOVXTOD_O5_F2
	/*
	 * %g3/%g7 still hold ciphertext ^ %g1/%g2 (assuming the macros above
	 * leave them untouched), so xoring with %g1/%g2 again recovers the
	 * raw ciphertext, which becomes the next chaining value.
	 */
1279	xor		%g1, %g3, %o0
1280	xor		%g2, %g7, %o5
1281	fxor		%f4, %f0, %f4	/* decrypted block ^ previous chaining value */
1282	fxor		%f6, %f2, %f6
1283	std		%f4, [%o2 + 0x00]
1284	std		%f6, [%o2 + 0x08]
1285	subcc		%o3, 0x10, %o3
1286	bne,pt		%xcc, 1b	/* loop until the byte count hits zero */
1287	 add		%o2, 0x10, %o2	/* delay slot: advance output pointer */
1288	stx		%o0, [%o4 + 0x00]	/* save last ciphertext block as new IV */
1289	stx		%o5, [%o4 + 0x08]
1290	retl
1291	 nop
1292ENDPROC(aes_sparc64_cbc_decrypt_128)
1293
1294	.align		32
1295ENTRY(aes_sparc64_cbc_decrypt_192)
1296	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */
	/*
	 * CBC decryption, one 16-byte block per iteration.
	 *
	 * %g1:%g2 hold the 16 bytes just below %o0.  After they are loaded
	 * %o0 is no longer used as a pointer: %o0:%o5 become the chaining
	 * value (first the IV from [%o4], then the previous ciphertext
	 * block), which is written back to [%o4] on exit.
	 *
	 * The loop tests its condition at the bottom and exits only when %o3
	 * reaches exactly zero, so len must be a nonzero multiple of 16
	 * (presumably enforced by the caller).
	 *
	 * DECRYPT_192 is given KEY_BASE 8, so the round keys are presumably
	 * already resident in the FP registers from %f8 upward -- TODO
	 * confirm.  MOVXTOD_<src>_<dst> comes from opcodes.h (not shown); by
	 * its name it presumably copies integer register <src> into FP
	 * double register <dst> without modifying <src>.
	 */
1297	ldx		[%o0 - 0x10], %g1
1298	ldx		[%o0 - 0x08], %g2
1299	ldx		[%o4 + 0x00], %o0	/* chaining value = IV (overwrites key pointer) */
1300	ldx		[%o4 + 0x08], %o5
13011:	ldx		[%o1 + 0x00], %g3
1302	ldx		[%o1 + 0x08], %g7
1303	add		%o1, 0x10, %o1
1304	xor		%g1, %g3, %g3	/* ciphertext ^ %g1:%g2 */
1305	xor		%g2, %g7, %g7
1306	MOVXTOD_G3_F4
1307	MOVXTOD_G7_F6
1308	DECRYPT_192(8, 4, 6, 0, 2)
1309	MOVXTOD_O0_F0
1310	MOVXTOD_O5_F2
	/*
	 * %g3/%g7 still hold ciphertext ^ %g1/%g2 (assuming the macros above
	 * leave them untouched), so xoring with %g1/%g2 again recovers the
	 * raw ciphertext, which becomes the next chaining value.
	 */
1311	xor		%g1, %g3, %o0
1312	xor		%g2, %g7, %o5
1313	fxor		%f4, %f0, %f4	/* decrypted block ^ previous chaining value */
1314	fxor		%f6, %f2, %f6
1315	std		%f4, [%o2 + 0x00]
1316	std		%f6, [%o2 + 0x08]
1317	subcc		%o3, 0x10, %o3
1318	bne,pt		%xcc, 1b	/* loop until the byte count hits zero */
1319	 add		%o2, 0x10, %o2	/* delay slot: advance output pointer */
1320	stx		%o0, [%o4 + 0x00]	/* save last ciphertext block as new IV */
1321	stx		%o5, [%o4 + 0x08]
1322	retl
1323	 nop
1324ENDPROC(aes_sparc64_cbc_decrypt_192)
1325
1326	.align		32
1327ENTRY(aes_sparc64_cbc_decrypt_256)
1328	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */
	/*
	 * CBC decryption, one 16-byte block per iteration.
	 *
	 * %g1:%g2 hold the 16 bytes just below %o0.  After they are loaded
	 * %o0 is no longer used as a pointer: %o0:%o5 become the chaining
	 * value (first the IV from [%o4], then the previous ciphertext
	 * block), which is written back to [%o4] on exit.
	 *
	 * The loop tests its condition at the bottom and exits only when %o3
	 * reaches exactly zero, so len must be a nonzero multiple of 16
	 * (presumably enforced by the caller).
	 *
	 * DECRYPT_256 is given KEY_BASE 8, so the round keys are presumably
	 * already resident in the FP registers from %f8 upward -- TODO
	 * confirm.  MOVXTOD_<src>_<dst> comes from opcodes.h (not shown); by
	 * its name it presumably copies integer register <src> into FP
	 * double register <dst> without modifying <src>.
	 */
1329	ldx		[%o0 - 0x10], %g1
1330	ldx		[%o0 - 0x08], %g2
1331	ldx		[%o4 + 0x00], %o0	/* chaining value = IV (overwrites key pointer) */
1332	ldx		[%o4 + 0x08], %o5
13331:	ldx		[%o1 + 0x00], %g3
1334	ldx		[%o1 + 0x08], %g7
1335	add		%o1, 0x10, %o1
1336	xor		%g1, %g3, %g3	/* ciphertext ^ %g1:%g2 */
1337	xor		%g2, %g7, %g7
1338	MOVXTOD_G3_F4
1339	MOVXTOD_G7_F6
1340	DECRYPT_256(8, 4, 6, 0, 2)
1341	MOVXTOD_O0_F0
1342	MOVXTOD_O5_F2
	/*
	 * %g3/%g7 still hold ciphertext ^ %g1/%g2 (assuming the macros above
	 * leave them untouched), so xoring with %g1/%g2 again recovers the
	 * raw ciphertext, which becomes the next chaining value.
	 */
1343	xor		%g1, %g3, %o0
1344	xor		%g2, %g7, %o5
1345	fxor		%f4, %f0, %f4	/* decrypted block ^ previous chaining value */
1346	fxor		%f6, %f2, %f6
1347	std		%f4, [%o2 + 0x00]
1348	std		%f6, [%o2 + 0x08]
1349	subcc		%o3, 0x10, %o3
1350	bne,pt		%xcc, 1b	/* loop until the byte count hits zero */
1351	 add		%o2, 0x10, %o2	/* delay slot: advance output pointer */
1352	stx		%o0, [%o4 + 0x00]	/* save last ciphertext block as new IV */
1353	stx		%o5, [%o4 + 0x08]
1354	retl
1355	 nop
1356ENDPROC(aes_sparc64_cbc_decrypt_256)
1357
1358	.align		32
1359ENTRY(aes_sparc64_ctr_crypt_128)
1360	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
	/*
	 * CTR mode: encrypts successive counter values and xors the result
	 * with the input.  Two 16-byte blocks per loop pass, with a
	 * single-block tail at label 10.
	 *
	 * The 16-byte counter at %o4 is held as %g3 (bytes 0-7) and %g7
	 * (bytes 8-15).  Each increment adds 1 to %g7 and, when %g7 wraps to
	 * zero, carries into %g3.  The updated counter is stored back to
	 * [%o4] at label 11.
	 *
	 * len is assumed to be a nonzero multiple of 16 (presumably enforced
	 * by the caller): at least one block is always processed and there is
	 * no partial-block handling.
	 *
	 * The round macros are given KEY_BASE 8, so the round keys are
	 * presumably already resident in the FP registers from %f8 upward,
	 * loaded by code outside this function -- TODO confirm.
	 * MOVXTOD_O5_<dst> comes from opcodes.h (not shown); by its name it
	 * presumably copies %o5 into FP double register <dst>.
	 */
1361	ldx		[%o4 + 0x00], %g3
1362	ldx		[%o4 + 0x08], %g7
1363	subcc		%o3, 0x10, %o3	/* %o3 = len - 16 */
1364	ldx		[%o0 + 0x00], %g1	/* %g1:%g2 = first 16 bytes at key */
1365	be		10f	/* exactly one block: go straight to the tail */
1366	 ldx		[%o0 + 0x08], %g2	/* delay slot: executed on both paths */
	/* Two-block loop: first counter block -> %f0:%f2. */
13671:	xor		%g1, %g3, %o5
1368	MOVXTOD_O5_F0
1369	xor		%g2, %g7, %o5
1370	MOVXTOD_O5_F2
1371	add		%g7, 1, %g7	/* counter++ (low half) */
1372	add		%g3, 1, %o5	/* candidate high half + 1 */
1373	movrz		%g7, %o5, %g3	/* take it only if the low half wrapped to 0 */
	/* Second counter block -> %f4:%f6. */
1374	xor		%g1, %g3, %o5
1375	MOVXTOD_O5_F4
1376	xor		%g2, %g7, %o5
1377	MOVXTOD_O5_F6
1378	add		%g7, 1, %g7
1379	add		%g3, 1, %o5
1380	movrz		%g7, %o5, %g3
	/* State in %f0/%f2 and %f4/%f6; %f56-%f62 are the temporaries. */
1381	ENCRYPT_128_2(8, 0, 2, 4, 6, 56, 58, 60, 62)
	/* The temporaries are free again: reuse them for the input data. */
1382	ldd		[%o1 + 0x00], %f56
1383	ldd		[%o1 + 0x08], %f58
1384	ldd		[%o1 + 0x10], %f60
1385	ldd		[%o1 + 0x18], %f62
1386	fxor		%f56, %f0, %f56	/* input ^ keystream */
1387	fxor		%f58, %f2, %f58
1388	fxor		%f60, %f4, %f60
1389	fxor		%f62, %f6, %f62
1390	std		%f56, [%o2 + 0x00]
1391	std		%f58, [%o2 + 0x08]
1392	std		%f60, [%o2 + 0x10]
1393	std		%f62, [%o2 + 0x18]
1394	subcc		%o3, 0x20, %o3	/* 32 bytes consumed */
1395	add		%o1, 0x20, %o1
1396	brgz		%o3, 1b	/* more than one block left: loop again */
1397	 add		%o2, 0x20, %o2	/* delay slot: advance output pointer */
1398	brlz,pt		%o3, 11f	/* negative: nothing left, save counter */
1399	 nop
	/* Tail: %o3 == 0 here, i.e. exactly one block remains. */
140010:	xor		%g1, %g3, %o5
1401	MOVXTOD_O5_F0
1402	xor		%g2, %g7, %o5
1403	MOVXTOD_O5_F2
1404	add		%g7, 1, %g7
1405	add		%g3, 1, %o5
1406	movrz		%g7, %o5, %g3
1407	ENCRYPT_128(8, 0, 2, 4, 6)	/* state %f0/%f2, temporaries %f4/%f6 */
1408	ldd		[%o1 + 0x00], %f4
1409	ldd		[%o1 + 0x08], %f6
1410	fxor		%f4, %f0, %f4	/* input ^ keystream */
1411	fxor		%f6, %f2, %f6
1412	std		%f4, [%o2 + 0x00]
1413	std		%f6, [%o2 + 0x08]
141411:	stx		%g3, [%o4 + 0x00]	/* write the advanced counter back */
1415	retl
1416	 stx		%g7, [%o4 + 0x08]	/* delay slot: second half of the counter */
1417ENDPROC(aes_sparc64_ctr_crypt_128)
1418
1419	.align		32
1420ENTRY(aes_sparc64_ctr_crypt_192)
1421	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
	/*
	 * CTR mode: encrypts successive counter values and xors the result
	 * with the input.  Two 16-byte blocks per loop pass, with a
	 * single-block tail at label 10.
	 *
	 * The 16-byte counter at %o4 is held as %g3 (bytes 0-7) and %g7
	 * (bytes 8-15).  Each increment adds 1 to %g7 and, when %g7 wraps to
	 * zero, carries into %g3.  The updated counter is stored back to
	 * [%o4] at label 11.
	 *
	 * len is assumed to be a nonzero multiple of 16 (presumably enforced
	 * by the caller): at least one block is always processed and there is
	 * no partial-block handling.
	 *
	 * The round macros are given KEY_BASE 8, so the round keys are
	 * presumably already resident in the FP registers from %f8 upward,
	 * loaded by code outside this function -- TODO confirm.
	 * MOVXTOD_O5_<dst> comes from opcodes.h (not shown); by its name it
	 * presumably copies %o5 into FP double register <dst>.
	 */
1422	ldx		[%o4 + 0x00], %g3
1423	ldx		[%o4 + 0x08], %g7
1424	subcc		%o3, 0x10, %o3	/* %o3 = len - 16 */
1425	ldx		[%o0 + 0x00], %g1	/* %g1:%g2 = first 16 bytes at key */
1426	be		10f	/* exactly one block: go straight to the tail */
1427	 ldx		[%o0 + 0x08], %g2	/* delay slot: executed on both paths */
	/* Two-block loop: first counter block -> %f0:%f2. */
14281:	xor		%g1, %g3, %o5
1429	MOVXTOD_O5_F0
1430	xor		%g2, %g7, %o5
1431	MOVXTOD_O5_F2
1432	add		%g7, 1, %g7	/* counter++ (low half) */
1433	add		%g3, 1, %o5	/* candidate high half + 1 */
1434	movrz		%g7, %o5, %g3	/* take it only if the low half wrapped to 0 */
	/* Second counter block -> %f4:%f6. */
1435	xor		%g1, %g3, %o5
1436	MOVXTOD_O5_F4
1437	xor		%g2, %g7, %o5
1438	MOVXTOD_O5_F6
1439	add		%g7, 1, %g7
1440	add		%g3, 1, %o5
1441	movrz		%g7, %o5, %g3
	/*
	 * NOTE(review): the trailing arguments (56, 58, 60, 62) are
	 * presumably the scratch FP registers -- confirm against the
	 * ENCRYPT_192_2 macro.
	 */
1442	ENCRYPT_192_2(8, 0, 2, 4, 6, 56, 58, 60, 62)
	/* %f56-%f62 are overwritten here with the input data. */
1443	ldd		[%o1 + 0x00], %f56
1444	ldd		[%o1 + 0x08], %f58
1445	ldd		[%o1 + 0x10], %f60
1446	ldd		[%o1 + 0x18], %f62
1447	fxor		%f56, %f0, %f56	/* input ^ keystream */
1448	fxor		%f58, %f2, %f58
1449	fxor		%f60, %f4, %f60
1450	fxor		%f62, %f6, %f62
1451	std		%f56, [%o2 + 0x00]
1452	std		%f58, [%o2 + 0x08]
1453	std		%f60, [%o2 + 0x10]
1454	std		%f62, [%o2 + 0x18]
1455	subcc		%o3, 0x20, %o3	/* 32 bytes consumed */
1456	add		%o1, 0x20, %o1
1457	brgz		%o3, 1b	/* more than one block left: loop again */
1458	 add		%o2, 0x20, %o2	/* delay slot: advance output pointer */
1459	brlz,pt		%o3, 11f	/* negative: nothing left, save counter */
1460	 nop
	/* Tail: %o3 == 0 here, i.e. exactly one block remains. */
146110:	xor		%g1, %g3, %o5
1462	MOVXTOD_O5_F0
1463	xor		%g2, %g7, %o5
1464	MOVXTOD_O5_F2
1465	add		%g7, 1, %g7
1466	add		%g3, 1, %o5
1467	movrz		%g7, %o5, %g3
1468	ENCRYPT_192(8, 0, 2, 4, 6)	/* keystream is taken from %f0/%f2 below */
1469	ldd		[%o1 + 0x00], %f4
1470	ldd		[%o1 + 0x08], %f6
1471	fxor		%f4, %f0, %f4	/* input ^ keystream */
1472	fxor		%f6, %f2, %f6
1473	std		%f4, [%o2 + 0x00]
1474	std		%f6, [%o2 + 0x08]
147511:	stx		%g3, [%o4 + 0x00]	/* write the advanced counter back */
1476	retl
1477	 stx		%g7, [%o4 + 0x08]	/* delay slot: second half of the counter */
1478ENDPROC(aes_sparc64_ctr_crypt_192)
1479
1480	.align		32
1481ENTRY(aes_sparc64_ctr_crypt_256)
1482	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
	/*
	 * CTR mode: encrypts successive counter values and xors the result
	 * with the input.  Two 16-byte blocks per loop pass, with a
	 * single-block tail at label 10.
	 *
	 * The 16-byte counter at %o4 is held as %g3 (bytes 0-7) and %g7
	 * (bytes 8-15).  Each increment adds 1 to %g7 and, when %g7 wraps to
	 * zero, carries into %g3.  The updated counter is stored back to
	 * [%o4] at label 11.
	 *
	 * len is assumed to be a nonzero multiple of 16 (presumably enforced
	 * by the caller): at least one block is always processed and there is
	 * no partial-block handling.
	 *
	 * The round macros are given KEY_BASE 8, so the round keys are
	 * presumably already resident in the FP registers from %f8 upward,
	 * loaded by code outside this function -- TODO confirm.
	 * MOVXTOD_O5_<dst> comes from opcodes.h (not shown); by its name it
	 * presumably copies %o5 into FP double register <dst>.
	 */
1483	ldx		[%o4 + 0x00], %g3
1484	ldx		[%o4 + 0x08], %g7
1485	subcc		%o3, 0x10, %o3	/* %o3 = len - 16 */
1486	ldx		[%o0 + 0x00], %g1	/* %g1:%g2 = first 16 bytes at key */
1487	be		10f	/* exactly one block: go straight to the tail */
1488	 ldx		[%o0 + 0x08], %g2	/* delay slot: executed on both paths */
	/* Two-block loop: first counter block -> %f0:%f2. */
14891:	xor		%g1, %g3, %o5
1490	MOVXTOD_O5_F0
1491	xor		%g2, %g7, %o5
1492	MOVXTOD_O5_F2
1493	add		%g7, 1, %g7	/* counter++ (low half) */
1494	add		%g3, 1, %o5	/* candidate high half + 1 */
1495	movrz		%g7, %o5, %g3	/* take it only if the low half wrapped to 0 */
	/* Second counter block -> %f4:%f6. */
1496	xor		%g1, %g3, %o5
1497	MOVXTOD_O5_F4
1498	xor		%g2, %g7, %o5
1499	MOVXTOD_O5_F6
1500	add		%g7, 1, %g7
1501	add		%g3, 1, %o5
1502	movrz		%g7, %o5, %g3
1503	ENCRYPT_256_2(8, 0, 2, 4, 6)
	/* %f56-%f62 are overwritten here with the input data. */
1504	ldd		[%o1 + 0x00], %f56
1505	ldd		[%o1 + 0x08], %f58
1506	ldd		[%o1 + 0x10], %f60
1507	ldd		[%o1 + 0x18], %f62
1508	fxor		%f56, %f0, %f56	/* input ^ keystream */
1509	fxor		%f58, %f2, %f58
1510	fxor		%f60, %f4, %f60
1511	fxor		%f62, %f6, %f62
1512	std		%f56, [%o2 + 0x00]
1513	std		%f58, [%o2 + 0x08]
1514	std		%f60, [%o2 + 0x10]
1515	std		%f62, [%o2 + 0x18]
1516	subcc		%o3, 0x20, %o3	/* 32 bytes consumed */
1517	add		%o1, 0x20, %o1
1518	brgz		%o3, 1b	/* more than one block left: loop again */
1519	 add		%o2, 0x20, %o2	/* delay slot: advance output pointer */
1520	brlz,pt		%o3, 11f	/* negative: nothing left, save counter */
1521	 nop
	/*
	 * Tail: %o3 == 0 here, i.e. exactly one block remains.
	 * Reload the 32 key bytes at offsets 0xd0-0xef into %f56-%f62; the
	 * loop above overwrote them with input data.
	 * NOTE(review): presumably these registers hold the last round keys
	 * needed by ENCRYPT_256 -- confirm against the macro definition.
	 */
152210:	ldd		[%o0 + 0xd0], %f56
1523	ldd		[%o0 + 0xd8], %f58
1524	ldd		[%o0 + 0xe0], %f60
1525	ldd		[%o0 + 0xe8], %f62
1526	xor		%g1, %g3, %o5
1527	MOVXTOD_O5_F0
1528	xor		%g2, %g7, %o5
1529	MOVXTOD_O5_F2
1530	add		%g7, 1, %g7
1531	add		%g3, 1, %o5
1532	movrz		%g7, %o5, %g3
1533	ENCRYPT_256(8, 0, 2, 4, 6)	/* keystream is taken from %f0/%f2 below */
1534	ldd		[%o1 + 0x00], %f4
1535	ldd		[%o1 + 0x08], %f6
1536	fxor		%f4, %f0, %f4	/* input ^ keystream */
1537	fxor		%f6, %f2, %f6
1538	std		%f4, [%o2 + 0x00]
1539	std		%f6, [%o2 + 0x08]
154011:	stx		%g3, [%o4 + 0x00]	/* write the advanced counter back */
1541	retl
1542	 stx		%g7, [%o4 + 0x08]	/* delay slot: second half of the counter */
1543ENDPROC(aes_sparc64_ctr_crypt_256)
1544