xref: /openbmc/linux/arch/sparc/crypto/aes_asm.S (revision e1858b2a)
1#include <linux/linkage.h>
2#include <asm/visasm.h>
3
4#include "opcodes.h"
5
/*
 * ENCRYPT_TWO_ROUNDS: two AES_EROUND01/AES_EROUND23 pairs on one block.
 *
 * All arguments are register numbers as the callers in this file use
 * them (e.g. 8, 4, 6, 0, 2 with data held in %f4/%f6).
 *   KEY_BASE - first of four key operands: KEY_BASE+0, +2, +4, +6
 *   I0, I1   - input pair; also receives the result of the second pair
 *   T0, T1   - scratch pair written by the first pair
 *
 * The AES_EROUND* macros are not defined in this file (presumably they
 * come from opcodes.h); each 01/23 pair presumably forms one AES round.
 */
6#define ENCRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \
7	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
8	AES_EROUND23(KEY_BASE +  2, I0, I1, T1) \
9	AES_EROUND01(KEY_BASE +  4, T0, T1, I0) \
10	AES_EROUND23(KEY_BASE +  6, T0, T1, I1)
11
/*
 * ENCRYPT_TWO_ROUNDS_2: same sequence as ENCRYPT_TWO_ROUNDS, applied to
 * two independent blocks that share the key operands KEY_BASE+0..+6.
 *   (I0, I1) with scratch (T0, T1) - first block
 *   (I2, I3) with scratch (T2, T3) - second block
 * The operations of the two blocks are interleaved pair by pair; results
 * end up back in I0..I3.
 */
12#define ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
13	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
14	AES_EROUND23(KEY_BASE +  2, I0, I1, T1) \
15	AES_EROUND01(KEY_BASE +  0, I2, I3, T2) \
16	AES_EROUND23(KEY_BASE +  2, I2, I3, T3) \
17	AES_EROUND01(KEY_BASE +  4, T0, T1, I0) \
18	AES_EROUND23(KEY_BASE +  6, T0, T1, I1) \
19	AES_EROUND01(KEY_BASE +  4, T2, T3, I2) \
20	AES_EROUND23(KEY_BASE +  6, T2, T3, I3)
21
/*
 * ENCRYPT_TWO_ROUNDS_LAST: like ENCRYPT_TWO_ROUNDS, but the second pair
 * uses the AES_EROUND01_L/AES_EROUND23_L variants (presumably the
 * final-round forms; they are defined outside this file).  Used as the
 * closing step of the ENCRYPT_128/192/256 chains below.
 */
22#define ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \
23	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
24	AES_EROUND23(KEY_BASE +  2, I0, I1, T1) \
25	AES_EROUND01_L(KEY_BASE +  4, T0, T1, I0) \
26	AES_EROUND23_L(KEY_BASE +  6, T0, T1, I1)
27
/*
 * ENCRYPT_TWO_ROUNDS_LAST_2: two-block form of ENCRYPT_TWO_ROUNDS_LAST.
 * The first pair for each block uses the plain round macros, the second
 * pair the _L variants; results are written back to I0..I3.
 */
28#define ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
29	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
30	AES_EROUND23(KEY_BASE +  2, I0, I1, T1) \
31	AES_EROUND01(KEY_BASE +  0, I2, I3, T2) \
32	AES_EROUND23(KEY_BASE +  2, I2, I3, T3) \
33	AES_EROUND01_L(KEY_BASE +  4, T0, T1, I0) \
34	AES_EROUND23_L(KEY_BASE +  6, T0, T1, I1) \
35	AES_EROUND01_L(KEY_BASE +  4, T2, T3, I2) \
36	AES_EROUND23_L(KEY_BASE +  6, T2, T3, I3)
37
38	/* 10 rounds
	 *
	 * ENCRYPT_128: four ENCRYPT_TWO_ROUNDS steps followed by one
	 * ENCRYPT_TWO_ROUNDS_LAST, i.e. five two-round steps on one block.
	 * Key operands consumed: KEY_BASE+0 .. KEY_BASE+38 (even numbers).
	 * I0/I1 hold the block on entry and on exit; T0/T1 are scratch.
	 */
39#define ENCRYPT_128(KEY_BASE, I0, I1, T0, T1) \
40	ENCRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
41	ENCRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
42	ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
43	ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
44	ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1)
45
/*
 * ENCRYPT_128_2: two-block form of ENCRYPT_128 (five two-round steps).
 * Blocks are (I0, I1) and (I2, I3); T0..T3 are scratch.  Both blocks use
 * the same key operands KEY_BASE+0 .. KEY_BASE+38.
 */
46#define ENCRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
47	ENCRYPT_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, T0, T1, T2, T3) \
48	ENCRYPT_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, T0, T1, T2, T3) \
49	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
50	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
51	ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3)
52
53	/* 12 rounds
	 *
	 * ENCRYPT_192: six two-round steps on one block (five plain, one
	 * _LAST).  Key operands consumed: KEY_BASE+0 .. KEY_BASE+46.
	 */
54#define ENCRYPT_192(KEY_BASE, I0, I1, T0, T1) \
55	ENCRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
56	ENCRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
57	ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
58	ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
59	ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
60	ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1)
61
/*
 * ENCRYPT_192_2: two-block form of ENCRYPT_192 (six two-round steps).
 * Blocks are (I0, I1) and (I2, I3); T0..T3 are scratch.
 */
62#define ENCRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
63	ENCRYPT_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, T0, T1, T2, T3) \
64	ENCRYPT_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, T0, T1, T2, T3) \
65	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
66	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
67	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \
68	ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3)
69
70	/* 14 rounds
	 *
	 * ENCRYPT_256: seven two-round steps on one block (six plain, one
	 * _LAST).  Key operands consumed: KEY_BASE+0 .. KEY_BASE+54, all of
	 * which must hold valid key material; only T0/T1 are used as scratch.
	 */
71#define ENCRYPT_256(KEY_BASE, I0, I1, T0, T1) \
72	ENCRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
73	ENCRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
74	ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
75	ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
76	ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
77	ENCRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \
78	ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1)
79
/*
 * ENCRYPT_256_TWO_ROUNDS_2: ENCRYPT_TWO_ROUNDS_2 with the four scratch
 * operands taken as TMP_BASE+0, +2, +4, +6 instead of being passed
 * individually.  ENCRYPT_256_2 uses it to borrow key registers as scratch.
 */
80#define ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \
81	ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \
82			     TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6)
83
/*
 * ENCRYPT_256_2: two-block, seven-step (14 round) encryption that borrows
 * key registers as scratch and reloads them from memory afterwards.
 *
 *  - Step 1 uses KEY_BASE+48..+54 as scratch; these are reloaded into
 *    %f56-%f62 from [%o0 + 0xd0 .. 0xe8] during the following steps.
 *  - Steps 2..7 and the hand-expanded last step use KEY_BASE+0..+6 as
 *    scratch; these are reloaded into %f8-%f14 from [%o0 + 0x10 .. 0x28]
 *    at the end.
 *
 * The reload targets are hard-coded, so the macro is only consistent with
 * KEY_BASE == 8 (KEY_BASE+48 == 56), and %o0 must point at the start of
 * the expanded key.  The offset/register pairing matches
 * aes_sparc64_load_encrypt_keys_256.
 */
84#define ENCRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \
85	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, KEY_BASE + 48) \
86	ldd	[%o0 + 0xd0], %f56; \
87	ldd	[%o0 + 0xd8], %f58; \
88	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, KEY_BASE +  0) \
89	ldd	[%o0 + 0xe0], %f60; \
90	ldd	[%o0 + 0xe8], %f62; \
91	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE +  0) \
92	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE +  0) \
93	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE +  0) \
94	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE +  0) \
95	AES_EROUND01(KEY_BASE +  48, I0, I1, KEY_BASE + 0) \
96	AES_EROUND23(KEY_BASE +  50, I0, I1, KEY_BASE + 2) \
97	AES_EROUND01(KEY_BASE +  48, I2, I3, KEY_BASE + 4) \
98	AES_EROUND23(KEY_BASE +  50, I2, I3, KEY_BASE + 6) \
99	AES_EROUND01_L(KEY_BASE +  52, KEY_BASE + 0, KEY_BASE + 2, I0) \
100	AES_EROUND23_L(KEY_BASE +  54, KEY_BASE + 0, KEY_BASE + 2, I1) \
101	ldd	[%o0 + 0x10], %f8; \
102	ldd	[%o0 + 0x18], %f10; \
103	AES_EROUND01_L(KEY_BASE +  52, KEY_BASE + 4, KEY_BASE + 6, I2) \
104	AES_EROUND23_L(KEY_BASE +  54, KEY_BASE + 4, KEY_BASE + 6, I3) \
105	ldd	[%o0 + 0x20], %f12; \
106	ldd	[%o0 + 0x28], %f14;
107
/*
 * DECRYPT_TWO_ROUNDS: two AES_DROUND23/AES_DROUND01 pairs on one block.
 *
 * Mirror of ENCRYPT_TWO_ROUNDS with the 23 operation issued before the
 * 01 operation in each pair: KEY_BASE+0 feeds DROUND23 (result in T1),
 * KEY_BASE+2 feeds DROUND01 (result in T0), then KEY_BASE+4/+6 produce
 * I1/I0 from T0/T1.  AES_DROUND* are defined outside this file.
 */
108#define DECRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \
109	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \
110	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \
111	AES_DROUND23(KEY_BASE +  4, T0, T1, I1) \
112	AES_DROUND01(KEY_BASE +  6, T0, T1, I0)
113
/*
 * DECRYPT_TWO_ROUNDS_2: DECRYPT_TWO_ROUNDS applied to two independent
 * blocks, (I0, I1) with scratch (T0, T1) and (I2, I3) with scratch
 * (T2, T3), sharing key operands KEY_BASE+0..+6 and interleaved pair by
 * pair.
 */
114#define DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
115	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \
116	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \
117	AES_DROUND23(KEY_BASE +  0, I2, I3, T3) \
118	AES_DROUND01(KEY_BASE +  2, I2, I3, T2) \
119	AES_DROUND23(KEY_BASE +  4, T0, T1, I1) \
120	AES_DROUND01(KEY_BASE +  6, T0, T1, I0) \
121	AES_DROUND23(KEY_BASE +  4, T2, T3, I3) \
122	AES_DROUND01(KEY_BASE +  6, T2, T3, I2)
123
/*
 * DECRYPT_TWO_ROUNDS_LAST: like DECRYPT_TWO_ROUNDS, but the second pair
 * uses the AES_DROUND23_L/AES_DROUND01_L variants (presumably the
 * final-round forms; defined outside this file).
 */
124#define DECRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \
125	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \
126	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \
127	AES_DROUND23_L(KEY_BASE +  4, T0, T1, I1) \
128	AES_DROUND01_L(KEY_BASE +  6, T0, T1, I0)
129
/*
 * DECRYPT_TWO_ROUNDS_LAST_2: two-block form of DECRYPT_TWO_ROUNDS_LAST.
 * Results are written back to I0..I3; T0..T3 are scratch.
 */
130#define DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
131	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \
132	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \
133	AES_DROUND23(KEY_BASE +  0, I2, I3, T3) \
134	AES_DROUND01(KEY_BASE +  2, I2, I3, T2) \
135	AES_DROUND23_L(KEY_BASE +  4, T0, T1, I1) \
136	AES_DROUND01_L(KEY_BASE +  6, T0, T1, I0) \
137	AES_DROUND23_L(KEY_BASE +  4, T2, T3, I3) \
138	AES_DROUND01_L(KEY_BASE +  6, T2, T3, I2)
139
140	/* 10 rounds
	 *
	 * DECRYPT_128: five two-round decrypt steps on one block (four
	 * plain, one _LAST).  Key operands consumed: KEY_BASE+0 ..
	 * KEY_BASE+38; the callers in this file load them in descending
	 * memory order.
	 */
141#define DECRYPT_128(KEY_BASE, I0, I1, T0, T1) \
142	DECRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
143	DECRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
144	DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
145	DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
146	DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1)
147
/*
 * DECRYPT_128_2: two-block form of DECRYPT_128 (five two-round steps).
 * Blocks are (I0, I1) and (I2, I3); T0..T3 are scratch.
 */
148#define DECRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
149	DECRYPT_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, T0, T1, T2, T3) \
150	DECRYPT_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, T0, T1, T2, T3) \
151	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
152	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
153	DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3)
154
155	/* 12 rounds
	 *
	 * DECRYPT_192: six two-round decrypt steps on one block (five
	 * plain, one _LAST).  Key operands: KEY_BASE+0 .. KEY_BASE+46.
	 */
156#define DECRYPT_192(KEY_BASE, I0, I1, T0, T1) \
157	DECRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
158	DECRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
159	DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
160	DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
161	DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
162	DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1)
163
/*
 * DECRYPT_192_2: two-block form of DECRYPT_192 (six two-round steps).
 * Blocks are (I0, I1) and (I2, I3); T0..T3 are scratch.
 */
164#define DECRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
165	DECRYPT_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, T0, T1, T2, T3) \
166	DECRYPT_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, T0, T1, T2, T3) \
167	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
168	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
169	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \
170	DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3)
171
172	/* 14 rounds
	 *
	 * DECRYPT_256: seven two-round decrypt steps on one block (six
	 * plain, one _LAST).  Key operands: KEY_BASE+0 .. KEY_BASE+54, all
	 * of which must hold valid key material; only T0/T1 are scratch.
	 */
173#define DECRYPT_256(KEY_BASE, I0, I1, T0, T1) \
174	DECRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
175	DECRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
176	DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
177	DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
178	DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
179	DECRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \
180	DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1)
181
/*
 * DECRYPT_256_TWO_ROUNDS_2: DECRYPT_TWO_ROUNDS_2 with the four scratch
 * operands taken as TMP_BASE+0, +2, +4, +6.  DECRYPT_256_2 uses it to
 * borrow key registers as scratch.
 */
182#define DECRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \
183	DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \
184			     TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6)
185
/*
 * DECRYPT_256_2: two-block, seven-step (14 round) decryption that borrows
 * key registers as scratch and reloads them from memory afterwards.
 *
 *  - Step 1 uses KEY_BASE+48..+54 as scratch; these are reloaded into
 *    %f56-%f62 from [%o0 + 0x18 .. 0x00] (descending) during the
 *    following steps.
 *  - Later steps use KEY_BASE+0..+6 as scratch; these are reloaded into
 *    %f8-%f14 from [%o0 + 0xd8 .. 0xc0] (descending) at the end.
 *
 * The reload targets are hard-coded, so the macro is only consistent with
 * KEY_BASE == 8, and %o0 must point at the start of the expanded key when
 * the macro runs.  The offset/register pairing matches
 * aes_sparc64_load_decrypt_keys_256.
 */
186#define DECRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \
187	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, KEY_BASE + 48) \
188	ldd	[%o0 + 0x18], %f56; \
189	ldd	[%o0 + 0x10], %f58; \
190	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, KEY_BASE +  0) \
191	ldd	[%o0 + 0x08], %f60; \
192	ldd	[%o0 + 0x00], %f62; \
193	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE +  0) \
194	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE +  0) \
195	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE +  0) \
196	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE +  0) \
197	AES_DROUND23(KEY_BASE +  48, I0, I1, KEY_BASE + 2) \
198	AES_DROUND01(KEY_BASE +  50, I0, I1, KEY_BASE + 0) \
199	AES_DROUND23(KEY_BASE +  48, I2, I3, KEY_BASE + 6) \
200	AES_DROUND01(KEY_BASE +  50, I2, I3, KEY_BASE + 4) \
201	AES_DROUND23_L(KEY_BASE +  52, KEY_BASE + 0, KEY_BASE + 2, I1) \
202	AES_DROUND01_L(KEY_BASE +  54, KEY_BASE + 0, KEY_BASE + 2, I0) \
203	ldd	[%o0 + 0xd8], %f8; \
204	ldd	[%o0 + 0xd0], %f10; \
205	AES_DROUND23_L(KEY_BASE +  52, KEY_BASE + 4, KEY_BASE + 6, I3) \
206	AES_DROUND01_L(KEY_BASE +  54, KEY_BASE + 4, KEY_BASE + 6, I2) \
207	ldd	[%o0 + 0xc8], %f12; \
208	ldd	[%o0 + 0xc0], %f14;
209
210	.align	32
211ENTRY(aes_sparc64_key_expand)
212	/* %o0=input_key, %o1=output_key, %o2=key_len
	 *
	 * Expand a raw AES key into the key schedule at output_key.
	 *
	 * The raw key is copied to the start of the output first; the
	 * AES_KEXPAND0/1/2 macros (defined outside this file) then build
	 * the remaining words in %f registers, which are stored behind it.
	 * key_len is compared against 24: less takes the 128-bit path
	 * (label 2), equal takes the 192-bit path (label 1), anything
	 * greater falls through to the 256-bit path.
	 *
	 * Total bytes written to output_key:
	 *   128-bit: 0x10 + 0xa0        = 0xb0
	 *   192-bit: 0x10 + 0x08 + 0xb8 = 0xd0
	 *   256-bit: 0x10 + 0x10 + 0xd0 = 0xf0
	 *
	 * The input key is read with 32-bit loads; the output is written
	 * with 64-bit std.  %o1 is advanced as the output is produced.
	 */
213	VISEntry
	/* First 16 key bytes: common to all key sizes. */
214	ld	[%o0 + 0x00], %f0
215	ld	[%o0 + 0x04], %f1
216	ld	[%o0 + 0x08], %f2
217	ld	[%o0 + 0x0c], %f3
218
219	std	%f0, [%o1 + 0x00]
220	std	%f2, [%o1 + 0x08]
221	add	%o1, 0x10, %o1
222
	/* Dispatch on key_len; both branches have a nop in the delay slot. */
223	cmp	%o2, 24
224	bl	2f
225	 nop
226
227	be	1f
228	 nop
229
230	/* 256-bit key expansion */
	/* Copy key bytes 16..31, then derive %f8..%f58 from %f0..%f6. */
231	ld	[%o0 + 0x10], %f4
232	ld	[%o0 + 0x14], %f5
233	ld	[%o0 + 0x18], %f6
234	ld	[%o0 + 0x1c], %f7
235
236	std	%f4, [%o1 + 0x00]
237	std	%f6, [%o1 + 0x08]
238	add	%o1, 0x10, %o1
239
240	AES_KEXPAND1(0, 6, 0x0, 8)
241	AES_KEXPAND2(2, 8, 10)
242	AES_KEXPAND0(4, 10, 12)
243	AES_KEXPAND2(6, 12, 14)
244	AES_KEXPAND1(8, 14, 0x1, 16)
245	AES_KEXPAND2(10, 16, 18)
246	AES_KEXPAND0(12, 18, 20)
247	AES_KEXPAND2(14, 20, 22)
248	AES_KEXPAND1(16, 22, 0x2, 24)
249	AES_KEXPAND2(18, 24, 26)
250	AES_KEXPAND0(20, 26, 28)
251	AES_KEXPAND2(22, 28, 30)
252	AES_KEXPAND1(24, 30, 0x3, 32)
253	AES_KEXPAND2(26, 32, 34)
254	AES_KEXPAND0(28, 34, 36)
255	AES_KEXPAND2(30, 36, 38)
256	AES_KEXPAND1(32, 38, 0x4, 40)
257	AES_KEXPAND2(34, 40, 42)
258	AES_KEXPAND0(36, 42, 44)
259	AES_KEXPAND2(38, 44, 46)
260	AES_KEXPAND1(40, 46, 0x5, 48)
261	AES_KEXPAND2(42, 48, 50)
262	AES_KEXPAND0(44, 50, 52)
263	AES_KEXPAND2(46, 52, 54)
264	AES_KEXPAND1(48, 54, 0x6, 56)
265	AES_KEXPAND2(50, 56, 58)
266
	/* Store the 26 derived doubles; the last store sits in the
	 * delay slot of the branch to the common exit.
	 */
267	std	%f8, [%o1 + 0x00]
268	std	%f10, [%o1 + 0x08]
269	std	%f12, [%o1 + 0x10]
270	std	%f14, [%o1 + 0x18]
271	std	%f16, [%o1 + 0x20]
272	std	%f18, [%o1 + 0x28]
273	std	%f20, [%o1 + 0x30]
274	std	%f22, [%o1 + 0x38]
275	std	%f24, [%o1 + 0x40]
276	std	%f26, [%o1 + 0x48]
277	std	%f28, [%o1 + 0x50]
278	std	%f30, [%o1 + 0x58]
279	std	%f32, [%o1 + 0x60]
280	std	%f34, [%o1 + 0x68]
281	std	%f36, [%o1 + 0x70]
282	std	%f38, [%o1 + 0x78]
283	std	%f40, [%o1 + 0x80]
284	std	%f42, [%o1 + 0x88]
285	std	%f44, [%o1 + 0x90]
286	std	%f46, [%o1 + 0x98]
287	std	%f48, [%o1 + 0xa0]
288	std	%f50, [%o1 + 0xa8]
289	std	%f52, [%o1 + 0xb0]
290	std	%f54, [%o1 + 0xb8]
291	std	%f56, [%o1 + 0xc0]
292	ba,pt	%xcc, 80f
293	 std	%f58, [%o1 + 0xc8]
294
2951:
296	/* 192-bit key expansion */
	/* Copy key bytes 16..23, then derive %f6..%f50 from %f0..%f4. */
297	ld	[%o0 + 0x10], %f4
298	ld	[%o0 + 0x14], %f5
299
300	std	%f4, [%o1 + 0x00]
301	add	%o1, 0x08, %o1
302
303	AES_KEXPAND1(0, 4, 0x0, 6)
304	AES_KEXPAND2(2, 6, 8)
305	AES_KEXPAND2(4, 8, 10)
306	AES_KEXPAND1(6, 10, 0x1, 12)
307	AES_KEXPAND2(8, 12, 14)
308	AES_KEXPAND2(10, 14, 16)
309	AES_KEXPAND1(12, 16, 0x2, 18)
310	AES_KEXPAND2(14, 18, 20)
311	AES_KEXPAND2(16, 20, 22)
312	AES_KEXPAND1(18, 22, 0x3, 24)
313	AES_KEXPAND2(20, 24, 26)
314	AES_KEXPAND2(22, 26, 28)
315	AES_KEXPAND1(24, 28, 0x4, 30)
316	AES_KEXPAND2(26, 30, 32)
317	AES_KEXPAND2(28, 32, 34)
318	AES_KEXPAND1(30, 34, 0x5, 36)
319	AES_KEXPAND2(32, 36, 38)
320	AES_KEXPAND2(34, 38, 40)
321	AES_KEXPAND1(36, 40, 0x6, 42)
322	AES_KEXPAND2(38, 42, 44)
323	AES_KEXPAND2(40, 44, 46)
324	AES_KEXPAND1(42, 46, 0x7, 48)
325	AES_KEXPAND2(44, 48, 50)
326
	/* Store the 23 derived doubles; last store in the delay slot. */
327	std	%f6, [%o1 + 0x00]
328	std	%f8, [%o1 + 0x08]
329	std	%f10, [%o1 + 0x10]
330	std	%f12, [%o1 + 0x18]
331	std	%f14, [%o1 + 0x20]
332	std	%f16, [%o1 + 0x28]
333	std	%f18, [%o1 + 0x30]
334	std	%f20, [%o1 + 0x38]
335	std	%f22, [%o1 + 0x40]
336	std	%f24, [%o1 + 0x48]
337	std	%f26, [%o1 + 0x50]
338	std	%f28, [%o1 + 0x58]
339	std	%f30, [%o1 + 0x60]
340	std	%f32, [%o1 + 0x68]
341	std	%f34, [%o1 + 0x70]
342	std	%f36, [%o1 + 0x78]
343	std	%f38, [%o1 + 0x80]
344	std	%f40, [%o1 + 0x88]
345	std	%f42, [%o1 + 0x90]
346	std	%f44, [%o1 + 0x98]
347	std	%f46, [%o1 + 0xa0]
348	std	%f48, [%o1 + 0xa8]
349	ba,pt	%xcc, 80f
350	 std	%f50, [%o1 + 0xb0]
351
3522:
353	/* 128-bit key expansion */
	/* Derive %f4..%f42 from the 16 key bytes already in %f0..%f2. */
354	AES_KEXPAND1(0, 2, 0x0, 4)
355	AES_KEXPAND2(2, 4, 6)
356	AES_KEXPAND1(4, 6, 0x1, 8)
357	AES_KEXPAND2(6, 8, 10)
358	AES_KEXPAND1(8, 10, 0x2, 12)
359	AES_KEXPAND2(10, 12, 14)
360	AES_KEXPAND1(12, 14, 0x3, 16)
361	AES_KEXPAND2(14, 16, 18)
362	AES_KEXPAND1(16, 18, 0x4, 20)
363	AES_KEXPAND2(18, 20, 22)
364	AES_KEXPAND1(20, 22, 0x5, 24)
365	AES_KEXPAND2(22, 24, 26)
366	AES_KEXPAND1(24, 26, 0x6, 28)
367	AES_KEXPAND2(26, 28, 30)
368	AES_KEXPAND1(28, 30, 0x7, 32)
369	AES_KEXPAND2(30, 32, 34)
370	AES_KEXPAND1(32, 34, 0x8, 36)
371	AES_KEXPAND2(34, 36, 38)
372	AES_KEXPAND1(36, 38, 0x9, 40)
373	AES_KEXPAND2(38, 40, 42)
374
	/* Store the 20 derived doubles and fall through to the exit. */
375	std	%f4, [%o1 + 0x00]
376	std	%f6, [%o1 + 0x08]
377	std	%f8, [%o1 + 0x10]
378	std	%f10, [%o1 + 0x18]
379	std	%f12, [%o1 + 0x20]
380	std	%f14, [%o1 + 0x28]
381	std	%f16, [%o1 + 0x30]
382	std	%f18, [%o1 + 0x38]
383	std	%f20, [%o1 + 0x40]
384	std	%f22, [%o1 + 0x48]
385	std	%f24, [%o1 + 0x50]
386	std	%f26, [%o1 + 0x58]
387	std	%f28, [%o1 + 0x60]
388	std	%f30, [%o1 + 0x68]
389	std	%f32, [%o1 + 0x70]
390	std	%f34, [%o1 + 0x78]
391	std	%f36, [%o1 + 0x80]
392	std	%f38, [%o1 + 0x88]
393	std	%f40, [%o1 + 0x90]
394	std	%f42, [%o1 + 0x98]
	/* Common exit for all three key sizes; VISExit is placed in the
	 * retl delay slot.
	 */
39580:
396	retl
397	 VISExit
398ENDPROC(aes_sparc64_key_expand)
399
400	.align		32
401ENTRY(aes_sparc64_encrypt_128)
402	/* %o0=key, %o1=input, %o2=output
	 *
	 * Encrypt one 16-byte block with an expanded 128-bit key.
	 * Self-contained: brackets its FPU use with VISEntry/VISExit and
	 * loads the whole key schedule (0x00..0xaf) itself.
	 *
	 * Input and output are accessed with 32-bit ld/st.  The block lives
	 * in %f4-%f7, the key in %f8-%f50, and %f0/%f2 are scratch.
	 */
403	VISEntry
404	ld		[%o1 + 0x00], %f4
405	ld		[%o1 + 0x04], %f5
406	ld		[%o1 + 0x08], %f6
407	ld		[%o1 + 0x0c], %f7
408	ldd		[%o0 + 0x00], %f8
409	ldd		[%o0 + 0x08], %f10
410	ldd		[%o0 + 0x10], %f12
411	ldd		[%o0 + 0x18], %f14
412	ldd		[%o0 + 0x20], %f16
413	ldd		[%o0 + 0x28], %f18
414	ldd		[%o0 + 0x30], %f20
415	ldd		[%o0 + 0x38], %f22
416	ldd		[%o0 + 0x40], %f24
417	ldd		[%o0 + 0x48], %f26
418	ldd		[%o0 + 0x50], %f28
419	ldd		[%o0 + 0x58], %f30
420	ldd		[%o0 + 0x60], %f32
421	ldd		[%o0 + 0x68], %f34
422	ldd		[%o0 + 0x70], %f36
423	ldd		[%o0 + 0x78], %f38
424	ldd		[%o0 + 0x80], %f40
425	ldd		[%o0 + 0x88], %f42
426	ldd		[%o0 + 0x90], %f44
427	ldd		[%o0 + 0x98], %f46
428	ldd		[%o0 + 0xa0], %f48
429	ldd		[%o0 + 0xa8], %f50
	/* XOR the first 16 key bytes into the block, then run the round
	 * chain on the key operands starting at %f12.
	 */
430	fxor		%f8, %f4, %f4
431	fxor		%f10, %f6, %f6
432	ENCRYPT_128(12, 4, 6, 0, 2)
433	st		%f4, [%o2 + 0x00]
434	st		%f5, [%o2 + 0x04]
435	st		%f6, [%o2 + 0x08]
436	st		%f7, [%o2 + 0x0c]
437	retl
438	 VISExit
439ENDPROC(aes_sparc64_encrypt_128)
440
441	.align		32
442ENTRY(aes_sparc64_encrypt_192)
443	/* %o0=key, %o1=input, %o2=output
	 *
	 * Encrypt one 16-byte block with an expanded 192-bit key.
	 *
	 * Structure: XOR in key bytes 0x00..0x0f, run one two-round step
	 * with the key at 0x10..0x2f, advance %o0 by 0x20, then reuse the
	 * ten-round ENCRYPT_128 chain on the remaining key (original
	 * offsets 0x30..0xcf).  %o0 is modified.
	 */
444	VISEntry
445	ld		[%o1 + 0x00], %f4
446	ld		[%o1 + 0x04], %f5
447	ld		[%o1 + 0x08], %f6
448	ld		[%o1 + 0x0c], %f7
449
450	ldd		[%o0 + 0x00], %f8
451	ldd		[%o0 + 0x08], %f10
452
453	fxor		%f8, %f4, %f4
454	fxor		%f10, %f6, %f6
455
	/* %f8/%f10 are free again after the XOR; load the first four
	 * round-key doubles into %f8-%f14.
	 */
456	ldd		[%o0 + 0x10], %f8
457	ldd		[%o0 + 0x18], %f10
458	ldd		[%o0 + 0x20], %f12
459	ldd		[%o0 + 0x28], %f14
460	add		%o0, 0x20, %o0
461
462	ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
463
	/* Offsets below are relative to the advanced %o0. */
464	ldd		[%o0 + 0x10], %f12
465	ldd		[%o0 + 0x18], %f14
466	ldd		[%o0 + 0x20], %f16
467	ldd		[%o0 + 0x28], %f18
468	ldd		[%o0 + 0x30], %f20
469	ldd		[%o0 + 0x38], %f22
470	ldd		[%o0 + 0x40], %f24
471	ldd		[%o0 + 0x48], %f26
472	ldd		[%o0 + 0x50], %f28
473	ldd		[%o0 + 0x58], %f30
474	ldd		[%o0 + 0x60], %f32
475	ldd		[%o0 + 0x68], %f34
476	ldd		[%o0 + 0x70], %f36
477	ldd		[%o0 + 0x78], %f38
478	ldd		[%o0 + 0x80], %f40
479	ldd		[%o0 + 0x88], %f42
480	ldd		[%o0 + 0x90], %f44
481	ldd		[%o0 + 0x98], %f46
482	ldd		[%o0 + 0xa0], %f48
483	ldd		[%o0 + 0xa8], %f50
484
485
486	ENCRYPT_128(12, 4, 6, 0, 2)
487
488	st		%f4, [%o2 + 0x00]
489	st		%f5, [%o2 + 0x04]
490	st		%f6, [%o2 + 0x08]
491	st		%f7, [%o2 + 0x0c]
492
493	retl
494	 VISExit
495ENDPROC(aes_sparc64_encrypt_192)
496
497	.align		32
498ENTRY(aes_sparc64_encrypt_256)
499	/* %o0=key, %o1=input, %o2=output
	 *
	 * Encrypt one 16-byte block with an expanded 256-bit key.
	 *
	 * Structure: XOR in key bytes 0x00..0x0f, then two two-round steps,
	 * each loading four key doubles into %f8-%f14 and advancing %o0 by
	 * 0x20, followed by the ten-round ENCRYPT_128 chain on the rest of
	 * the key (original offsets 0x50..0xef).  %o0 is modified.
	 */
500	VISEntry
501	ld		[%o1 + 0x00], %f4
502	ld		[%o1 + 0x04], %f5
503	ld		[%o1 + 0x08], %f6
504	ld		[%o1 + 0x0c], %f7
505
506	ldd		[%o0 + 0x00], %f8
507	ldd		[%o0 + 0x08], %f10
508
509	fxor		%f8, %f4, %f4
510	fxor		%f10, %f6, %f6
511
	/* First two-round step: key at original offsets 0x10..0x2f. */
512	ldd		[%o0 + 0x10], %f8
513
514	ldd		[%o0 + 0x18], %f10
515	ldd		[%o0 + 0x20], %f12
516	ldd		[%o0 + 0x28], %f14
517	add		%o0, 0x20, %o0
518
519	ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
520
	/* Second two-round step: key at original offsets 0x30..0x4f. */
521	ldd		[%o0 + 0x10], %f8
522
523	ldd		[%o0 + 0x18], %f10
524	ldd		[%o0 + 0x20], %f12
525	ldd		[%o0 + 0x28], %f14
526	add		%o0, 0x20, %o0
527
528	ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
529
	/* Remaining ten rounds; offsets are relative to the advanced %o0. */
530	ldd		[%o0 + 0x10], %f12
531	ldd		[%o0 + 0x18], %f14
532	ldd		[%o0 + 0x20], %f16
533	ldd		[%o0 + 0x28], %f18
534	ldd		[%o0 + 0x30], %f20
535	ldd		[%o0 + 0x38], %f22
536	ldd		[%o0 + 0x40], %f24
537	ldd		[%o0 + 0x48], %f26
538	ldd		[%o0 + 0x50], %f28
539	ldd		[%o0 + 0x58], %f30
540	ldd		[%o0 + 0x60], %f32
541	ldd		[%o0 + 0x68], %f34
542	ldd		[%o0 + 0x70], %f36
543	ldd		[%o0 + 0x78], %f38
544	ldd		[%o0 + 0x80], %f40
545	ldd		[%o0 + 0x88], %f42
546	ldd		[%o0 + 0x90], %f44
547	ldd		[%o0 + 0x98], %f46
548	ldd		[%o0 + 0xa0], %f48
549	ldd		[%o0 + 0xa8], %f50
550
551	ENCRYPT_128(12, 4, 6, 0, 2)
552
553	st		%f4, [%o2 + 0x00]
554	st		%f5, [%o2 + 0x04]
555	st		%f6, [%o2 + 0x08]
556	st		%f7, [%o2 + 0x0c]
557
558	retl
559	 VISExit
560ENDPROC(aes_sparc64_encrypt_256)
561
562	.align		32
563ENTRY(aes_sparc64_decrypt_128)
564	/* %o0=key, %o1=input, %o2=output
	 *
	 * Decrypt one 16-byte block with an expanded 128-bit key.
	 *
	 * %o0 points at the start of the key schedule.  The last 16 bytes
	 * (0xa0/0xa8) are XORed into the block first; the rest is loaded
	 * in descending order, 0x98 down to 0x00, into %f12..%f50 so that
	 * DECRYPT_128 walks the schedule backwards.
	 */
565	VISEntry
566	ld		[%o1 + 0x00], %f4
567	ld		[%o1 + 0x04], %f5
568	ld		[%o1 + 0x08], %f6
569	ld		[%o1 + 0x0c], %f7
570	ldd		[%o0 + 0xa0], %f8
571	ldd		[%o0 + 0xa8], %f10
572	ldd		[%o0 + 0x98], %f12
573	ldd		[%o0 + 0x90], %f14
574	ldd		[%o0 + 0x88], %f16
575	ldd		[%o0 + 0x80], %f18
576	ldd		[%o0 + 0x78], %f20
577	ldd		[%o0 + 0x70], %f22
578	ldd		[%o0 + 0x68], %f24
579	ldd		[%o0 + 0x60], %f26
580	ldd		[%o0 + 0x58], %f28
581	ldd		[%o0 + 0x50], %f30
582	ldd		[%o0 + 0x48], %f32
583	ldd		[%o0 + 0x40], %f34
584	ldd		[%o0 + 0x38], %f36
585	ldd		[%o0 + 0x30], %f38
586	ldd		[%o0 + 0x28], %f40
587	ldd		[%o0 + 0x20], %f42
588	ldd		[%o0 + 0x18], %f44
589	ldd		[%o0 + 0x10], %f46
590	ldd		[%o0 + 0x08], %f48
591	ldd		[%o0 + 0x00], %f50
592	fxor		%f8, %f4, %f4
593	fxor		%f10, %f6, %f6
594	DECRYPT_128(12, 4, 6, 0, 2)
595	st		%f4, [%o2 + 0x00]
596	st		%f5, [%o2 + 0x04]
597	st		%f6, [%o2 + 0x08]
598	st		%f7, [%o2 + 0x0c]
599	retl
600	 VISExit
601ENDPROC(aes_sparc64_decrypt_128)
602
603	.align		32
604ENTRY(aes_sparc64_decrypt_192)
605	/* %o0=key, %o1=input, %o2=output
	 *
	 * Decrypt one 16-byte block with an expanded 192-bit key.
	 *
	 * %o0 points at the start of the key schedule.  The last 16 bytes
	 * (0xc0/0xc8) are XORed into the block; one two-round step runs on
	 * %f12-%f18 (0xb8..0xa0), then the ten-round DECRYPT_128 chain runs
	 * on %f20-%f58 (0x98 down to 0x00).  Key loads are spread around
	 * the first round step.
	 */
606	VISEntry
607	ld		[%o1 + 0x00], %f4
608	ld		[%o1 + 0x04], %f5
609	ld		[%o1 + 0x08], %f6
610	ld		[%o1 + 0x0c], %f7
611	ldd		[%o0 + 0xc0], %f8
612	ldd		[%o0 + 0xc8], %f10
613	ldd		[%o0 + 0xb8], %f12
614	ldd		[%o0 + 0xb0], %f14
615	ldd		[%o0 + 0xa8], %f16
616	ldd		[%o0 + 0xa0], %f18
617	fxor		%f8, %f4, %f4
618	fxor		%f10, %f6, %f6
619	ldd		[%o0 + 0x98], %f20
620	ldd		[%o0 + 0x90], %f22
621	ldd		[%o0 + 0x88], %f24
622	ldd		[%o0 + 0x80], %f26
623	DECRYPT_TWO_ROUNDS(12, 4, 6, 0, 2)
624	ldd		[%o0 + 0x78], %f28
625	ldd		[%o0 + 0x70], %f30
626	ldd		[%o0 + 0x68], %f32
627	ldd		[%o0 + 0x60], %f34
628	ldd		[%o0 + 0x58], %f36
629	ldd		[%o0 + 0x50], %f38
630	ldd		[%o0 + 0x48], %f40
631	ldd		[%o0 + 0x40], %f42
632	ldd		[%o0 + 0x38], %f44
633	ldd		[%o0 + 0x30], %f46
634	ldd		[%o0 + 0x28], %f48
635	ldd		[%o0 + 0x20], %f50
636	ldd		[%o0 + 0x18], %f52
637	ldd		[%o0 + 0x10], %f54
638	ldd		[%o0 + 0x08], %f56
639	ldd		[%o0 + 0x00], %f58
640	DECRYPT_128(20, 4, 6, 0, 2)
641	st		%f4, [%o2 + 0x00]
642	st		%f5, [%o2 + 0x04]
643	st		%f6, [%o2 + 0x08]
644	st		%f7, [%o2 + 0x0c]
645	retl
646	 VISExit
647ENDPROC(aes_sparc64_decrypt_192)
648
649	.align		32
650ENTRY(aes_sparc64_decrypt_256)
651	/* %o0=key, %o1=input, %o2=output
	 *
	 * Decrypt one 16-byte block with an expanded 256-bit key.
	 *
	 * %o0 points at the start of the key schedule.  The last 16 bytes
	 * (0xe0/0xe8) are XORed into the block, then 28 round operations
	 * (14 pairs; the final pair uses the _L forms) walk the key from
	 * 0xd8 down to 0x00.
	 *
	 * Key loads are interleaved with the round operations, and
	 * %f12-%f26 are used twice: first for 0xd8..0xa0, then, once each
	 * register has been consumed, refilled with 0x98..0x60.  That is
	 * why the 12..26 key operands appear twice in the sequence.  The
	 * relative order of loads and round operations matters here.
	 */
652	VISEntry
653	ld		[%o1 + 0x00], %f4
654	ld		[%o1 + 0x04], %f5
655	ld		[%o1 + 0x08], %f6
656	ld		[%o1 + 0x0c], %f7
657	ldd		[%o0 + 0xe0], %f8
658	ldd		[%o0 + 0xe8], %f10
659	ldd		[%o0 + 0xd8], %f12
660	ldd		[%o0 + 0xd0], %f14
661	ldd		[%o0 + 0xc8], %f16
662	fxor		%f8, %f4, %f4
663	ldd		[%o0 + 0xc0], %f18
664	fxor		%f10, %f6, %f6
665	ldd		[%o0 + 0xb8], %f20
666	AES_DROUND23(12, 4, 6, 2)
667	ldd		[%o0 + 0xb0], %f22
668	AES_DROUND01(14, 4, 6, 0)
669	ldd		[%o0 + 0xa8], %f24
670	AES_DROUND23(16, 0, 2, 6)
671	ldd		[%o0 + 0xa0], %f26
672	AES_DROUND01(18, 0, 2, 4)
	/* From here %f12..%f26 are refilled after their first use. */
673	ldd		[%o0 + 0x98], %f12
674	AES_DROUND23(20, 4, 6, 2)
675	ldd		[%o0 + 0x90], %f14
676	AES_DROUND01(22, 4, 6, 0)
677	ldd		[%o0 + 0x88], %f16
678	AES_DROUND23(24, 0, 2, 6)
679	ldd		[%o0 + 0x80], %f18
680	AES_DROUND01(26, 0, 2, 4)
681	ldd		[%o0 + 0x78], %f20
682	AES_DROUND23(12, 4, 6, 2)
683	ldd		[%o0 + 0x70], %f22
684	AES_DROUND01(14, 4, 6, 0)
685	ldd		[%o0 + 0x68], %f24
686	AES_DROUND23(16, 0, 2, 6)
687	ldd		[%o0 + 0x60], %f26
688	AES_DROUND01(18, 0, 2, 4)
689	ldd		[%o0 + 0x58], %f28
690	AES_DROUND23(20, 4, 6, 2)
691	ldd		[%o0 + 0x50], %f30
692	AES_DROUND01(22, 4, 6, 0)
693	ldd		[%o0 + 0x48], %f32
694	AES_DROUND23(24, 0, 2, 6)
695	ldd		[%o0 + 0x40], %f34
696	AES_DROUND01(26, 0, 2, 4)
697	ldd		[%o0 + 0x38], %f36
698	AES_DROUND23(28, 4, 6, 2)
699	ldd		[%o0 + 0x30], %f38
700	AES_DROUND01(30, 4, 6, 0)
701	ldd		[%o0 + 0x28], %f40
702	AES_DROUND23(32, 0, 2, 6)
703	ldd		[%o0 + 0x20], %f42
704	AES_DROUND01(34, 0, 2, 4)
705	ldd		[%o0 + 0x18], %f44
706	AES_DROUND23(36, 4, 6, 2)
707	ldd		[%o0 + 0x10], %f46
708	AES_DROUND01(38, 4, 6, 0)
709	ldd		[%o0 + 0x08], %f48
710	AES_DROUND23(40, 0, 2, 6)
711	ldd		[%o0 + 0x00], %f50
712	AES_DROUND01(42, 0, 2, 4)
713	AES_DROUND23(44, 4, 6, 2)
714	AES_DROUND01(46, 4, 6, 0)
715	AES_DROUND23_L(48, 0, 2, 6)
716	AES_DROUND01_L(50, 0, 2, 4)
717	st		%f4, [%o2 + 0x00]
718	st		%f5, [%o2 + 0x04]
719	st		%f6, [%o2 + 0x08]
720	st		%f7, [%o2 + 0x0c]
721	retl
722	 VISExit
723ENDPROC(aes_sparc64_decrypt_256)
724
725	.align		32
726ENTRY(aes_sparc64_load_encrypt_keys_128)
727	/* %o0=key
	 *
	 * Preload the 128-bit encryption key schedule, offsets 0x10..0xaf,
	 * into %f8..%f46 in ascending order.  The first 16 bytes are not
	 * loaded here; the ECB encrypt routines in this file read them
	 * into %g1/%g2 themselves.
	 *
	 * There is a VISEntry but no VISExit: the registers stay loaded on
	 * return (presumably the caller is responsible for turning the FPU
	 * off again - confirm against the C glue code).  The last load sits
	 * in the retl delay slot.
	 */
728	VISEntry
729	ldd		[%o0 + 0x10], %f8
730	ldd		[%o0 + 0x18], %f10
731	ldd		[%o0 + 0x20], %f12
732	ldd		[%o0 + 0x28], %f14
733	ldd		[%o0 + 0x30], %f16
734	ldd		[%o0 + 0x38], %f18
735	ldd		[%o0 + 0x40], %f20
736	ldd		[%o0 + 0x48], %f22
737	ldd		[%o0 + 0x50], %f24
738	ldd		[%o0 + 0x58], %f26
739	ldd		[%o0 + 0x60], %f28
740	ldd		[%o0 + 0x68], %f30
741	ldd		[%o0 + 0x70], %f32
742	ldd		[%o0 + 0x78], %f34
743	ldd		[%o0 + 0x80], %f36
744	ldd		[%o0 + 0x88], %f38
745	ldd		[%o0 + 0x90], %f40
746	ldd		[%o0 + 0x98], %f42
747	ldd		[%o0 + 0xa0], %f44
748	retl
749	 ldd		[%o0 + 0xa8], %f46
750ENDPROC(aes_sparc64_load_encrypt_keys_128)
751
752	.align		32
753ENTRY(aes_sparc64_load_encrypt_keys_192)
754	/* %o0=key
	 *
	 * Preload the 192-bit encryption key schedule, offsets 0x10..0xcf,
	 * into %f8..%f54 in ascending order.  The first 16 bytes are left
	 * for the ECB routine to read into %g1/%g2.
	 *
	 * VISEntry without VISExit: registers stay loaded on return.  The
	 * last load sits in the retl delay slot.
	 */
755	VISEntry
756	ldd		[%o0 + 0x10], %f8
757	ldd		[%o0 + 0x18], %f10
758	ldd		[%o0 + 0x20], %f12
759	ldd		[%o0 + 0x28], %f14
760	ldd		[%o0 + 0x30], %f16
761	ldd		[%o0 + 0x38], %f18
762	ldd		[%o0 + 0x40], %f20
763	ldd		[%o0 + 0x48], %f22
764	ldd		[%o0 + 0x50], %f24
765	ldd		[%o0 + 0x58], %f26
766	ldd		[%o0 + 0x60], %f28
767	ldd		[%o0 + 0x68], %f30
768	ldd		[%o0 + 0x70], %f32
769	ldd		[%o0 + 0x78], %f34
770	ldd		[%o0 + 0x80], %f36
771	ldd		[%o0 + 0x88], %f38
772	ldd		[%o0 + 0x90], %f40
773	ldd		[%o0 + 0x98], %f42
774	ldd		[%o0 + 0xa0], %f44
775	ldd		[%o0 + 0xa8], %f46
776	ldd		[%o0 + 0xb0], %f48
777	ldd		[%o0 + 0xb8], %f50
778	ldd		[%o0 + 0xc0], %f52
779	retl
780	 ldd		[%o0 + 0xc8], %f54
781ENDPROC(aes_sparc64_load_encrypt_keys_192)
782
783	.align		32
784ENTRY(aes_sparc64_load_encrypt_keys_256)
785	/* %o0=key
	 *
	 * Preload the 256-bit encryption key schedule, offsets 0x10..0xef,
	 * into %f8..%f62 in ascending order.  The first 16 bytes are left
	 * for the ECB routine to read into %g1/%g2.
	 *
	 * This fills every double register from %f8 to %f62, which is why
	 * ENCRYPT_256_2 has to borrow key registers as scratch and reload
	 * them using the same offset/register pairing as here.
	 *
	 * VISEntry without VISExit: registers stay loaded on return.  The
	 * last load sits in the retl delay slot.
	 */
786	VISEntry
787	ldd		[%o0 + 0x10], %f8
788	ldd		[%o0 + 0x18], %f10
789	ldd		[%o0 + 0x20], %f12
790	ldd		[%o0 + 0x28], %f14
791	ldd		[%o0 + 0x30], %f16
792	ldd		[%o0 + 0x38], %f18
793	ldd		[%o0 + 0x40], %f20
794	ldd		[%o0 + 0x48], %f22
795	ldd		[%o0 + 0x50], %f24
796	ldd		[%o0 + 0x58], %f26
797	ldd		[%o0 + 0x60], %f28
798	ldd		[%o0 + 0x68], %f30
799	ldd		[%o0 + 0x70], %f32
800	ldd		[%o0 + 0x78], %f34
801	ldd		[%o0 + 0x80], %f36
802	ldd		[%o0 + 0x88], %f38
803	ldd		[%o0 + 0x90], %f40
804	ldd		[%o0 + 0x98], %f42
805	ldd		[%o0 + 0xa0], %f44
806	ldd		[%o0 + 0xa8], %f46
807	ldd		[%o0 + 0xb0], %f48
808	ldd		[%o0 + 0xb8], %f50
809	ldd		[%o0 + 0xc0], %f52
810	ldd		[%o0 + 0xc8], %f54
811	ldd		[%o0 + 0xd0], %f56
812	ldd		[%o0 + 0xd8], %f58
813	ldd		[%o0 + 0xe0], %f60
814	retl
815	 ldd		[%o0 + 0xe8], %f62
816ENDPROC(aes_sparc64_load_encrypt_keys_256)
817
818	.align		32
819ENTRY(aes_sparc64_load_decrypt_keys_128)
820	/* %o0=key
	 *
	 * Preload the 128-bit key schedule for decryption: offsets 0x98
	 * down to 0x00 go into %f8..%f46, i.e. in reverse memory order.
	 * %o0 is the start of the schedule here.  The final 16 bytes
	 * (0xa0..0xaf) are not loaded; the ECB decrypt routine reads them
	 * into %g1/%g2 relative to the end-of-key pointer it is given.
	 *
	 * VISEntry without VISExit: registers stay loaded on return.  The
	 * last load sits in the retl delay slot.
	 */
821	VISEntry
822	ldd		[%o0 + 0x98], %f8
823	ldd		[%o0 + 0x90], %f10
824	ldd		[%o0 + 0x88], %f12
825	ldd		[%o0 + 0x80], %f14
826	ldd		[%o0 + 0x78], %f16
827	ldd		[%o0 + 0x70], %f18
828	ldd		[%o0 + 0x68], %f20
829	ldd		[%o0 + 0x60], %f22
830	ldd		[%o0 + 0x58], %f24
831	ldd		[%o0 + 0x50], %f26
832	ldd		[%o0 + 0x48], %f28
833	ldd		[%o0 + 0x40], %f30
834	ldd		[%o0 + 0x38], %f32
835	ldd		[%o0 + 0x30], %f34
836	ldd		[%o0 + 0x28], %f36
837	ldd		[%o0 + 0x20], %f38
838	ldd		[%o0 + 0x18], %f40
839	ldd		[%o0 + 0x10], %f42
840	ldd		[%o0 + 0x08], %f44
841	retl
842	 ldd		[%o0 + 0x00], %f46
843ENDPROC(aes_sparc64_load_decrypt_keys_128)
844
845	.align		32
846ENTRY(aes_sparc64_load_decrypt_keys_192)
847	/* %o0=key
	 *
	 * Preload the 192-bit key schedule for decryption: offsets 0xb8
	 * down to 0x00 go into %f8..%f54 (reverse memory order).  %o0 is
	 * the start of the schedule; the final 16 bytes (0xc0..0xcf) are
	 * left for the ECB decrypt routine.
	 *
	 * VISEntry without VISExit: registers stay loaded on return.  The
	 * last load sits in the retl delay slot.
	 */
848	VISEntry
849	ldd		[%o0 + 0xb8], %f8
850	ldd		[%o0 + 0xb0], %f10
851	ldd		[%o0 + 0xa8], %f12
852	ldd		[%o0 + 0xa0], %f14
853	ldd		[%o0 + 0x98], %f16
854	ldd		[%o0 + 0x90], %f18
855	ldd		[%o0 + 0x88], %f20
856	ldd		[%o0 + 0x80], %f22
857	ldd		[%o0 + 0x78], %f24
858	ldd		[%o0 + 0x70], %f26
859	ldd		[%o0 + 0x68], %f28
860	ldd		[%o0 + 0x60], %f30
861	ldd		[%o0 + 0x58], %f32
862	ldd		[%o0 + 0x50], %f34
863	ldd		[%o0 + 0x48], %f36
864	ldd		[%o0 + 0x40], %f38
865	ldd		[%o0 + 0x38], %f40
866	ldd		[%o0 + 0x30], %f42
867	ldd		[%o0 + 0x28], %f44
868	ldd		[%o0 + 0x20], %f46
869	ldd		[%o0 + 0x18], %f48
870	ldd		[%o0 + 0x10], %f50
871	ldd		[%o0 + 0x08], %f52
872	retl
873	 ldd		[%o0 + 0x00], %f54
874ENDPROC(aes_sparc64_load_decrypt_keys_192)
875
876	.align		32
877ENTRY(aes_sparc64_load_decrypt_keys_256)
878	/* %o0=key
	 *
	 * Preload the 256-bit key schedule for decryption: offsets 0xd8
	 * down to 0x00 go into %f8..%f62 (reverse memory order).  %o0 is
	 * the start of the schedule; the final 16 bytes (0xe0..0xef) are
	 * left for the ECB decrypt routine.
	 *
	 * DECRYPT_256_2 reloads %f8-%f14 and %f56-%f62 using the same
	 * offset/register pairing as here.
	 *
	 * VISEntry without VISExit: registers stay loaded on return.  The
	 * last load sits in the retl delay slot.
	 */
879	VISEntry
880	ldd		[%o0 + 0xd8], %f8
881	ldd		[%o0 + 0xd0], %f10
882	ldd		[%o0 + 0xc8], %f12
883	ldd		[%o0 + 0xc0], %f14
884	ldd		[%o0 + 0xb8], %f16
885	ldd		[%o0 + 0xb0], %f18
886	ldd		[%o0 + 0xa8], %f20
887	ldd		[%o0 + 0xa0], %f22
888	ldd		[%o0 + 0x98], %f24
889	ldd		[%o0 + 0x90], %f26
890	ldd		[%o0 + 0x88], %f28
891	ldd		[%o0 + 0x80], %f30
892	ldd		[%o0 + 0x78], %f32
893	ldd		[%o0 + 0x70], %f34
894	ldd		[%o0 + 0x68], %f36
895	ldd		[%o0 + 0x60], %f38
896	ldd		[%o0 + 0x58], %f40
897	ldd		[%o0 + 0x50], %f42
898	ldd		[%o0 + 0x48], %f44
899	ldd		[%o0 + 0x40], %f46
900	ldd		[%o0 + 0x38], %f48
901	ldd		[%o0 + 0x30], %f50
902	ldd		[%o0 + 0x28], %f52
903	ldd		[%o0 + 0x20], %f54
904	ldd		[%o0 + 0x18], %f56
905	ldd		[%o0 + 0x10], %f58
906	ldd		[%o0 + 0x08], %f60
907	retl
908	 ldd		[%o0 + 0x00], %f62
909ENDPROC(aes_sparc64_load_decrypt_keys_256)
910
911	.align		32
912ENTRY(aes_sparc64_ecb_encrypt_128)
913	/* %o0=key, %o1=input, %o2=output, %o3=len
	 *
	 * ECB-encrypt len bytes with a 128-bit key.
	 *
	 * There is no VISEntry and no key load for %f8..%f46 here, so the
	 * round keys must already be in those registers (presumably via
	 * aes_sparc64_load_encrypt_keys_128).  Only the first 16 key bytes
	 * are read, into %g1/%g2, and XORed into each block in integer
	 * registers before it is moved to the FPU with the MOVXTOD_*
	 * macros (defined outside this file).
	 *
	 * Length handling: %o3 becomes len - 16.  If that is zero, only
	 * the single-block tail at label 10 runs.  Otherwise the loop at
	 * label 1 encrypts two blocks per pass and subtracts 0x20; it
	 * repeats while %o3 > 0, exits when %o3 < 0, and falls into the
	 * tail for one last block when %o3 == 0.  So len is expected to be
	 * a non-zero multiple of 16; len == 0 is not checked and would
	 * enter the two-block loop.
	 *
	 * Input is read with 64-bit ldx and output written with 64-bit std.
	 * Clobbers %g1, %g2, %g3, %g7, %o1-%o5, %f0-%f6 and %f56-%f62.
	 */
914	ldx		[%o0 + 0x00], %g1
915	subcc		%o3, 0x10, %o3
916	be		10f
917	 ldx		[%o0 + 0x08], %g2
	/* Two-block loop: blocks go to %f4/%f6 and %f60/%f62. */
9181:	ldx		[%o1 + 0x00], %g3
919	ldx		[%o1 + 0x08], %g7
920	ldx		[%o1 + 0x10], %o4
921	ldx		[%o1 + 0x18], %o5
922	xor		%g1, %g3, %g3
923	xor		%g2, %g7, %g7
924	MOVXTOD_G3_F4
925	MOVXTOD_G7_F6
926	xor		%g1, %o4, %g3
927	xor		%g2, %o5, %g7
928	MOVXTOD_G3_F60
929	MOVXTOD_G7_F62
930	ENCRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
931	std		%f4, [%o2 + 0x00]
932	std		%f6, [%o2 + 0x08]
933	std		%f60, [%o2 + 0x10]
934	std		%f62, [%o2 + 0x18]
935	sub		%o3, 0x20, %o3
936	add		%o1, 0x20, %o1
	/* The output pointer is advanced in the delay slot, so it is
	 * updated whether or not the loop branch is taken.
	 */
937	brgz		%o3, 1b
938	 add		%o2, 0x20, %o2
939	brlz,pt		%o3, 11f
940	 nop
	/* Single-block tail. */
94110:	ldx		[%o1 + 0x00], %g3
942	ldx		[%o1 + 0x08], %g7
943	xor		%g1, %g3, %g3
944	xor		%g2, %g7, %g7
945	MOVXTOD_G3_F4
946	MOVXTOD_G7_F6
947	ENCRYPT_128(8, 4, 6, 0, 2)
948	std		%f4, [%o2 + 0x00]
949	std		%f6, [%o2 + 0x08]
95011:	retl
951	 nop
952ENDPROC(aes_sparc64_ecb_encrypt_128)
953
954	.align		32
955ENTRY(aes_sparc64_ecb_encrypt_192)
956	/* %o0=key, %o1=input, %o2=output, %o3=len
	 *
	 * ECB-encrypt len bytes with a 192-bit key.
	 *
	 * Same structure as aes_sparc64_ecb_encrypt_128 with the 192-bit
	 * round chains.  Round keys must already be in %f8..%f54
	 * (presumably via aes_sparc64_load_encrypt_keys_192); the first 16
	 * key bytes are read into %g1/%g2 and XORed into each block.
	 *
	 * %o3 becomes len - 16: zero goes straight to the single-block
	 * tail (label 10); otherwise two blocks are processed per loop
	 * pass, with one trailing block handled by the tail when %o3
	 * reaches exactly zero.  len is expected to be a non-zero multiple
	 * of 16; len == 0 is not checked.
	 *
	 * Clobbers %g1, %g2, %g3, %g7, %o1-%o5, %f0-%f6 and %f56-%f62.
	 */
957	ldx		[%o0 + 0x00], %g1
958	subcc		%o3, 0x10, %o3
959	be		10f
960	 ldx		[%o0 + 0x08], %g2
	/* Two-block loop: blocks go to %f4/%f6 and %f60/%f62. */
9611:	ldx		[%o1 + 0x00], %g3
962	ldx		[%o1 + 0x08], %g7
963	ldx		[%o1 + 0x10], %o4
964	ldx		[%o1 + 0x18], %o5
965	xor		%g1, %g3, %g3
966	xor		%g2, %g7, %g7
967	MOVXTOD_G3_F4
968	MOVXTOD_G7_F6
969	xor		%g1, %o4, %g3
970	xor		%g2, %o5, %g7
971	MOVXTOD_G3_F60
972	MOVXTOD_G7_F62
973	ENCRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
974	std		%f4, [%o2 + 0x00]
975	std		%f6, [%o2 + 0x08]
976	std		%f60, [%o2 + 0x10]
977	std		%f62, [%o2 + 0x18]
978	sub		%o3, 0x20, %o3
979	add		%o1, 0x20, %o1
	/* Output pointer advanced in the delay slot on both outcomes. */
980	brgz		%o3, 1b
981	 add		%o2, 0x20, %o2
982	brlz,pt		%o3, 11f
983	 nop
	/* Single-block tail. */
98410:	ldx		[%o1 + 0x00], %g3
985	ldx		[%o1 + 0x08], %g7
986	xor		%g1, %g3, %g3
987	xor		%g2, %g7, %g7
988	MOVXTOD_G3_F4
989	MOVXTOD_G7_F6
990	ENCRYPT_192(8, 4, 6, 0, 2)
991	std		%f4, [%o2 + 0x00]
992	std		%f6, [%o2 + 0x08]
99311:	retl
994	 nop
995ENDPROC(aes_sparc64_ecb_encrypt_192)
996
997	.align		32
998ENTRY(aes_sparc64_ecb_encrypt_256)
999	/* %o0=key, %o1=input, %o2=output, %o3=len
	 *
	 * ECB-encrypt len bytes with a 256-bit key.
	 *
	 * Round keys must already be in %f8..%f62 (presumably via
	 * aes_sparc64_load_encrypt_keys_256); the first 16 key bytes are
	 * read into %g1/%g2 and XORed into each block.  %o0 is never
	 * modified and must point at the start of the key schedule,
	 * because ENCRYPT_256_2 and the tail below reload key registers
	 * from it.
	 *
	 * %o3 becomes len - 16: zero goes straight to the single-block
	 * tail (label 10); otherwise two blocks are processed per loop
	 * pass, with one trailing block handled by the tail when %o3
	 * reaches exactly zero.  len is expected to be a non-zero multiple
	 * of 16; len == 0 is not checked.
	 *
	 * Clobbers %g1, %g2, %g3, %g7, %o1-%o5 and %f0-%f6.
	 */
1000	ldx		[%o0 + 0x00], %g1
1001	subcc		%o3, 0x10, %o3
1002	be		10f
1003	 ldx		[%o0 + 0x08], %g2
	/* Two-block loop: blocks go to %f4/%f6 and %f0/%f2.  With every
	 * register from %f8 to %f62 holding key material, ENCRYPT_256_2
	 * uses %f56-%f62 and %f8-%f14 as scratch and reloads them.
	 */
10041:	ldx		[%o1 + 0x00], %g3
1005	ldx		[%o1 + 0x08], %g7
1006	ldx		[%o1 + 0x10], %o4
1007	ldx		[%o1 + 0x18], %o5
1008	xor		%g1, %g3, %g3
1009	xor		%g2, %g7, %g7
1010	MOVXTOD_G3_F4
1011	MOVXTOD_G7_F6
1012	xor		%g1, %o4, %g3
1013	xor		%g2, %o5, %g7
1014	MOVXTOD_G3_F0
1015	MOVXTOD_G7_F2
1016	ENCRYPT_256_2(8, 4, 6, 0, 2)
1017	std		%f4, [%o2 + 0x00]
1018	std		%f6, [%o2 + 0x08]
1019	std		%f0, [%o2 + 0x10]
1020	std		%f2, [%o2 + 0x18]
1021	sub		%o3, 0x20, %o3
1022	add		%o1, 0x20, %o1
	/* Output pointer advanced in the delay slot on both outcomes. */
1023	brgz		%o3, 1b
1024	 add		%o2, 0x20, %o2
1025	brlz,pt		%o3, 11f
1026	 nop
	/* Single-block tail.  ENCRYPT_256 needs all of %f8..%f62 to hold
	 * key material and never reloads anything itself, while the
	 * unrolled two-block code uses %f56-%f62 as scratch.  Reload those
	 * four doubles explicitly so that this path does not depend on
	 * the state earlier code left them in.  The offset/register
	 * pairing is the same as in aes_sparc64_load_encrypt_keys_256.
	 */
102710:	ldd		[%o0 + 0xd0], %f56
	ldd		[%o0 + 0xd8], %f58
	ldd		[%o0 + 0xe0], %f60
	ldd		[%o0 + 0xe8], %f62
	ldx		[%o1 + 0x00], %g3
1028	ldx		[%o1 + 0x08], %g7
1029	xor		%g1, %g3, %g3
1030	xor		%g2, %g7, %g7
1031	MOVXTOD_G3_F4
1032	MOVXTOD_G7_F6
1033	ENCRYPT_256(8, 4, 6, 0, 2)
1034	std		%f4, [%o2 + 0x00]
1035	std		%f6, [%o2 + 0x08]
103611:	retl
1037	 nop
1038ENDPROC(aes_sparc64_ecb_encrypt_256)
1039
1040	.align		32
1041ENTRY(aes_sparc64_ecb_decrypt_128)
1042	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len
	 *
	 * ECB-decrypt len bytes with a 128-bit key.
	 *
	 * %o0 points just past the end of the key schedule: the last 16
	 * key bytes are read from [%o0 - 0x10] and [%o0 - 0x08] into
	 * %g1/%g2 and XORed into each block.  The remaining round keys
	 * must already be in %f8..%f46 (presumably via
	 * aes_sparc64_load_decrypt_keys_128).
	 *
	 * %o3 becomes len - 16: zero goes straight to the single-block
	 * tail (label 10); otherwise two blocks are processed per loop
	 * pass, with one trailing block handled by the tail when %o3
	 * reaches exactly zero.  len is expected to be a non-zero multiple
	 * of 16; len == 0 is not checked.
	 *
	 * Clobbers %g1, %g2, %g3, %g7, %o1-%o5, %f0-%f6 and %f56-%f62.
	 */
1043	ldx		[%o0 - 0x10], %g1
1044	subcc		%o3, 0x10, %o3
1045	be		10f
1046	 ldx		[%o0 - 0x08], %g2
	/* Two-block loop: blocks go to %f4/%f6 and %f60/%f62. */
10471:	ldx		[%o1 + 0x00], %g3
1048	ldx		[%o1 + 0x08], %g7
1049	ldx		[%o1 + 0x10], %o4
1050	ldx		[%o1 + 0x18], %o5
1051	xor		%g1, %g3, %g3
1052	xor		%g2, %g7, %g7
1053	MOVXTOD_G3_F4
1054	MOVXTOD_G7_F6
1055	xor		%g1, %o4, %g3
1056	xor		%g2, %o5, %g7
1057	MOVXTOD_G3_F60
1058	MOVXTOD_G7_F62
1059	DECRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
1060	std		%f4, [%o2 + 0x00]
1061	std		%f6, [%o2 + 0x08]
1062	std		%f60, [%o2 + 0x10]
1063	std		%f62, [%o2 + 0x18]
1064	sub		%o3, 0x20, %o3
1065	add		%o1, 0x20, %o1
	/* Output pointer advanced in the delay slot on both outcomes. */
1066	brgz,pt		%o3, 1b
1067	 add		%o2, 0x20, %o2
1068	brlz,pt		%o3, 11f
1069	 nop
	/* Single-block tail. */
107010:	ldx		[%o1 + 0x00], %g3
1071	ldx		[%o1 + 0x08], %g7
1072	xor		%g1, %g3, %g3
1073	xor		%g2, %g7, %g7
1074	MOVXTOD_G3_F4
1075	MOVXTOD_G7_F6
1076	DECRYPT_128(8, 4, 6, 0, 2)
1077	std		%f4, [%o2 + 0x00]
1078	std		%f6, [%o2 + 0x08]
107911:	retl
1080	 nop
1081ENDPROC(aes_sparc64_ecb_decrypt_128)
1082
1083	.align		32
/*
 * aes_sparc64_ecb_decrypt_192: ECB-decrypt `len` bytes from `input` to
 * `output`, each 16-byte block independently of the others.
 *
 * %g1/%g2 hold the two 64-bit words stored just below %o0 and are XORed into
 * every input block with integer instructions before the block is moved to
 * the FP registers.  The DECRYPT_192* macros are defined outside this chunk;
 * nothing in this routine loads %f8 and up, so the key material selected by
 * their first argument (8) is presumably preloaded by the caller.
 *
 * %o3 is biased by -0x10: zero on entry means exactly one block (straight to
 * label 10).  Otherwise blocks are handled in pairs; after each pair a
 * negative %o3 means done and zero means one block is left for label 10.
 * len == 0 or a len that is not a multiple of 16 is not handled here --
 * NOTE(review): confirm callers guarantee this.
 */
1084ENTRY(aes_sparc64_ecb_decrypt_192)
1085	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
1086	ldx		[%o0 - 0x10], %g1
1087	subcc		%o3, 0x10, %o3
1088	be		10f
	/* Delay slot: runs whether or not the branch above is taken. */
1089	 ldx		[%o0 - 0x08], %g2
	/* Two blocks per iteration: first in %f4/%f6, second in %f60/%f62. */
10901:	ldx		[%o1 + 0x00], %g3
1091	ldx		[%o1 + 0x08], %g7
1092	ldx		[%o1 + 0x10], %o4
1093	ldx		[%o1 + 0x18], %o5
1094	xor		%g1, %g3, %g3
1095	xor		%g2, %g7, %g7
1096	MOVXTOD_G3_F4
1097	MOVXTOD_G7_F6
	/* %g3/%g7 are reused as staging registers for the second block. */
1098	xor		%g1, %o4, %g3
1099	xor		%g2, %o5, %g7
1100	MOVXTOD_G3_F60
1101	MOVXTOD_G7_F62
1102	DECRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
1103	std		%f4, [%o2 + 0x00]
1104	std		%f6, [%o2 + 0x08]
1105	std		%f60, [%o2 + 0x10]
1106	std		%f62, [%o2 + 0x18]
1107	sub		%o3, 0x20, %o3
1108	add		%o1, 0x20, %o1
	/* More than one block left: go round again. */
1109	brgz,pt		%o3, 1b
1110	 add		%o2, 0x20, %o2
	/* Negative: an even number of blocks was requested, all done. */
1111	brlz,pt		%o3, 11f
1112	 nop
	/* Single (or trailing) block in %f4/%f6. */
111310:	ldx		[%o1 + 0x00], %g3
1114	ldx		[%o1 + 0x08], %g7
1115	xor		%g1, %g3, %g3
1116	xor		%g2, %g7, %g7
1117	MOVXTOD_G3_F4
1118	MOVXTOD_G7_F6
1119	DECRYPT_192(8, 4, 6, 0, 2)
1120	std		%f4, [%o2 + 0x00]
1121	std		%f6, [%o2 + 0x08]
112211:	retl
1123	 nop
1124ENDPROC(aes_sparc64_ecb_decrypt_192)
1125
1126	.align		32
/*
 * aes_sparc64_ecb_decrypt_256: ECB-decrypt `len` bytes from `input` to
 * `output`, each 16-byte block independently of the others.
 *
 * %g1/%g2 hold the two 64-bit words stored just below %o0 and are XORed into
 * every input block with integer instructions before the block is moved to
 * the FP registers.  The DECRYPT_256* macros are defined outside this chunk;
 * nothing in this routine loads FP key registers itself, so the key material
 * selected by their first argument (8) is presumably preloaded by the caller.
 *
 * %o3 is biased by -0x10: zero on entry means exactly one block (straight to
 * label 10).  Otherwise blocks are handled in pairs; after each pair a
 * negative %o3 means done and zero means one block is left for label 10.
 * len == 0 or a len that is not a multiple of 16 is not handled here --
 * NOTE(review): confirm callers guarantee this.
 *
 * NOTE(review): DECRYPT_256_2 is handed no scratch registers, and %o0 is
 * rebased by -0xf0 only on the multi-block path -- presumably because the
 * macro addresses key memory through %o0.  Label 10 is therefore reached
 * with two different values of %o0, and the single-block DECRYPT_256 there
 * trusts every FP key register to be intact.  Confirm against the macro
 * definitions.
 */
1127ENTRY(aes_sparc64_ecb_decrypt_256)
1128	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
1129	ldx		[%o0 - 0x10], %g1
1130	subcc		%o3, 0x10, %o3
1131	be		10f
	/* Delay slot: runs whether or not the branch above is taken. */
1132	 ldx		[%o0 - 0x08], %g2
	/* Only reached when more than one block was requested. */
1133	sub		%o0, 0xf0, %o0
	/* Two blocks per iteration: first in %f4/%f6, second in %f0/%f2. */
11341:	ldx		[%o1 + 0x00], %g3
1135	ldx		[%o1 + 0x08], %g7
1136	ldx		[%o1 + 0x10], %o4
1137	ldx		[%o1 + 0x18], %o5
1138	xor		%g1, %g3, %g3
1139	xor		%g2, %g7, %g7
1140	MOVXTOD_G3_F4
1141	MOVXTOD_G7_F6
	/* %g3/%g7 are reused as staging registers for the second block. */
1142	xor		%g1, %o4, %g3
1143	xor		%g2, %o5, %g7
1144	MOVXTOD_G3_F0
1145	MOVXTOD_G7_F2
1146	DECRYPT_256_2(8, 4, 6, 0, 2)
1147	std		%f4, [%o2 + 0x00]
1148	std		%f6, [%o2 + 0x08]
1149	std		%f0, [%o2 + 0x10]
1150	std		%f2, [%o2 + 0x18]
1151	sub		%o3, 0x20, %o3
1152	add		%o1, 0x20, %o1
	/* More than one block left: go round again. */
1153	brgz,pt		%o3, 1b
1154	 add		%o2, 0x20, %o2
	/* Negative: an even number of blocks was requested, all done. */
1155	brlz,pt		%o3, 11f
1156	 nop
	/* Single (or trailing) block in %f4/%f6. */
115710:	ldx		[%o1 + 0x00], %g3
1158	ldx		[%o1 + 0x08], %g7
1159	xor		%g1, %g3, %g3
1160	xor		%g2, %g7, %g7
1161	MOVXTOD_G3_F4
1162	MOVXTOD_G7_F6
1163	DECRYPT_256(8, 4, 6, 0, 2)
1164	std		%f4, [%o2 + 0x00]
1165	std		%f6, [%o2 + 0x08]
116611:	retl
1167	 nop
1168ENDPROC(aes_sparc64_ecb_decrypt_256)
1169
1170	.align		32
/*
 * aes_sparc64_cbc_encrypt_128: CBC-encrypt `len` bytes from `input` to
 * `output`, one 16-byte block per loop iteration.
 *
 * %f4/%f6 carry the chaining value.  They start as the 16 bytes at %o4 (IV),
 * are XORed with each input block (itself already XORed with the first 16
 * key bytes cached in %g1/%g2), run through ENCRYPT_128 in place, stored as
 * output and then reused for the next block.  After the last block they are
 * written back to %o4.
 *
 * The round keys used by ENCRYPT_128 (KEY_BASE = 8) are not loaded here and
 * are presumably preloaded by the caller.
 *
 * The loop tests %o3 only after a block has been processed and exits on
 * exactly zero, so len must be a non-zero multiple of 16 (not checked here).
 */
1171ENTRY(aes_sparc64_cbc_encrypt_128)
1172	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
1173	ldd		[%o4 + 0x00], %f4
1174	ldd		[%o4 + 0x08], %f6
1175	ldx		[%o0 + 0x00], %g1
1176	ldx		[%o0 + 0x08], %g2
11771:	ldx		[%o1 + 0x00], %g3
1178	ldx		[%o1 + 0x08], %g7
1179	add		%o1, 0x10, %o1
1180	xor		%g1, %g3, %g3
1181	xor		%g2, %g7, %g7
1182	MOVXTOD_G3_F0
1183	MOVXTOD_G7_F2
	/* Fold the block into the chaining value before encrypting it. */
1184	fxor		%f4, %f0, %f4
1185	fxor		%f6, %f2, %f6
1186	ENCRYPT_128(8, 4, 6, 0, 2)
1187	std		%f4, [%o2 + 0x00]
1188	std		%f6, [%o2 + 0x08]
1189	subcc		%o3, 0x10, %o3
1190	bne,pt		%xcc, 1b
1191	 add		%o2, 0x10, %o2
	/* Hand the last output block back as the updated IV. */
1192	std		%f4, [%o4 + 0x00]
1193	std		%f6, [%o4 + 0x08]
1194	retl
1195	 nop
1196ENDPROC(aes_sparc64_cbc_encrypt_128)
1197
1198	.align		32
/*
 * aes_sparc64_cbc_encrypt_192: CBC-encrypt `len` bytes from `input` to
 * `output`, one 16-byte block per loop iteration.
 *
 * %f4/%f6 carry the chaining value.  They start as the 16 bytes at %o4 (IV),
 * are XORed with each input block (itself already XORed with the first 16
 * key bytes cached in %g1/%g2), run through ENCRYPT_192 in place, stored as
 * output and then reused for the next block.  After the last block they are
 * written back to %o4.
 *
 * The round keys used by ENCRYPT_192 (KEY_BASE = 8) are not loaded here and
 * are presumably preloaded by the caller.
 *
 * The loop tests %o3 only after a block has been processed and exits on
 * exactly zero, so len must be a non-zero multiple of 16 (not checked here).
 */
1199ENTRY(aes_sparc64_cbc_encrypt_192)
1200	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
1201	ldd		[%o4 + 0x00], %f4
1202	ldd		[%o4 + 0x08], %f6
1203	ldx		[%o0 + 0x00], %g1
1204	ldx		[%o0 + 0x08], %g2
12051:	ldx		[%o1 + 0x00], %g3
1206	ldx		[%o1 + 0x08], %g7
1207	add		%o1, 0x10, %o1
1208	xor		%g1, %g3, %g3
1209	xor		%g2, %g7, %g7
1210	MOVXTOD_G3_F0
1211	MOVXTOD_G7_F2
	/* Fold the block into the chaining value before encrypting it. */
1212	fxor		%f4, %f0, %f4
1213	fxor		%f6, %f2, %f6
1214	ENCRYPT_192(8, 4, 6, 0, 2)
1215	std		%f4, [%o2 + 0x00]
1216	std		%f6, [%o2 + 0x08]
1217	subcc		%o3, 0x10, %o3
1218	bne,pt		%xcc, 1b
1219	 add		%o2, 0x10, %o2
	/* Hand the last output block back as the updated IV. */
1220	std		%f4, [%o4 + 0x00]
1221	std		%f6, [%o4 + 0x08]
1222	retl
1223	 nop
1224ENDPROC(aes_sparc64_cbc_encrypt_192)
1225
1226	.align		32
/*
 * aes_sparc64_cbc_encrypt_256: CBC-encrypt `len` bytes from `input` to
 * `output`, one 16-byte block per loop iteration.
 *
 * %f4/%f6 carry the chaining value.  They start as the 16 bytes at %o4 (IV),
 * are XORed with each input block (itself already XORed with the first 16
 * key bytes cached in %g1/%g2), run through ENCRYPT_256 in place, stored as
 * output and then reused for the next block.  After the last block they are
 * written back to %o4.
 *
 * The round keys used by ENCRYPT_256 (KEY_BASE = 8) are not loaded here and
 * are presumably preloaded by the caller.
 *
 * The loop tests %o3 only after a block has been processed and exits on
 * exactly zero, so len must be a non-zero multiple of 16 (not checked here).
 */
1227ENTRY(aes_sparc64_cbc_encrypt_256)
1228	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
1229	ldd		[%o4 + 0x00], %f4
1230	ldd		[%o4 + 0x08], %f6
1231	ldx		[%o0 + 0x00], %g1
1232	ldx		[%o0 + 0x08], %g2
12331:	ldx		[%o1 + 0x00], %g3
1234	ldx		[%o1 + 0x08], %g7
1235	add		%o1, 0x10, %o1
1236	xor		%g1, %g3, %g3
1237	xor		%g2, %g7, %g7
1238	MOVXTOD_G3_F0
1239	MOVXTOD_G7_F2
	/* Fold the block into the chaining value before encrypting it. */
1240	fxor		%f4, %f0, %f4
1241	fxor		%f6, %f2, %f6
1242	ENCRYPT_256(8, 4, 6, 0, 2)
1243	std		%f4, [%o2 + 0x00]
1244	std		%f6, [%o2 + 0x08]
1245	subcc		%o3, 0x10, %o3
1246	bne,pt		%xcc, 1b
1247	 add		%o2, 0x10, %o2
	/* Hand the last output block back as the updated IV. */
1248	std		%f4, [%o4 + 0x00]
1249	std		%f6, [%o4 + 0x08]
1250	retl
1251	 nop
1252ENDPROC(aes_sparc64_cbc_encrypt_256)
1253
1254	.align		32
/*
 * aes_sparc64_cbc_decrypt_128: CBC-decrypt `len` bytes from `input` to
 * `output`, one 16-byte block per loop iteration.
 *
 * %g1/%g2 hold the two 64-bit key words stored just below %o0.  %o0/%o5 are
 * then reused to carry the chaining value (initially the 16 bytes at %o4),
 * so the key pointer is no longer available inside the loop.
 *
 * Per block: the input is XORed with %g1/%g2, moved to %f4/%f6 and run
 * through DECRYPT_128; the result is XORed with the chaining value and
 * stored.  The next chaining value is the current input block, recovered as
 * %g1 ^ %g3 and %g2 ^ %g7 (undoing the XOR applied before the move; this
 * relies on %g3/%g7 surviving DECRYPT_128).  After the last block the
 * chaining value is written back to %o4.
 *
 * The round keys used by DECRYPT_128 are not loaded here and are presumably
 * preloaded by the caller.  The loop tests %o3 only after a block has been
 * processed and exits on exactly zero, so len must be a non-zero multiple of
 * 16 (not checked here).
 */
1255ENTRY(aes_sparc64_cbc_decrypt_128)
1256	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */
1257	ldx		[%o0 - 0x10], %g1
1258	ldx		[%o0 - 0x08], %g2
	/* From here on %o0/%o5 are the chaining value, not the key pointer. */
1259	ldx		[%o4 + 0x00], %o0
1260	ldx		[%o4 + 0x08], %o5
12611:	ldx		[%o1 + 0x00], %g3
1262	ldx		[%o1 + 0x08], %g7
1263	add		%o1, 0x10, %o1
1264	xor		%g1, %g3, %g3
1265	xor		%g2, %g7, %g7
1266	MOVXTOD_G3_F4
1267	MOVXTOD_G7_F6
1268	DECRYPT_128(8, 4, 6, 0, 2)
	/* Previous chaining value into %f0/%f2 for the final XOR. */
1269	MOVXTOD_O0_F0
1270	MOVXTOD_O5_F2
	/* Recover this input block as the next chaining value. */
1271	xor		%g1, %g3, %o0
1272	xor		%g2, %g7, %o5
1273	fxor		%f4, %f0, %f4
1274	fxor		%f6, %f2, %f6
1275	std		%f4, [%o2 + 0x00]
1276	std		%f6, [%o2 + 0x08]
1277	subcc		%o3, 0x10, %o3
1278	bne,pt		%xcc, 1b
1279	 add		%o2, 0x10, %o2
	/* Hand the last input block back as the updated IV. */
1280	stx		%o0, [%o4 + 0x00]
1281	stx		%o5, [%o4 + 0x08]
1282	retl
1283	 nop
1284ENDPROC(aes_sparc64_cbc_decrypt_128)
1285
1286	.align		32
/*
 * aes_sparc64_cbc_decrypt_192: CBC-decrypt `len` bytes from `input` to
 * `output`, one 16-byte block per loop iteration.
 *
 * %g1/%g2 hold the two 64-bit key words stored just below %o0.  %o0/%o5 are
 * then reused to carry the chaining value (initially the 16 bytes at %o4),
 * so the key pointer is no longer available inside the loop.
 *
 * Per block: the input is XORed with %g1/%g2, moved to %f4/%f6 and run
 * through DECRYPT_192; the result is XORed with the chaining value and
 * stored.  The next chaining value is the current input block, recovered as
 * %g1 ^ %g3 and %g2 ^ %g7 (undoing the XOR applied before the move; this
 * relies on %g3/%g7 surviving DECRYPT_192).  After the last block the
 * chaining value is written back to %o4.
 *
 * The round keys used by DECRYPT_192 are not loaded here and are presumably
 * preloaded by the caller.  The loop tests %o3 only after a block has been
 * processed and exits on exactly zero, so len must be a non-zero multiple of
 * 16 (not checked here).
 */
1287ENTRY(aes_sparc64_cbc_decrypt_192)
1288	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */
1289	ldx		[%o0 - 0x10], %g1
1290	ldx		[%o0 - 0x08], %g2
	/* From here on %o0/%o5 are the chaining value, not the key pointer. */
1291	ldx		[%o4 + 0x00], %o0
1292	ldx		[%o4 + 0x08], %o5
12931:	ldx		[%o1 + 0x00], %g3
1294	ldx		[%o1 + 0x08], %g7
1295	add		%o1, 0x10, %o1
1296	xor		%g1, %g3, %g3
1297	xor		%g2, %g7, %g7
1298	MOVXTOD_G3_F4
1299	MOVXTOD_G7_F6
1300	DECRYPT_192(8, 4, 6, 0, 2)
	/* Previous chaining value into %f0/%f2 for the final XOR. */
1301	MOVXTOD_O0_F0
1302	MOVXTOD_O5_F2
	/* Recover this input block as the next chaining value. */
1303	xor		%g1, %g3, %o0
1304	xor		%g2, %g7, %o5
1305	fxor		%f4, %f0, %f4
1306	fxor		%f6, %f2, %f6
1307	std		%f4, [%o2 + 0x00]
1308	std		%f6, [%o2 + 0x08]
1309	subcc		%o3, 0x10, %o3
1310	bne,pt		%xcc, 1b
1311	 add		%o2, 0x10, %o2
	/* Hand the last input block back as the updated IV. */
1312	stx		%o0, [%o4 + 0x00]
1313	stx		%o5, [%o4 + 0x08]
1314	retl
1315	 nop
1316ENDPROC(aes_sparc64_cbc_decrypt_192)
1317
1318	.align		32
/*
 * aes_sparc64_cbc_decrypt_256: CBC-decrypt `len` bytes from `input` to
 * `output`, one 16-byte block per loop iteration.
 *
 * %g1/%g2 hold the two 64-bit key words stored just below %o0.  %o0/%o5 are
 * then reused to carry the chaining value (initially the 16 bytes at %o4),
 * so the key pointer is no longer available inside the loop.
 *
 * Per block: the input is XORed with %g1/%g2, moved to %f4/%f6 and run
 * through DECRYPT_256; the result is XORed with the chaining value and
 * stored.  The next chaining value is the current input block, recovered as
 * %g1 ^ %g3 and %g2 ^ %g7 (undoing the XOR applied before the move; this
 * relies on %g3/%g7 surviving DECRYPT_256).  After the last block the
 * chaining value is written back to %o4.
 *
 * The round keys used by DECRYPT_256 are not loaded here and are presumably
 * preloaded by the caller.  The loop tests %o3 only after a block has been
 * processed and exits on exactly zero, so len must be a non-zero multiple of
 * 16 (not checked here).
 */
1319ENTRY(aes_sparc64_cbc_decrypt_256)
1320	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */
1321	ldx		[%o0 - 0x10], %g1
1322	ldx		[%o0 - 0x08], %g2
	/* From here on %o0/%o5 are the chaining value, not the key pointer. */
1323	ldx		[%o4 + 0x00], %o0
1324	ldx		[%o4 + 0x08], %o5
13251:	ldx		[%o1 + 0x00], %g3
1326	ldx		[%o1 + 0x08], %g7
1327	add		%o1, 0x10, %o1
1328	xor		%g1, %g3, %g3
1329	xor		%g2, %g7, %g7
1330	MOVXTOD_G3_F4
1331	MOVXTOD_G7_F6
1332	DECRYPT_256(8, 4, 6, 0, 2)
	/* Previous chaining value into %f0/%f2 for the final XOR. */
1333	MOVXTOD_O0_F0
1334	MOVXTOD_O5_F2
	/* Recover this input block as the next chaining value. */
1335	xor		%g1, %g3, %o0
1336	xor		%g2, %g7, %o5
1337	fxor		%f4, %f0, %f4
1338	fxor		%f6, %f2, %f6
1339	std		%f4, [%o2 + 0x00]
1340	std		%f6, [%o2 + 0x08]
1341	subcc		%o3, 0x10, %o3
1342	bne,pt		%xcc, 1b
1343	 add		%o2, 0x10, %o2
	/* Hand the last input block back as the updated IV. */
1344	stx		%o0, [%o4 + 0x00]
1345	stx		%o5, [%o4 + 0x08]
1346	retl
1347	 nop
1348ENDPROC(aes_sparc64_cbc_decrypt_256)
1349
1350	.align		32
/*
 * aes_sparc64_ctr_crypt_128: counter-mode processing of `len` bytes from
 * `input` to `output`.
 *
 * %g3:%g7 hold the 16-byte counter read from %o4 (%g7 is the low-order half:
 * it is incremented first and a wrap to zero carries into %g3).  For each
 * block the counter is XORed with the first 16 key bytes (%g1/%g2), moved to
 * the FP registers, run through ENCRYPT_128* and the result is XORed with
 * the input block to give the output.  The advanced counter is written back
 * to %o4 on exit.
 *
 * The round keys used by the macros (KEY_BASE = 8) are not loaded here and
 * are presumably preloaded by the caller.
 *
 * %o3 is biased by -0x10: zero on entry means exactly one block (straight to
 * label 10).  Otherwise blocks are handled in pairs; after each pair a
 * negative %o3 means done and zero means one block is left for label 10.
 * len == 0 or a len that is not a multiple of 16 is not handled here.
 */
1351ENTRY(aes_sparc64_ctr_crypt_128)
1352	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
1353	ldx		[%o4 + 0x00], %g3
1354	ldx		[%o4 + 0x08], %g7
1355	subcc		%o3, 0x10, %o3
1356	ldx		[%o0 + 0x00], %g1
1357	be		10f
1358	 ldx		[%o0 + 0x08], %g2
	/* First counter block of the pair into %f0/%f2. */
13591:	xor		%g1, %g3, %o5
1360	MOVXTOD_O5_F0
1361	xor		%g2, %g7, %o5
1362	MOVXTOD_O5_F2
	/* 128-bit increment: bump %g3 only when %g7 wrapped to zero. */
1363	add		%g7, 1, %g7
1364	add		%g3, 1, %o5
1365	movrz		%g7, %o5, %g3
	/* Second counter block of the pair into %f4/%f6. */
1366	xor		%g1, %g3, %o5
1367	MOVXTOD_O5_F4
1368	xor		%g2, %g7, %o5
1369	MOVXTOD_O5_F6
1370	add		%g7, 1, %g7
1371	add		%g3, 1, %o5
1372	movrz		%g7, %o5, %g3
	/* %f56-%f62 are passed as the macro's temporaries ... */
1373	ENCRYPT_128_2(8, 0, 2, 4, 6, 56, 58, 60, 62)
	/* ... and are then reused to hold the two input blocks. */
1374	ldd		[%o1 + 0x00], %f56
1375	ldd		[%o1 + 0x08], %f58
1376	ldd		[%o1 + 0x10], %f60
1377	ldd		[%o1 + 0x18], %f62
1378	fxor		%f56, %f0, %f56
1379	fxor		%f58, %f2, %f58
1380	fxor		%f60, %f4, %f60
1381	fxor		%f62, %f6, %f62
1382	std		%f56, [%o2 + 0x00]
1383	std		%f58, [%o2 + 0x08]
1384	std		%f60, [%o2 + 0x10]
1385	std		%f62, [%o2 + 0x18]
1386	subcc		%o3, 0x20, %o3
1387	add		%o1, 0x20, %o1
1388	brgz		%o3, 1b
1389	 add		%o2, 0x20, %o2
1390	brlz,pt		%o3, 11f
1391	 nop
	/* Single (or trailing) block. */
139210:	xor		%g1, %g3, %o5
1393	MOVXTOD_O5_F0
1394	xor		%g2, %g7, %o5
1395	MOVXTOD_O5_F2
1396	add		%g7, 1, %g7
1397	add		%g3, 1, %o5
1398	movrz		%g7, %o5, %g3
1399	ENCRYPT_128(8, 0, 2, 4, 6)
1400	ldd		[%o1 + 0x00], %f4
1401	ldd		[%o1 + 0x08], %f6
1402	fxor		%f4, %f0, %f4
1403	fxor		%f6, %f2, %f6
1404	std		%f4, [%o2 + 0x00]
1405	std		%f6, [%o2 + 0x08]
	/* Write the advanced counter back; second store is in the delay slot. */
140611:	stx		%g3, [%o4 + 0x00]
1407	retl
1408	 stx		%g7, [%o4 + 0x08]
1409ENDPROC(aes_sparc64_ctr_crypt_128)
1410
1411	.align		32
/*
 * aes_sparc64_ctr_crypt_192: counter-mode processing of `len` bytes from
 * `input` to `output`.
 *
 * %g3:%g7 hold the 16-byte counter read from %o4 (%g7 is the low-order half:
 * it is incremented first and a wrap to zero carries into %g3).  For each
 * block the counter is XORed with the first 16 key bytes (%g1/%g2), moved to
 * the FP registers, run through ENCRYPT_192* and the result is XORed with
 * the input block to give the output.  The advanced counter is written back
 * to %o4 on exit.
 *
 * The round keys used by the macros (KEY_BASE = 8) are not loaded here and
 * are presumably preloaded by the caller.
 *
 * %o3 is biased by -0x10: zero on entry means exactly one block (straight to
 * label 10).  Otherwise blocks are handled in pairs; after each pair a
 * negative %o3 means done and zero means one block is left for label 10.
 * len == 0 or a len that is not a multiple of 16 is not handled here.
 */
1412ENTRY(aes_sparc64_ctr_crypt_192)
1413	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
1414	ldx		[%o4 + 0x00], %g3
1415	ldx		[%o4 + 0x08], %g7
1416	subcc		%o3, 0x10, %o3
1417	ldx		[%o0 + 0x00], %g1
1418	be		10f
1419	 ldx		[%o0 + 0x08], %g2
	/* First counter block of the pair into %f0/%f2. */
14201:	xor		%g1, %g3, %o5
1421	MOVXTOD_O5_F0
1422	xor		%g2, %g7, %o5
1423	MOVXTOD_O5_F2
	/* 128-bit increment: bump %g3 only when %g7 wrapped to zero. */
1424	add		%g7, 1, %g7
1425	add		%g3, 1, %o5
1426	movrz		%g7, %o5, %g3
	/* Second counter block of the pair into %f4/%f6. */
1427	xor		%g1, %g3, %o5
1428	MOVXTOD_O5_F4
1429	xor		%g2, %g7, %o5
1430	MOVXTOD_O5_F6
1431	add		%g7, 1, %g7
1432	add		%g3, 1, %o5
1433	movrz		%g7, %o5, %g3
	/* %f56-%f62 are handed to the macro, then reused for the input. */
1434	ENCRYPT_192_2(8, 0, 2, 4, 6, 56, 58, 60, 62)
1435	ldd		[%o1 + 0x00], %f56
1436	ldd		[%o1 + 0x08], %f58
1437	ldd		[%o1 + 0x10], %f60
1438	ldd		[%o1 + 0x18], %f62
1439	fxor		%f56, %f0, %f56
1440	fxor		%f58, %f2, %f58
1441	fxor		%f60, %f4, %f60
1442	fxor		%f62, %f6, %f62
1443	std		%f56, [%o2 + 0x00]
1444	std		%f58, [%o2 + 0x08]
1445	std		%f60, [%o2 + 0x10]
1446	std		%f62, [%o2 + 0x18]
1447	subcc		%o3, 0x20, %o3
1448	add		%o1, 0x20, %o1
1449	brgz		%o3, 1b
1450	 add		%o2, 0x20, %o2
1451	brlz,pt		%o3, 11f
1452	 nop
	/* Single (or trailing) block. */
145310:	xor		%g1, %g3, %o5
1454	MOVXTOD_O5_F0
1455	xor		%g2, %g7, %o5
1456	MOVXTOD_O5_F2
1457	add		%g7, 1, %g7
1458	add		%g3, 1, %o5
1459	movrz		%g7, %o5, %g3
1460	ENCRYPT_192(8, 0, 2, 4, 6)
1461	ldd		[%o1 + 0x00], %f4
1462	ldd		[%o1 + 0x08], %f6
1463	fxor		%f4, %f0, %f4
1464	fxor		%f6, %f2, %f6
1465	std		%f4, [%o2 + 0x00]
1466	std		%f6, [%o2 + 0x08]
	/* Write the advanced counter back; second store is in the delay slot. */
146711:	stx		%g3, [%o4 + 0x00]
1468	retl
1469	 stx		%g7, [%o4 + 0x08]
1470ENDPROC(aes_sparc64_ctr_crypt_192)
1471
1472	.align		32
/*
 * aes_sparc64_ctr_crypt_256: counter-mode processing of `len` bytes from
 * `input` to `output`.
 *
 * In:  %o0 = key schedule, %o1 = input, %o2 = output, %o3 = len,
 *      %o4 = 16-byte counter block (read on entry, updated on exit).
 *      The round keys used by the ENCRYPT_256* macros (KEY_BASE = 8) are
 *      expected to be in the FP registers already; only %f56-%f62 are
 *      (re)loaded here.
 * Clobbers at least %g1-%g3, %g7, %o1-%o3, %o5, %f0-%f6 and the condition
 * codes.
 *
 * %g3:%g7 hold the counter (%g7 is the low-order half: it is incremented
 * first and a wrap to zero carries into %g3).  For each block the counter is
 * XORed with the first 16 key bytes (%g1/%g2), moved to the FP registers,
 * encrypted, and the result is XORed with the input block.
 *
 * %o3 is biased by -0x10: zero on entry means exactly one block (straight to
 * label 10).  Otherwise blocks are handled in pairs; after each pair a
 * negative %o3 means done and zero means one block is left for label 10.
 * len must be a non-zero multiple of 16; this is not checked.
 *
 * The two-block loop loads its input into %f56-%f62, which are also the
 * registers holding the key words at key + 0xd0..0xe8, and it can return
 * through label 11 with the input data still in them.  The single-block
 * path therefore reloads those four registers itself on every way in,
 * including the direct `be 10f` entry, instead of only when falling out of
 * the loop.
 */
ENTRY(aes_sparc64_ctr_crypt_256)
	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
	ldx		[%o4 + 0x00], %g3
	ldx		[%o4 + 0x08], %g7
	subcc		%o3, 0x10, %o3
	ldx		[%o0 + 0x00], %g1
	be		10f
	 ldx		[%o0 + 0x08], %g2
	/* First counter block of the pair into %f0/%f2. */
1:	xor		%g1, %g3, %o5
	MOVXTOD_O5_F0
	xor		%g2, %g7, %o5
	MOVXTOD_O5_F2
	/* 128-bit increment: bump %g3 only when %g7 wrapped to zero. */
	add		%g7, 1, %g7
	add		%g3, 1, %o5
	movrz		%g7, %o5, %g3
	/* Second counter block of the pair into %f4/%f6. */
	xor		%g1, %g3, %o5
	MOVXTOD_O5_F4
	xor		%g2, %g7, %o5
	MOVXTOD_O5_F6
	add		%g7, 1, %g7
	add		%g3, 1, %o5
	movrz		%g7, %o5, %g3
	ENCRYPT_256_2(8, 0, 2, 4, 6)
	/* Input goes into %f56-%f62, overwriting the key words held there. */
	ldd		[%o1 + 0x00], %f56
	ldd		[%o1 + 0x08], %f58
	ldd		[%o1 + 0x10], %f60
	ldd		[%o1 + 0x18], %f62
	fxor		%f56, %f0, %f56
	fxor		%f58, %f2, %f58
	fxor		%f60, %f4, %f60
	fxor		%f62, %f6, %f62
	std		%f56, [%o2 + 0x00]
	std		%f58, [%o2 + 0x08]
	std		%f60, [%o2 + 0x10]
	std		%f62, [%o2 + 0x18]
	subcc		%o3, 0x20, %o3
	add		%o1, 0x20, %o1
	brgz		%o3, 1b
	 add		%o2, 0x20, %o2
	brlz,pt		%o3, 11f
	 nop
	/* Single (or trailing) block: restore the last key words first. */
10:	ldd		[%o0 + 0xd0], %f56
	ldd		[%o0 + 0xd8], %f58
	ldd		[%o0 + 0xe0], %f60
	ldd		[%o0 + 0xe8], %f62
	xor		%g1, %g3, %o5
	MOVXTOD_O5_F0
	xor		%g2, %g7, %o5
	MOVXTOD_O5_F2
	add		%g7, 1, %g7
	add		%g3, 1, %o5
	movrz		%g7, %o5, %g3
	ENCRYPT_256(8, 0, 2, 4, 6)
	ldd		[%o1 + 0x00], %f4
	ldd		[%o1 + 0x08], %f6
	fxor		%f4, %f0, %f4
	fxor		%f6, %f2, %f6
	std		%f4, [%o2 + 0x00]
	std		%f6, [%o2 + 0x08]
	/* Write the advanced counter back; second store is in the delay slot. */
11:	stx		%g3, [%o4 + 0x00]
	retl
	 stx		%g7, [%o4 + 0x08]
ENDPROC(aes_sparc64_ctr_crypt_256)
1536