xref: /openbmc/linux/arch/x86/crypto/des3_ede-asm_64.S (revision 1ac731c529cd4d6adbce134754b51ff7d822b145)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * des3_ede-asm_64.S  -  x86-64 assembly implementation of 3DES cipher
4  *
5  * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
6  */
7 
8 #include <linux/linkage.h>
9 
10 .file "des3_ede-asm_64.S"
11 .text
12 
/*
 * Addresses of the eight lookup tables in .rodata. The tables are stored
 * back to back starting at .L_s1 and each one is 64 quadwords (64*8 bytes)
 * long, so every macro is simply the previous table plus 64*8 bytes.
 */
13 #define s1 .L_s1
14 #define s2 ((s1) + (64*8))
15 #define s3 ((s2) + (64*8))
16 #define s4 ((s3) + (64*8))
17 #define s5 ((s4) + (64*8))
18 #define s6 ((s5) + (64*8))
19 #define s7 ((s6) + (64*8))
20 #define s8 ((s7) + (64*8))
21 
22 /* register macros */
/* CTX: pointer to the round keys (first argument of both entry points). */
23 #define CTX %rdi
24 
/*
 * RLn / RRn: the two halves of block n. The 3-way code uses n = 0..2; the
 * 1-way code keeps its block in RL0/RR0 and borrows RL1 as a scratch index
 * inside round1. A trailing "d" names the 32-bit view of the same register.
 */
25 #define RL0 %r8
26 #define RL1 %r9
27 #define RL2 %r10
28 
29 #define RL0d %r8d
30 #define RL1d %r9d
31 #define RL2d %r10d
32 
33 #define RR0 %r11
34 #define RR1 %r12
35 #define RR2 %r13
36 
37 #define RR0d %r11d
38 #define RR1d %r12d
39 #define RR2d %r13d
40 
/*
 * RWn: work registers. In the 3-way code RW0..RW2 each hold the current
 * round key XORed with one block's round input; the 1-way code uses RW0 for
 * that and RW1 as a table base pointer. The "bl"/"bh" forms are the two low
 * bytes, which the round macros use as table indexes.
 */
41 #define RW0 %rax
42 #define RW1 %rbx
43 #define RW2 %rcx
44 
45 #define RW0d %eax
46 #define RW1d %ebx
47 #define RW2d %ecx
48 
49 #define RW0bl %al
50 #define RW1bl %bl
51 #define RW2bl %cl
52 
53 #define RW0bh %ah
54 #define RW1bh %bh
55 #define RW2bh %ch
56 
/*
 * RTn: temporaries. RT1 (%rsi) and RT3 (%rdx) are also the dst and src
 * arguments of the entry points, which is why both functions push %rsi and
 * finish reading the input through %rdx before running any macro that
 * writes RT1 or RT3.
 */
57 #define RT0 %r15
58 #define RT1 %rsi
59 #define RT2 %r14
60 #define RT3 %rdx
61 
62 #define RT0d %r15d
63 #define RT1d %esi
64 #define RT2d %r14d
65 #define RT3d %edx
66 
67 /***********************************************************************
68  * 1-way 3DES
69  ***********************************************************************/
/*
 * do_permutation(a, b, offset, mask): exchange bit groups between the two
 * 32-bit registers a and b.
 *
 * Computes t = ((a >> offset) ^ b) & mask, then b ^= t and a ^= t << offset,
 * so the bits of b selected by mask trade places with the bits of a selected
 * by (mask << offset).
 *
 * Clobbers RT0 and the flags.
 */
70 #define do_permutation(a, b, offset, mask) \
71 	movl a, RT0d; \
72 	shrl $(offset), RT0d; \
73 	xorl b, RT0d; \
74 	andl $(mask), RT0d; \
75 	xorl RT0d, b; \
76 	shll $(offset), RT0d; \
77 	xorl RT0d, a;
78 
/*
 * expand_to_64bits(val, mask): widen a 32-bit half into the 64-bit form
 * consumed by the round macros.
 *
 *   val = (((u64)ror32(val##d, 4) << 32) | val) & mask
 *
 * val is the 64-bit register name; val##d is its 32-bit view. mask is a
 * 64-bit register operand.
 * NOTE(review): the OR uses all 64 bits of val, so this presumably relies on
 * the upper half of val being zero on entry (the callers in this file last
 * wrote val through its 32-bit name) - confirm before reusing elsewhere.
 *
 * Clobbers RT0 and the flags.
 */
79 #define expand_to_64bits(val, mask) \
80 	movl val##d, RT0d; \
81 	rorl $4, RT0d; \
82 	shlq $32, RT0; \
83 	orq RT0, val; \
84 	andq mask, val;
85 
/*
 * compress_to_64bits(val): fold the 64-bit round form back into 32 bits.
 *
 *   val##d |= rol32(val >> 32, 4)
 *
 * The final OR is a 32-bit operation, so the result ends up in the low half
 * of val.
 *
 * Clobbers RT0 and the flags.
 */
86 #define compress_to_64bits(val) \
87 	movq val, RT0; \
88 	shrq $32, RT0; \
89 	roll $4, RT0d; \
90 	orl RT0d, val##d;
91 
/*
 * initial_permutation(left, right): prepare one block for the rounds.
 *
 * left/right are 64-bit register names holding the two 32-bit input words.
 * The macro runs four do_permutation bit exchanges on the 32-bit halves,
 * then a rotate-left-by-one combined with an exchange of the 0xaaaaaaaa
 * bits, and finally expands both halves with expand_to_64bits using the
 * mask 0x3f3f3f3f3f3f3f3f (six bits kept in every byte).
 *
 * Clobbers RT0, RT3 (loaded with the mask), RW0 and the flags. Because RT3
 * is %rdx, any input still addressed through %rdx must be read first.
 */
92 #define initial_permutation(left, right) \
93 	do_permutation(left##d, right##d,  4, 0x0f0f0f0f); \
94 	do_permutation(left##d, right##d, 16, 0x0000ffff); \
95 	do_permutation(right##d, left##d,  2, 0x33333333); \
96 	do_permutation(right##d, left##d,  8, 0x00ff00ff); \
97 	movabs $0x3f3f3f3f3f3f3f3f, RT3; \
98 	movl left##d, RW0d; \
99 	roll $1, right##d; \
100 	xorl right##d, RW0d; \
101 	andl $0xaaaaaaaa, RW0d; \
102 	xorl RW0d, left##d; \
103 	xorl RW0d, right##d; \
104 	roll $1, left##d; \
105 	expand_to_64bits(right, RT3); \
106 	expand_to_64bits(left, RT3);
107 
/*
 * final_permutation(left, right): turn the round output back into two
 * 32-bit words.
 *
 * Mirrors initial_permutation step by step: both halves are compressed with
 * compress_to_64bits, a rotate-right-by-one is combined with an exchange of
 * the 0xaaaaaaaa bits, and the four do_permutation exchanges are applied in
 * the reverse order.
 *
 * Clobbers RT0, RW0 and the flags.
 */
108 #define final_permutation(left, right) \
109 	compress_to_64bits(right); \
110 	compress_to_64bits(left); \
111 	movl right##d, RW0d; \
112 	rorl $1, left##d; \
113 	xorl left##d, RW0d; \
114 	andl $0xaaaaaaaa, RW0d; \
115 	xorl RW0d, right##d; \
116 	xorl RW0d, left##d; \
117 	rorl $1, right##d; \
118 	do_permutation(right##d, left##d,  8, 0x00ff00ff); \
119 	do_permutation(right##d, left##d,  2, 0x33333333); \
120 	do_permutation(left##d, right##d, 16, 0x0000ffff); \
121 	do_permutation(left##d, right##d,  4, 0x0f0f0f0f);
122 
/*
 * round1(n, from, to, load_next_key): one round on a single block.
 *
 * On entry RW0 holds round key n. The macro XORs `from` into RW0, then uses
 * each of the eight bytes of the result as an index into one lookup table
 * and XORs all eight 64-bit table entries into `to`:
 *
 *   byte 0 -> s8, byte 1 -> s6, byte 2 -> s4, byte 3 -> s2,
 *   byte 4 -> s7, byte 5 -> s5, byte 6 -> s3, byte 7 -> s1
 *
 * Half of the lookups are accumulated in RT0 first and merged into `to`
 * near the end. load_next_key(n, RW0) is a hook that is invoked once all
 * bytes have been extracted from RW0; the callers pass either the
 * load_next_key macro (fetch key n + 1) or dummy2 (do nothing).
 *
 * Clobbers RW0, RW1 (table base), RT0-RT3, RL1 (scratch index) and the
 * flags.
 */
123 #define round1(n, from, to, load_next_key) \
124 	xorq from, RW0; \
125 	\
126 	movzbl RW0bl, RT0d; \
127 	movzbl RW0bh, RT1d; \
128 	shrq $16, RW0; \
129 	movzbl RW0bl, RT2d; \
130 	movzbl RW0bh, RT3d; \
131 	shrq $16, RW0; \
132 	leaq s8(%rip), RW1; \
133 	movq (RW1, RT0, 8), RT0; \
134 	leaq s6(%rip), RW1; \
135 	xorq (RW1, RT1, 8), to; \
136 	movzbl RW0bl, RL1d; /* RL1 is only a spare index register here */ \
137 	movzbl RW0bh, RT1d; \
138 	shrl $16, RW0d; \
139 	leaq s4(%rip), RW1; \
140 	xorq (RW1, RT2, 8), RT0; \
141 	leaq s2(%rip), RW1; \
142 	xorq (RW1, RT3, 8), to; \
143 	movzbl RW0bl, RT2d; \
144 	movzbl RW0bh, RT3d; \
145 	leaq s7(%rip), RW1; \
146 	xorq (RW1, RL1, 8), RT0; \
147 	leaq s5(%rip), RW1; \
148 	xorq (RW1, RT1, 8), to; \
149 	leaq s3(%rip), RW1; \
150 	xorq (RW1, RT2, 8), RT0; \
151 	load_next_key(n, RW0); \
152 	xorq RT0, to; \
153 	leaq s1(%rip), RW1; \
154 	xorq (RW1, RT3, 8), to; \
155 
/*
 * load_next_key(idx, keyreg): fetch the quadword round key that follows key
 * number idx, i.e. the 8-byte entry at index idx + 1 of the array CTX
 * points to, into keyreg.
 */
156 #define load_next_key(idx, keyreg) \
157 	movq (8 * ((idx) + 1))(CTX), keyreg;
158 
/*
 * dummy2(a, b): two-argument macro that expands to nothing. It is passed in
 * place of load_next_key (and of the do_movq hook in round3) for the last
 * round, where no further key has to be loaded.
 */
158 #define dummy2(a, b) /*_*/
160 
/*
 * read_block(src, lhalf, rhalf): load one 8-byte block.
 *
 * The 32-bit word at src goes to lhalf##d and the word at src + 4 to
 * rhalf##d; each word is byte-swapped right after it is loaded. src must be
 * a register different from lhalf and rhalf.
 */
161 #define read_block(src, lhalf, rhalf) \
162 	movl    (src), lhalf##d; \
163 	bswapl lhalf##d; \
164 	movl   4(src), rhalf##d; \
165 	bswapl rhalf##d;
166 
/*
 * write_block(dst, lhalf, rhalf): store one 8-byte block.
 *
 * lhalf##d is byte-swapped and stored at dst, then rhalf##d is byte-swapped
 * and stored at dst + 4. Both registers are left holding the swapped
 * values. dst must be a register different from lhalf and rhalf.
 */
167 #define write_block(dst, lhalf, rhalf) \
168 	bswapl lhalf##d; \
169 	movl   lhalf##d,   (dst); \
170 	bswapl rhalf##d; \
171 	movl   rhalf##d, 4(dst);
172 
/*
 * des3_ede_x86_64_crypt_blk: process a single 8-byte block.
 *
 * Loads two byte-swapped 32-bit words from src, runs initial_permutation,
 * 48 rounds (three groups of 16) with the quadword round keys CTX[0..47],
 * then final_permutation, and stores the two byte-swapped result words to
 * dst.
 *
 * %rbx and %r12-%r15 are saved and restored. dst is kept on the stack for
 * the duration of the rounds because %rsi doubles as RT1.
 */
173 SYM_FUNC_START(des3_ede_x86_64_crypt_blk)
174 	/* input:
175 	 *	%rdi: round keys, CTX
176 	 *	%rsi: dst
177 	 *	%rdx: src
178 	 */
179 	pushq %rbx;
180 	pushq %r12;
181 	pushq %r13;
182 	pushq %r14;
183 	pushq %r15;
184 
185 	pushq %rsi; /* dst */
186 
	/* src must be consumed here: initial_permutation overwrites RT3 (%rdx) */
187 	read_block(%rdx, RL0, RR0);
188 	initial_permutation(RL0, RR0);
189 
	/* round key 0; every round1 then loads the key for the following round */
190 	movq (CTX), RW0;
191 
	/* rounds 0-15: from/to alternate, starting with RR0 -> RL0 */
192 	round1(0, RR0, RL0, load_next_key);
193 	round1(1, RL0, RR0, load_next_key);
194 	round1(2, RR0, RL0, load_next_key);
195 	round1(3, RL0, RR0, load_next_key);
196 	round1(4, RR0, RL0, load_next_key);
197 	round1(5, RL0, RR0, load_next_key);
198 	round1(6, RR0, RL0, load_next_key);
199 	round1(7, RL0, RR0, load_next_key);
200 	round1(8, RR0, RL0, load_next_key);
201 	round1(9, RL0, RR0, load_next_key);
202 	round1(10, RR0, RL0, load_next_key);
203 	round1(11, RL0, RR0, load_next_key);
204 	round1(12, RR0, RL0, load_next_key);
205 	round1(13, RL0, RR0, load_next_key);
206 	round1(14, RR0, RL0, load_next_key);
207 	round1(15, RL0, RR0, load_next_key);
208 
	/*
	 * rounds 16-31: round 16 repeats the (from, to) pair of round 15
	 * instead of alternating, so the roles of the halves are exchanged
	 * for this group.
	 */
209 	round1(16+0, RL0, RR0, load_next_key);
210 	round1(16+1, RR0, RL0, load_next_key);
211 	round1(16+2, RL0, RR0, load_next_key);
212 	round1(16+3, RR0, RL0, load_next_key);
213 	round1(16+4, RL0, RR0, load_next_key);
214 	round1(16+5, RR0, RL0, load_next_key);
215 	round1(16+6, RL0, RR0, load_next_key);
216 	round1(16+7, RR0, RL0, load_next_key);
217 	round1(16+8, RL0, RR0, load_next_key);
218 	round1(16+9, RR0, RL0, load_next_key);
219 	round1(16+10, RL0, RR0, load_next_key);
220 	round1(16+11, RR0, RL0, load_next_key);
221 	round1(16+12, RL0, RR0, load_next_key);
222 	round1(16+13, RR0, RL0, load_next_key);
223 	round1(16+14, RL0, RR0, load_next_key);
224 	round1(16+15, RR0, RL0, load_next_key);
225 
	/*
	 * rounds 32-47: round 32 again repeats the pair of round 31, which
	 * brings back the order used by rounds 0-15. The last round passes
	 * dummy2 so no key is read beyond CTX[47].
	 */
226 	round1(32+0, RR0, RL0, load_next_key);
227 	round1(32+1, RL0, RR0, load_next_key);
228 	round1(32+2, RR0, RL0, load_next_key);
229 	round1(32+3, RL0, RR0, load_next_key);
230 	round1(32+4, RR0, RL0, load_next_key);
231 	round1(32+5, RL0, RR0, load_next_key);
232 	round1(32+6, RR0, RL0, load_next_key);
233 	round1(32+7, RL0, RR0, load_next_key);
234 	round1(32+8, RR0, RL0, load_next_key);
235 	round1(32+9, RL0, RR0, load_next_key);
236 	round1(32+10, RR0, RL0, load_next_key);
237 	round1(32+11, RL0, RR0, load_next_key);
238 	round1(32+12, RR0, RL0, load_next_key);
239 	round1(32+13, RL0, RR0, load_next_key);
240 	round1(32+14, RR0, RL0, load_next_key);
241 	round1(32+15, RL0, RR0, dummy2);
242 
	/* the output takes RR0 as its first word and RL0 as its second */
243 	final_permutation(RR0, RL0);
244 
245 	popq %rsi /* dst */
246 	write_block(%rsi, RR0, RL0);
247 
248 	popq %r15;
249 	popq %r14;
250 	popq %r13;
251 	popq %r12;
252 	popq %rbx;
253 
254 	RET;
255 SYM_FUNC_END(des3_ede_x86_64_crypt_blk)
256 
257 /***********************************************************************
258  * 3-way 3DES
259  ***********************************************************************/
/*
 * expand_to_64bits() and compress_to_64bits() are shared with the 1-way
 * code and are defined once, ahead of initial_permutation() earlier in this
 * file. An identical second copy of both macros used to sit here; it was
 * dropped so that the two definitions cannot drift apart (the C
 * preprocessor diagnoses a redefinition whose tokens differ).
 */
272 
/*
 * initial_permutation3(left, right): initial_permutation applied to three
 * blocks at once.
 *
 * left/right are register-name prefixes; the macro appends 0, 1 and 2 (and
 * "d" for the 32-bit views), so with left = RL and right = RR it works on
 * RL0/RR0, RL1/RR1 and RL2/RR2. The per-block steps are the same as in
 * initial_permutation, with the indentation marking which block a line
 * belongs to.
 *
 * Clobbers RT0, RT3 (loaded with the mask 0x3f3f3f3f3f3f3f3f), RW0, RW1, RW2
 * and the flags.
 */
273 #define initial_permutation3(left, right) \
274 	do_permutation(left##0d, right##0d,  4, 0x0f0f0f0f); \
275 	do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
276 	  do_permutation(left##1d, right##1d,  4, 0x0f0f0f0f); \
277 	  do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
278 	    do_permutation(left##2d, right##2d,  4, 0x0f0f0f0f); \
279 	    do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
280 	    \
281 	do_permutation(right##0d, left##0d,  2, 0x33333333); \
282 	do_permutation(right##0d, left##0d,  8, 0x00ff00ff); \
283 	  do_permutation(right##1d, left##1d,  2, 0x33333333); \
284 	  do_permutation(right##1d, left##1d,  8, 0x00ff00ff); \
285 	    do_permutation(right##2d, left##2d,  2, 0x33333333); \
286 	    do_permutation(right##2d, left##2d,  8, 0x00ff00ff); \
287 	    \
288 	movabs $0x3f3f3f3f3f3f3f3f, RT3; \
289 	    \
290 	movl left##0d, RW0d; \
291 	roll $1, right##0d; \
292 	xorl right##0d, RW0d; \
293 	andl $0xaaaaaaaa, RW0d; \
294 	xorl RW0d, left##0d; \
295 	xorl RW0d, right##0d; \
296 	roll $1, left##0d; \
297 	expand_to_64bits(right##0, RT3); \
298 	expand_to_64bits(left##0, RT3); \
299 	  movl left##1d, RW1d; \
300 	  roll $1, right##1d; \
301 	  xorl right##1d, RW1d; \
302 	  andl $0xaaaaaaaa, RW1d; \
303 	  xorl RW1d, left##1d; \
304 	  xorl RW1d, right##1d; \
305 	  roll $1, left##1d; \
306 	  expand_to_64bits(right##1, RT3); \
307 	  expand_to_64bits(left##1, RT3); \
308 	    movl left##2d, RW2d; \
309 	    roll $1, right##2d; \
310 	    xorl right##2d, RW2d; \
311 	    andl $0xaaaaaaaa, RW2d; \
312 	    xorl RW2d, left##2d; \
313 	    xorl RW2d, right##2d; \
314 	    roll $1, left##2d; \
315 	    expand_to_64bits(right##2, RT3); \
316 	    expand_to_64bits(left##2, RT3);
317 
/*
 * final_permutation3(left, right): final_permutation applied to three
 * blocks at once.
 *
 * left/right are register-name prefixes to which 0, 1 and 2 are appended.
 * For each block both halves are compressed with compress_to_64bits and run
 * through the rotate-right / 0xaaaaaaaa exchange; afterwards the
 * do_permutation exchanges are applied in the reverse order of
 * initial_permutation3.
 *
 * Clobbers RT0, RW0, RW1, RW2 and the flags.
 */
318 #define final_permutation3(left, right) \
319 	compress_to_64bits(right##0); \
320 	compress_to_64bits(left##0); \
321 	movl right##0d, RW0d; \
322 	rorl $1, left##0d; \
323 	xorl left##0d, RW0d; \
324 	andl $0xaaaaaaaa, RW0d; \
325 	xorl RW0d, right##0d; \
326 	xorl RW0d, left##0d; \
327 	rorl $1, right##0d; \
328 	  compress_to_64bits(right##1); \
329 	  compress_to_64bits(left##1); \
330 	  movl right##1d, RW1d; \
331 	  rorl $1, left##1d; \
332 	  xorl left##1d, RW1d; \
333 	  andl $0xaaaaaaaa, RW1d; \
334 	  xorl RW1d, right##1d; \
335 	  xorl RW1d, left##1d; \
336 	  rorl $1, right##1d; \
337 	    compress_to_64bits(right##2); \
338 	    compress_to_64bits(left##2); \
339 	    movl right##2d, RW2d; \
340 	    rorl $1, left##2d; \
341 	    xorl left##2d, RW2d; \
342 	    andl $0xaaaaaaaa, RW2d; \
343 	    xorl RW2d, right##2d; \
344 	    xorl RW2d, left##2d; \
345 	    rorl $1, right##2d; \
346 	    \
347 	do_permutation(right##0d, left##0d,  8, 0x00ff00ff); \
348 	do_permutation(right##0d, left##0d,  2, 0x33333333); \
349 	  do_permutation(right##1d, left##1d,  8, 0x00ff00ff); \
350 	  do_permutation(right##1d, left##1d,  2, 0x33333333); \
351 	    do_permutation(right##2d, left##2d,  8, 0x00ff00ff); \
352 	    do_permutation(right##2d, left##2d,  2, 0x33333333); \
353 	    \
354 	do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
355 	do_permutation(left##0d, right##0d,  4, 0x0f0f0f0f); \
356 	  do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
357 	  do_permutation(left##1d, right##1d,  4, 0x0f0f0f0f); \
358 	    do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
359 	    do_permutation(left##2d, right##2d,  4, 0x0f0f0f0f);
360 
/*
 * round3(n, from, to, load_next_key, do_movq): one round on three blocks.
 *
 * from/to are register-name prefixes to which 0, 1 and 2 are appended. On
 * entry RW0, RW1 and RW2 all hold round key n. For each block i the macro
 * XORs from##i into RWi, then uses the eight bytes of the result as table
 * indexes and XORs the eight 64-bit table entries straight into to##i:
 *
 *   byte 0 -> s8, byte 1 -> s6, byte 2 -> s4, byte 3 -> s2,
 *   byte 4 -> s7, byte 5 -> s5, byte 6 -> s3, byte 7 -> s1
 *
 * Key handling: load_next_key(n, RW0) is invoked once the last bytes of
 * block 0 have been extracted, and do_movq(RW0, RWi) is invoked at the same
 * point for blocks 1 and 2, so with the real macros the following key is
 * read from memory once and copied to the other two work registers. The
 * last round passes dummy2 for both hooks.
 *
 * Clobbers RW0-RW2, RT1, RT2 (table base), RT3 and the flags.
 */
361 #define round3(n, from, to, load_next_key, do_movq) \
362 	xorq from##0, RW0; \
363 	movzbl RW0bl, RT3d; \
364 	movzbl RW0bh, RT1d; \
365 	shrq $16, RW0; \
366 	leaq s8(%rip), RT2; \
367 	xorq (RT2, RT3, 8), to##0; \
368 	leaq s6(%rip), RT2; \
369 	xorq (RT2, RT1, 8), to##0; \
370 	movzbl RW0bl, RT3d; \
371 	movzbl RW0bh, RT1d; \
372 	shrq $16, RW0; \
373 	leaq s4(%rip), RT2; \
374 	xorq (RT2, RT3, 8), to##0; \
375 	leaq s2(%rip), RT2; \
376 	xorq (RT2, RT1, 8), to##0; \
377 	movzbl RW0bl, RT3d; \
378 	movzbl RW0bh, RT1d; \
379 	shrl $16, RW0d; \
380 	leaq s7(%rip), RT2; \
381 	xorq (RT2, RT3, 8), to##0; \
382 	leaq s5(%rip), RT2; \
383 	xorq (RT2, RT1, 8), to##0; \
384 	movzbl RW0bl, RT3d; \
385 	movzbl RW0bh, RT1d; \
386 	load_next_key(n, RW0); \
387 	leaq s3(%rip), RT2; \
388 	xorq (RT2, RT3, 8), to##0; \
389 	leaq s1(%rip), RT2; \
390 	xorq (RT2, RT1, 8), to##0; \
391 		xorq from##1, RW1; \
392 		movzbl RW1bl, RT3d; \
393 		movzbl RW1bh, RT1d; \
394 		shrq $16, RW1; \
395 		leaq s8(%rip), RT2; \
396 		xorq (RT2, RT3, 8), to##1; \
397 		leaq s6(%rip), RT2; \
398 		xorq (RT2, RT1, 8), to##1; \
399 		movzbl RW1bl, RT3d; \
400 		movzbl RW1bh, RT1d; \
401 		shrq $16, RW1; \
402 		leaq s4(%rip), RT2; \
403 		xorq (RT2, RT3, 8), to##1; \
404 		leaq s2(%rip), RT2; \
405 		xorq (RT2, RT1, 8), to##1; \
406 		movzbl RW1bl, RT3d; \
407 		movzbl RW1bh, RT1d; \
408 		shrl $16, RW1d; \
409 		leaq s7(%rip), RT2; \
410 		xorq (RT2, RT3, 8), to##1; \
411 		leaq s5(%rip), RT2; \
412 		xorq (RT2, RT1, 8), to##1; \
413 		movzbl RW1bl, RT3d; \
414 		movzbl RW1bh, RT1d; \
415 		do_movq(RW0, RW1); \
416 		leaq s3(%rip), RT2; \
417 		xorq (RT2, RT3, 8), to##1; \
418 		leaq s1(%rip), RT2; \
419 		xorq (RT2, RT1, 8), to##1; \
420 			xorq from##2, RW2; \
421 			movzbl RW2bl, RT3d; \
422 			movzbl RW2bh, RT1d; \
423 			shrq $16, RW2; \
424 			leaq s8(%rip), RT2; \
425 			xorq (RT2, RT3, 8), to##2; \
426 			leaq s6(%rip), RT2; \
427 			xorq (RT2, RT1, 8), to##2; \
428 			movzbl RW2bl, RT3d; \
429 			movzbl RW2bh, RT1d; \
430 			shrq $16, RW2; \
431 			leaq s4(%rip), RT2; \
432 			xorq (RT2, RT3, 8), to##2; \
433 			leaq s2(%rip), RT2; \
434 			xorq (RT2, RT1, 8), to##2; \
435 			movzbl RW2bl, RT3d; \
436 			movzbl RW2bh, RT1d; \
437 			shrl $16, RW2d; \
438 			leaq s7(%rip), RT2; \
439 			xorq (RT2, RT3, 8), to##2; \
440 			leaq s5(%rip), RT2; \
441 			xorq (RT2, RT1, 8), to##2; \
442 			movzbl RW2bl, RT3d; \
443 			movzbl RW2bh, RT1d; \
444 			do_movq(RW0, RW2); \
445 			leaq s3(%rip), RT2; \
446 			xorq (RT2, RT3, 8), to##2; \
447 			leaq s1(%rip), RT2; \
448 			xorq (RT2, RT1, 8), to##2;
449 
/*
 * __movq(from, to): plain 64-bit move wrapped in a macro, so it can be
 * handed to round3 as its do_movq hook (and be swapped for dummy2 when no
 * copy is wanted).
 */
450 #define __movq(from, to) \
451 	movq from, to;
452 
/*
 * des3_ede_x86_64_crypt_blk_3way: process three consecutive 8-byte blocks.
 *
 * Loads six byte-swapped 32-bit words from src, runs initial_permutation3,
 * 48 rounds (three groups of 16) with the quadword round keys CTX[0..47]
 * applied to all three blocks, then final_permutation3, and stores the six
 * byte-swapped result words to dst.
 *
 * %rbx and %r12-%r15 are saved and restored. dst is kept on the stack for
 * the duration of the rounds because %rsi doubles as RT1.
 */
453 SYM_FUNC_START(des3_ede_x86_64_crypt_blk_3way)
454 	/* input:
455 	 *	%rdi: ctx, round keys
456 	 *	%rsi: dst (3 blocks)
457 	 *	%rdx: src (3 blocks)
458 	 */
459 
460 	pushq %rbx;
461 	pushq %r12;
462 	pushq %r13;
463 	pushq %r14;
464 	pushq %r15;
465 
466 	pushq %rsi /* dst */
467 
	/* src must be consumed here: initial_permutation3 overwrites RT3 (%rdx) */
468 	/* load input */
469 	movl 0 * 4(%rdx), RL0d;
470 	movl 1 * 4(%rdx), RR0d;
471 	movl 2 * 4(%rdx), RL1d;
472 	movl 3 * 4(%rdx), RR1d;
473 	movl 4 * 4(%rdx), RL2d;
474 	movl 5 * 4(%rdx), RR2d;
475 
476 	bswapl RL0d;
477 	bswapl RR0d;
478 	bswapl RL1d;
479 	bswapl RR1d;
480 	bswapl RL2d;
481 	bswapl RR2d;
482 
483 	initial_permutation3(RL, RR);
484 
	/* round key 0 for all three blocks; round3 refreshes RW0-RW2 itself */
485 	movq 0(CTX), RW0;
486 	movq RW0, RW1;
487 	movq RW0, RW2;
488 
	/* rounds 0-15: from/to alternate, starting with RR -> RL */
489 	round3(0, RR, RL, load_next_key, __movq);
490 	round3(1, RL, RR, load_next_key, __movq);
491 	round3(2, RR, RL, load_next_key, __movq);
492 	round3(3, RL, RR, load_next_key, __movq);
493 	round3(4, RR, RL, load_next_key, __movq);
494 	round3(5, RL, RR, load_next_key, __movq);
495 	round3(6, RR, RL, load_next_key, __movq);
496 	round3(7, RL, RR, load_next_key, __movq);
497 	round3(8, RR, RL, load_next_key, __movq);
498 	round3(9, RL, RR, load_next_key, __movq);
499 	round3(10, RR, RL, load_next_key, __movq);
500 	round3(11, RL, RR, load_next_key, __movq);
501 	round3(12, RR, RL, load_next_key, __movq);
502 	round3(13, RL, RR, load_next_key, __movq);
503 	round3(14, RR, RL, load_next_key, __movq);
504 	round3(15, RL, RR, load_next_key, __movq);
505 
	/*
	 * rounds 16-31: round 16 repeats the (from, to) pair of round 15
	 * instead of alternating, so the roles of the halves are exchanged
	 * for this group.
	 */
506 	round3(16+0, RL, RR, load_next_key, __movq);
507 	round3(16+1, RR, RL, load_next_key, __movq);
508 	round3(16+2, RL, RR, load_next_key, __movq);
509 	round3(16+3, RR, RL, load_next_key, __movq);
510 	round3(16+4, RL, RR, load_next_key, __movq);
511 	round3(16+5, RR, RL, load_next_key, __movq);
512 	round3(16+6, RL, RR, load_next_key, __movq);
513 	round3(16+7, RR, RL, load_next_key, __movq);
514 	round3(16+8, RL, RR, load_next_key, __movq);
515 	round3(16+9, RR, RL, load_next_key, __movq);
516 	round3(16+10, RL, RR, load_next_key, __movq);
517 	round3(16+11, RR, RL, load_next_key, __movq);
518 	round3(16+12, RL, RR, load_next_key, __movq);
519 	round3(16+13, RR, RL, load_next_key, __movq);
520 	round3(16+14, RL, RR, load_next_key, __movq);
521 	round3(16+15, RR, RL, load_next_key, __movq);
522 
	/*
	 * rounds 32-47: round 32 again repeats the pair of round 31. The last
	 * round passes dummy2 for both hooks so no key is read beyond
	 * CTX[47].
	 */
523 	round3(32+0, RR, RL, load_next_key, __movq);
524 	round3(32+1, RL, RR, load_next_key, __movq);
525 	round3(32+2, RR, RL, load_next_key, __movq);
526 	round3(32+3, RL, RR, load_next_key, __movq);
527 	round3(32+4, RR, RL, load_next_key, __movq);
528 	round3(32+5, RL, RR, load_next_key, __movq);
529 	round3(32+6, RR, RL, load_next_key, __movq);
530 	round3(32+7, RL, RR, load_next_key, __movq);
531 	round3(32+8, RR, RL, load_next_key, __movq);
532 	round3(32+9, RL, RR, load_next_key, __movq);
533 	round3(32+10, RR, RL, load_next_key, __movq);
534 	round3(32+11, RL, RR, load_next_key, __movq);
535 	round3(32+12, RR, RL, load_next_key, __movq);
536 	round3(32+13, RL, RR, load_next_key, __movq);
537 	round3(32+14, RR, RL, load_next_key, __movq);
538 	round3(32+15, RL, RR, dummy2, dummy2);
539 
	/* each output block takes RRn as its first word and RLn as its second */
540 	final_permutation3(RR, RL);
541 
542 	bswapl RR0d;
543 	bswapl RL0d;
544 	bswapl RR1d;
545 	bswapl RL1d;
546 	bswapl RR2d;
547 	bswapl RL2d;
548 
549 	popq %rsi /* dst */
550 	movl RR0d, 0 * 4(%rsi);
551 	movl RL0d, 1 * 4(%rsi);
552 	movl RR1d, 2 * 4(%rsi);
553 	movl RL1d, 3 * 4(%rsi);
554 	movl RR2d, 4 * 4(%rsi);
555 	movl RL2d, 5 * 4(%rsi);
556 
557 	popq %r15;
558 	popq %r14;
559 	popq %r13;
560 	popq %r12;
561 	popq %rbx;
562 
563 	RET;
564 SYM_FUNC_END(des3_ede_x86_64_crypt_blk_3way)
565 
/*
 * Lookup tables for round1/round3, reached through the s1..s8 macros.
 * There are eight tables of 64 quadwords each, stored contiguously from
 * .L_s1; the s2..s8 macros depend on exactly this size and order. The round
 * macros index a table with one byte of the key-XOR-data value, scaled by 8.
 */
566 .section	.rodata, "a", @progbits
567 .align 16
568 .L_s1:
569 	.quad 0x0010100001010400, 0x0000000000000000
570 	.quad 0x0000100000010000, 0x0010100001010404
571 	.quad 0x0010100001010004, 0x0000100000010404
572 	.quad 0x0000000000000004, 0x0000100000010000
573 	.quad 0x0000000000000400, 0x0010100001010400
574 	.quad 0x0010100001010404, 0x0000000000000400
575 	.quad 0x0010000001000404, 0x0010100001010004
576 	.quad 0x0010000001000000, 0x0000000000000004
577 	.quad 0x0000000000000404, 0x0010000001000400
578 	.quad 0x0010000001000400, 0x0000100000010400
579 	.quad 0x0000100000010400, 0x0010100001010000
580 	.quad 0x0010100001010000, 0x0010000001000404
581 	.quad 0x0000100000010004, 0x0010000001000004
582 	.quad 0x0010000001000004, 0x0000100000010004
583 	.quad 0x0000000000000000, 0x0000000000000404
584 	.quad 0x0000100000010404, 0x0010000001000000
585 	.quad 0x0000100000010000, 0x0010100001010404
586 	.quad 0x0000000000000004, 0x0010100001010000
587 	.quad 0x0010100001010400, 0x0010000001000000
588 	.quad 0x0010000001000000, 0x0000000000000400
589 	.quad 0x0010100001010004, 0x0000100000010000
590 	.quad 0x0000100000010400, 0x0010000001000004
591 	.quad 0x0000000000000400, 0x0000000000000004
592 	.quad 0x0010000001000404, 0x0000100000010404
593 	.quad 0x0010100001010404, 0x0000100000010004
594 	.quad 0x0010100001010000, 0x0010000001000404
595 	.quad 0x0010000001000004, 0x0000000000000404
596 	.quad 0x0000100000010404, 0x0010100001010400
597 	.quad 0x0000000000000404, 0x0010000001000400
598 	.quad 0x0010000001000400, 0x0000000000000000
599 	.quad 0x0000100000010004, 0x0000100000010400
600 	.quad 0x0000000000000000, 0x0010100001010004
601 .L_s2:
602 	.quad 0x0801080200100020, 0x0800080000000000
603 	.quad 0x0000080000000000, 0x0001080200100020
604 	.quad 0x0001000000100000, 0x0000000200000020
605 	.quad 0x0801000200100020, 0x0800080200000020
606 	.quad 0x0800000200000020, 0x0801080200100020
607 	.quad 0x0801080000100000, 0x0800000000000000
608 	.quad 0x0800080000000000, 0x0001000000100000
609 	.quad 0x0000000200000020, 0x0801000200100020
610 	.quad 0x0001080000100000, 0x0001000200100020
611 	.quad 0x0800080200000020, 0x0000000000000000
612 	.quad 0x0800000000000000, 0x0000080000000000
613 	.quad 0x0001080200100020, 0x0801000000100000
614 	.quad 0x0001000200100020, 0x0800000200000020
615 	.quad 0x0000000000000000, 0x0001080000100000
616 	.quad 0x0000080200000020, 0x0801080000100000
617 	.quad 0x0801000000100000, 0x0000080200000020
618 	.quad 0x0000000000000000, 0x0001080200100020
619 	.quad 0x0801000200100020, 0x0001000000100000
620 	.quad 0x0800080200000020, 0x0801000000100000
621 	.quad 0x0801080000100000, 0x0000080000000000
622 	.quad 0x0801000000100000, 0x0800080000000000
623 	.quad 0x0000000200000020, 0x0801080200100020
624 	.quad 0x0001080200100020, 0x0000000200000020
625 	.quad 0x0000080000000000, 0x0800000000000000
626 	.quad 0x0000080200000020, 0x0801080000100000
627 	.quad 0x0001000000100000, 0x0800000200000020
628 	.quad 0x0001000200100020, 0x0800080200000020
629 	.quad 0x0800000200000020, 0x0001000200100020
630 	.quad 0x0001080000100000, 0x0000000000000000
631 	.quad 0x0800080000000000, 0x0000080200000020
632 	.quad 0x0800000000000000, 0x0801000200100020
633 	.quad 0x0801080200100020, 0x0001080000100000
634 .L_s3:
635 	.quad 0x0000002000000208, 0x0000202008020200
636 	.quad 0x0000000000000000, 0x0000200008020008
637 	.quad 0x0000002008000200, 0x0000000000000000
638 	.quad 0x0000202000020208, 0x0000002008000200
639 	.quad 0x0000200000020008, 0x0000000008000008
640 	.quad 0x0000000008000008, 0x0000200000020000
641 	.quad 0x0000202008020208, 0x0000200000020008
642 	.quad 0x0000200008020000, 0x0000002000000208
643 	.quad 0x0000000008000000, 0x0000000000000008
644 	.quad 0x0000202008020200, 0x0000002000000200
645 	.quad 0x0000202000020200, 0x0000200008020000
646 	.quad 0x0000200008020008, 0x0000202000020208
647 	.quad 0x0000002008000208, 0x0000202000020200
648 	.quad 0x0000200000020000, 0x0000002008000208
649 	.quad 0x0000000000000008, 0x0000202008020208
650 	.quad 0x0000002000000200, 0x0000000008000000
651 	.quad 0x0000202008020200, 0x0000000008000000
652 	.quad 0x0000200000020008, 0x0000002000000208
653 	.quad 0x0000200000020000, 0x0000202008020200
654 	.quad 0x0000002008000200, 0x0000000000000000
655 	.quad 0x0000002000000200, 0x0000200000020008
656 	.quad 0x0000202008020208, 0x0000002008000200
657 	.quad 0x0000000008000008, 0x0000002000000200
658 	.quad 0x0000000000000000, 0x0000200008020008
659 	.quad 0x0000002008000208, 0x0000200000020000
660 	.quad 0x0000000008000000, 0x0000202008020208
661 	.quad 0x0000000000000008, 0x0000202000020208
662 	.quad 0x0000202000020200, 0x0000000008000008
663 	.quad 0x0000200008020000, 0x0000002008000208
664 	.quad 0x0000002000000208, 0x0000200008020000
665 	.quad 0x0000202000020208, 0x0000000000000008
666 	.quad 0x0000200008020008, 0x0000202000020200
667 .L_s4:
668 	.quad 0x1008020000002001, 0x1000020800002001
669 	.quad 0x1000020800002001, 0x0000000800000000
670 	.quad 0x0008020800002000, 0x1008000800000001
671 	.quad 0x1008000000000001, 0x1000020000002001
672 	.quad 0x0000000000000000, 0x0008020000002000
673 	.quad 0x0008020000002000, 0x1008020800002001
674 	.quad 0x1000000800000001, 0x0000000000000000
675 	.quad 0x0008000800000000, 0x1008000000000001
676 	.quad 0x1000000000000001, 0x0000020000002000
677 	.quad 0x0008000000000000, 0x1008020000002001
678 	.quad 0x0000000800000000, 0x0008000000000000
679 	.quad 0x1000020000002001, 0x0000020800002000
680 	.quad 0x1008000800000001, 0x1000000000000001
681 	.quad 0x0000020800002000, 0x0008000800000000
682 	.quad 0x0000020000002000, 0x0008020800002000
683 	.quad 0x1008020800002001, 0x1000000800000001
684 	.quad 0x0008000800000000, 0x1008000000000001
685 	.quad 0x0008020000002000, 0x1008020800002001
686 	.quad 0x1000000800000001, 0x0000000000000000
687 	.quad 0x0000000000000000, 0x0008020000002000
688 	.quad 0x0000020800002000, 0x0008000800000000
689 	.quad 0x1008000800000001, 0x1000000000000001
690 	.quad 0x1008020000002001, 0x1000020800002001
691 	.quad 0x1000020800002001, 0x0000000800000000
692 	.quad 0x1008020800002001, 0x1000000800000001
693 	.quad 0x1000000000000001, 0x0000020000002000
694 	.quad 0x1008000000000001, 0x1000020000002001
695 	.quad 0x0008020800002000, 0x1008000800000001
696 	.quad 0x1000020000002001, 0x0000020800002000
697 	.quad 0x0008000000000000, 0x1008020000002001
698 	.quad 0x0000000800000000, 0x0008000000000000
699 	.quad 0x0000020000002000, 0x0008020800002000
700 .L_s5:
701 	.quad 0x0000001000000100, 0x0020001002080100
702 	.quad 0x0020000002080000, 0x0420001002000100
703 	.quad 0x0000000000080000, 0x0000001000000100
704 	.quad 0x0400000000000000, 0x0020000002080000
705 	.quad 0x0400001000080100, 0x0000000000080000
706 	.quad 0x0020001002000100, 0x0400001000080100
707 	.quad 0x0420001002000100, 0x0420000002080000
708 	.quad 0x0000001000080100, 0x0400000000000000
709 	.quad 0x0020000002000000, 0x0400000000080000
710 	.quad 0x0400000000080000, 0x0000000000000000
711 	.quad 0x0400001000000100, 0x0420001002080100
712 	.quad 0x0420001002080100, 0x0020001002000100
713 	.quad 0x0420000002080000, 0x0400001000000100
714 	.quad 0x0000000000000000, 0x0420000002000000
715 	.quad 0x0020001002080100, 0x0020000002000000
716 	.quad 0x0420000002000000, 0x0000001000080100
717 	.quad 0x0000000000080000, 0x0420001002000100
718 	.quad 0x0000001000000100, 0x0020000002000000
719 	.quad 0x0400000000000000, 0x0020000002080000
720 	.quad 0x0420001002000100, 0x0400001000080100
721 	.quad 0x0020001002000100, 0x0400000000000000
722 	.quad 0x0420000002080000, 0x0020001002080100
723 	.quad 0x0400001000080100, 0x0000001000000100
724 	.quad 0x0020000002000000, 0x0420000002080000
725 	.quad 0x0420001002080100, 0x0000001000080100
726 	.quad 0x0420000002000000, 0x0420001002080100
727 	.quad 0x0020000002080000, 0x0000000000000000
728 	.quad 0x0400000000080000, 0x0420000002000000
729 	.quad 0x0000001000080100, 0x0020001002000100
730 	.quad 0x0400001000000100, 0x0000000000080000
731 	.quad 0x0000000000000000, 0x0400000000080000
732 	.quad 0x0020001002080100, 0x0400001000000100
733 .L_s6:
734 	.quad 0x0200000120000010, 0x0204000020000000
735 	.quad 0x0000040000000000, 0x0204040120000010
736 	.quad 0x0204000020000000, 0x0000000100000010
737 	.quad 0x0204040120000010, 0x0004000000000000
738 	.quad 0x0200040020000000, 0x0004040100000010
739 	.quad 0x0004000000000000, 0x0200000120000010
740 	.quad 0x0004000100000010, 0x0200040020000000
741 	.quad 0x0200000020000000, 0x0000040100000010
742 	.quad 0x0000000000000000, 0x0004000100000010
743 	.quad 0x0200040120000010, 0x0000040000000000
744 	.quad 0x0004040000000000, 0x0200040120000010
745 	.quad 0x0000000100000010, 0x0204000120000010
746 	.quad 0x0204000120000010, 0x0000000000000000
747 	.quad 0x0004040100000010, 0x0204040020000000
748 	.quad 0x0000040100000010, 0x0004040000000000
749 	.quad 0x0204040020000000, 0x0200000020000000
750 	.quad 0x0200040020000000, 0x0000000100000010
751 	.quad 0x0204000120000010, 0x0004040000000000
752 	.quad 0x0204040120000010, 0x0004000000000000
753 	.quad 0x0000040100000010, 0x0200000120000010
754 	.quad 0x0004000000000000, 0x0200040020000000
755 	.quad 0x0200000020000000, 0x0000040100000010
756 	.quad 0x0200000120000010, 0x0204040120000010
757 	.quad 0x0004040000000000, 0x0204000020000000
758 	.quad 0x0004040100000010, 0x0204040020000000
759 	.quad 0x0000000000000000, 0x0204000120000010
760 	.quad 0x0000000100000010, 0x0000040000000000
761 	.quad 0x0204000020000000, 0x0004040100000010
762 	.quad 0x0000040000000000, 0x0004000100000010
763 	.quad 0x0200040120000010, 0x0000000000000000
764 	.quad 0x0204040020000000, 0x0200000020000000
765 	.quad 0x0004000100000010, 0x0200040120000010
766 .L_s7:
767 	.quad 0x0002000000200000, 0x2002000004200002
768 	.quad 0x2000000004000802, 0x0000000000000000
769 	.quad 0x0000000000000800, 0x2000000004000802
770 	.quad 0x2002000000200802, 0x0002000004200800
771 	.quad 0x2002000004200802, 0x0002000000200000
772 	.quad 0x0000000000000000, 0x2000000004000002
773 	.quad 0x2000000000000002, 0x0000000004000000
774 	.quad 0x2002000004200002, 0x2000000000000802
775 	.quad 0x0000000004000800, 0x2002000000200802
776 	.quad 0x2002000000200002, 0x0000000004000800
777 	.quad 0x2000000004000002, 0x0002000004200000
778 	.quad 0x0002000004200800, 0x2002000000200002
779 	.quad 0x0002000004200000, 0x0000000000000800
780 	.quad 0x2000000000000802, 0x2002000004200802
781 	.quad 0x0002000000200800, 0x2000000000000002
782 	.quad 0x0000000004000000, 0x0002000000200800
783 	.quad 0x0000000004000000, 0x0002000000200800
784 	.quad 0x0002000000200000, 0x2000000004000802
785 	.quad 0x2000000004000802, 0x2002000004200002
786 	.quad 0x2002000004200002, 0x2000000000000002
787 	.quad 0x2002000000200002, 0x0000000004000000
788 	.quad 0x0000000004000800, 0x0002000000200000
789 	.quad 0x0002000004200800, 0x2000000000000802
790 	.quad 0x2002000000200802, 0x0002000004200800
791 	.quad 0x2000000000000802, 0x2000000004000002
792 	.quad 0x2002000004200802, 0x0002000004200000
793 	.quad 0x0002000000200800, 0x0000000000000000
794 	.quad 0x2000000000000002, 0x2002000004200802
795 	.quad 0x0000000000000000, 0x2002000000200802
796 	.quad 0x0002000004200000, 0x0000000000000800
797 	.quad 0x2000000004000002, 0x0000000004000800
798 	.quad 0x0000000000000800, 0x2002000000200002
799 .L_s8:
800 	.quad 0x0100010410001000, 0x0000010000001000
801 	.quad 0x0000000000040000, 0x0100010410041000
802 	.quad 0x0100000010000000, 0x0100010410001000
803 	.quad 0x0000000400000000, 0x0100000010000000
804 	.quad 0x0000000400040000, 0x0100000010040000
805 	.quad 0x0100010410041000, 0x0000010000041000
806 	.quad 0x0100010010041000, 0x0000010400041000
807 	.quad 0x0000010000001000, 0x0000000400000000
808 	.quad 0x0100000010040000, 0x0100000410000000
809 	.quad 0x0100010010001000, 0x0000010400001000
810 	.quad 0x0000010000041000, 0x0000000400040000
811 	.quad 0x0100000410040000, 0x0100010010041000
812 	.quad 0x0000010400001000, 0x0000000000000000
813 	.quad 0x0000000000000000, 0x0100000410040000
814 	.quad 0x0100000410000000, 0x0100010010001000
815 	.quad 0x0000010400041000, 0x0000000000040000
816 	.quad 0x0000010400041000, 0x0000000000040000
817 	.quad 0x0100010010041000, 0x0000010000001000
818 	.quad 0x0000000400000000, 0x0100000410040000
819 	.quad 0x0000010000001000, 0x0000010400041000
820 	.quad 0x0100010010001000, 0x0000000400000000
821 	.quad 0x0100000410000000, 0x0100000010040000
822 	.quad 0x0100000410040000, 0x0100000010000000
823 	.quad 0x0000000000040000, 0x0100010410001000
824 	.quad 0x0000000000000000, 0x0100010410041000
825 	.quad 0x0000000400040000, 0x0100000410000000
826 	.quad 0x0100000010040000, 0x0100010010001000
827 	.quad 0x0100010410001000, 0x0000000000000000
828 	.quad 0x0100010410041000, 0x0000010000041000
829 	.quad 0x0000010000041000, 0x0000010400001000
830 	.quad 0x0000010400001000, 0x0000000400040000
831 	.quad 0x0100000010000000, 0x0100010010041000
832