/*
 * Serpent Cipher 4-way parallel algorithm (i586/SSE2)
 *
 * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 *
 * Based on crypto/serpent.c by
 *  Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no>
 *                2003 Herbert Valerio Riedel <hvr@gnu.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */

/*
 * Syntax: GNU as, AT&T operand order (source first, destination last),
 * run through the C preprocessor.  All "functions" below the entry points
 * are preprocessor macros; statements inside them are separated by ';' and
 * every macro line except the last must end in a backslash-newline.
 */

#include <linux/linkage.h>

.file "serpent-sse2-i586-asm_32.S"
.text

/*
 * Byte offsets of the arguments relative to %esp at function entry.
 * Both entry points read their arguments from the stack at these offsets
 * and never move %esp, so the offsets stay valid for the whole function.
 */
#define arg_ctx 4
#define arg_dst 8
#define arg_src 12
#define arg_xor 16

/**********************************************************************
  4-way SSE2 serpent
 **********************************************************************/

/*
 * Register roles:
 *   CTX        base address used by get_key() to fetch round-key words
 *   RA..RE     five working registers; four hold cipher state and one is
 *              scratch, and which is which rotates from round to round
 *              (see the register order at each call site)
 *   RT0, RT1   temporaries for round-key words and block I/O
 *   RNOT       all-ones constant (set with pcmpeqd RNOT, RNOT at entry);
 *              "pxor RNOT, x" is therefore a bitwise NOT of x
 */
#define CTX %edx

#define RA %xmm0
#define RB %xmm1
#define RC %xmm2
#define RD %xmm3
#define RE %xmm4

#define RT0 %xmm5
#define RT1 %xmm6

#define RNOT %xmm7

/*
 * get_key(i, j, t): load the 32-bit word at byte offset (4*i + j)*4 from
 * CTX into t and replicate it into all four dword lanes of t.
 */
#define get_key(i, j, t) \
	movd (4*(i)+(j))*4(CTX), t; \
	pshufd $0, t, t;

/*
 * K(x0, x1, x2, x3, x4, i): XOR words 0..3 of round key i into x0..x3.
 * Clobbers x4, RT0 and RT1.
 */
#define K(x0, x1, x2, x3, x4, i) \
	get_key(i, 0, x4); \
	get_key(i, 1, RT0); \
	get_key(i, 2, RT1); \
	pxor x4, x0; \
	pxor RT0, x1; \
	pxor RT1, x2; \
	get_key(i, 3, x4); \
	pxor x4, x3;

/*
 * LK(x0, x1, x2, x3, x4, i): linear mixing step followed by the XOR of
 * round key i (encryption direction).
 *
 * SSE2 has no packed rotate, so each 32-bit left rotate by n is built as
 *	copy; pslld $n; psrld $(32 - n); por
 * The rotate amounts used are 13, 3, 1, 7, 5 and 22, plus plain left
 * shifts by 3 and 7.  The key words are XORed in as soon as the
 * corresponding state word is final.  Clobbers x4 and RT0.
 */
#define LK(x0, x1, x2, x3, x4, i) \
	movdqa x0, x4; \
	pslld $13, x0; \
	psrld $(32 - 13), x4; \
	por x4, x0; \
	pxor x0, x1; \
	movdqa x2, x4; \
	pslld $3, x2; \
	psrld $(32 - 3), x4; \
	por x4, x2; \
	pxor x2, x1; \
	movdqa x1, x4; \
	pslld $1, x1; \
	psrld $(32 - 1), x4; \
	por x4, x1; \
	movdqa x0, x4; \
	pslld $3, x4; \
	pxor x2, x3; \
	pxor x4, x3; \
	movdqa x3, x4; \
	pslld $7, x3; \
	psrld $(32 - 7), x4; \
	por x4, x3; \
	movdqa x1, x4; \
	pslld $7, x4; \
	pxor x1, x0; \
	pxor x3, x0; \
	pxor x3, x2; \
	pxor x4, x2; \
	movdqa x0, x4; \
	get_key(i, 1, RT0); \
	pxor RT0, x1; \
	get_key(i, 3, RT0); \
	pxor RT0, x3; \
	pslld $5, x0; \
	psrld $(32 - 5), x4; \
	por x4, x0; \
	movdqa x2, x4; \
	pslld $22, x2; \
	psrld $(32 - 22), x4; \
	por x4, x2; \
	get_key(i, 0, RT0); \
	pxor RT0, x0; \
	get_key(i, 2, RT0); \
	pxor RT0, x2;

/*
 * KL(x0, x1, x2, x3, x4, i): XOR of round key i followed by the linear
 * mixing step run backwards (decryption direction).
 *
 * Mirrors LK: the same rotate amounts appear as right rotates
 * (psrld $n / pslld $(32 - n) / por) and in the reverse order.
 * Clobbers x4, RT0 and RT1 (the latter two via K).
 */
#define KL(x0, x1, x2, x3, x4, i) \
	K(x0, x1, x2, x3, x4, i); \
	movdqa x0, x4; \
	psrld $5, x0; \
	pslld $(32 - 5), x4; \
	por x4, x0; \
	movdqa x2, x4; \
	psrld $22, x2; \
	pslld $(32 - 22), x4; \
	por x4, x2; \
	pxor x3, x2; \
	pxor x3, x0; \
	movdqa x1, x4; \
	pslld $7, x4; \
	pxor x1, x0; \
	pxor x4, x2; \
	movdqa x1, x4; \
	psrld $1, x1; \
	pslld $(32 - 1), x4; \
	por x4, x1; \
	movdqa x3, x4; \
	psrld $7, x3; \
	pslld $(32 - 7), x4; \
	por x4, x3; \
	pxor x0, x1; \
	movdqa x0, x4; \
	pslld $3, x4; \
	pxor x4, x3; \
	movdqa x0, x4; \
	psrld $13, x0; \
	pslld $(32 - 13), x4; \
	por x4, x0; \
	pxor x2, x1; \
	pxor x2, x3; \
	movdqa x2, x4; \
	psrld $3, x2; \
	pslld $(32 - 3), x4; \
	por x4, x2;

/*
 * S0..S7 (encryption) and SI0..SI7 (decryption): bitsliced S-boxes written
 * as straight-line pand/por/pxor sequences over x0..x3, with x4 as a fifth
 * register and RNOT supplying bitwise NOT.
 *
 * All five operand registers are overwritten.  The results do not come
 * back in x0..x3 order: each call site passes a different register order
 * to the step that follows an S-box, and that order encodes where the
 * S-box left its four output words.  Do not reorder instructions or
 * operands here without re-deriving those call-site permutations.
 */
#define S0(x0, x1, x2, x3, x4) \
	movdqa x3, x4; \
	por x0, x3; \
	pxor x4, x0; \
	pxor x2, x4; \
	pxor RNOT, x4; \
	pxor x1, x3; \
	pand x0, x1; \
	pxor x4, x1; \
	pxor x0, x2; \
	pxor x3, x0; \
	por x0, x4; \
	pxor x2, x0; \
	pand x1, x2; \
	pxor x2, x3; \
	pxor RNOT, x1; \
	pxor x4, x2; \
	pxor x2, x1;

#define S1(x0, x1, x2, x3, x4) \
	movdqa x1, x4; \
	pxor x0, x1; \
	pxor x3, x0; \
	pxor RNOT, x3; \
	pand x1, x4; \
	por x1, x0; \
	pxor x2, x3; \
	pxor x3, x0; \
	pxor x3, x1; \
	pxor x4, x3; \
	por x4, x1; \
	pxor x2, x4; \
	pand x0, x2; \
	pxor x1, x2; \
	por x0, x1; \
	pxor RNOT, x0; \
	pxor x2, x0; \
	pxor x1, x4;

#define S2(x0, x1, x2, x3, x4) \
	pxor RNOT, x3; \
	pxor x0, x1; \
	movdqa x0, x4; \
	pand x2, x0; \
	pxor x3, x0; \
	por x4, x3; \
	pxor x1, x2; \
	pxor x1, x3; \
	pand x0, x1; \
	pxor x2, x0; \
	pand x3, x2; \
	por x1, x3; \
	pxor RNOT, x0; \
	pxor x0, x3; \
	pxor x0, x4; \
	pxor x2, x0; \
	por x2, x1;

#define S3(x0, x1, x2, x3, x4) \
	movdqa x1, x4; \
	pxor x3, x1; \
	por x0, x3; \
	pand x0, x4; \
	pxor x2, x0; \
	pxor x1, x2; \
	pand x3, x1; \
	pxor x3, x2; \
	por x4, x0; \
	pxor x3, x4; \
	pxor x0, x1; \
	pand x3, x0; \
	pand x4, x3; \
	pxor x2, x3; \
	por x1, x4; \
	pand x1, x2; \
	pxor x3, x4; \
	pxor x3, x0; \
	pxor x2, x3;

#define S4(x0, x1, x2, x3, x4) \
	movdqa x3, x4; \
	pand x0, x3; \
	pxor x4, x0; \
	pxor x2, x3; \
	por x4, x2; \
	pxor x1, x0; \
	pxor x3, x4; \
	por x0, x2; \
	pxor x1, x2; \
	pand x0, x1; \
	pxor x4, x1; \
	pand x2, x4; \
	pxor x3, x2; \
	pxor x0, x4; \
	por x1, x3; \
	pxor RNOT, x1; \
	pxor x0, x3;

#define S5(x0, x1, x2, x3, x4) \
	movdqa x1, x4; \
	por x0, x1; \
	pxor x1, x2; \
	pxor RNOT, x3; \
	pxor x0, x4; \
	pxor x2, x0; \
	pand x4, x1; \
	por x3, x4; \
	pxor x0, x4; \
	pand x3, x0; \
	pxor x3, x1; \
	pxor x2, x3; \
	pxor x1, x0; \
	pand x4, x2; \
	pxor x2, x1; \
	pand x0, x2; \
	pxor x2, x3;

#define S6(x0, x1, x2, x3, x4) \
	movdqa x1, x4; \
	pxor x0, x3; \
	pxor x2, x1; \
	pxor x0, x2; \
	pand x3, x0; \
	por x3, x1; \
	pxor RNOT, x4; \
	pxor x1, x0; \
	pxor x2, x1; \
	pxor x4, x3; \
	pxor x0, x4; \
	pand x0, x2; \
	pxor x1, x4; \
	pxor x3, x2; \
	pand x1, x3; \
	pxor x0, x3; \
	pxor x2, x1;

#define S7(x0, x1, x2, x3, x4) \
	pxor RNOT, x1; \
	movdqa x1, x4; \
	pxor RNOT, x0; \
	pand x2, x1; \
	pxor x3, x1; \
	por x4, x3; \
	pxor x2, x4; \
	pxor x3, x2; \
	pxor x0, x3; \
	por x1, x0; \
	pand x0, x2; \
	pxor x4, x0; \
	pxor x3, x4; \
	pand x0, x3; \
	pxor x1, x4; \
	pxor x4, x2; \
	pxor x1, x3; \
	por x0, x4; \
	pxor x1, x4;

#define SI0(x0, x1, x2, x3, x4) \
	movdqa x3, x4; \
	pxor x0, x1; \
	por x1, x3; \
	pxor x1, x4; \
	pxor RNOT, x0; \
	pxor x3, x2; \
	pxor x0, x3; \
	pand x1, x0; \
	pxor x2, x0; \
	pand x3, x2; \
	pxor x4, x3; \
	pxor x3, x2; \
	pxor x3, x1; \
	pand x0, x3; \
	pxor x0, x1; \
	pxor x2, x0; \
	pxor x3, x4;

#define SI1(x0, x1, x2, x3, x4) \
	pxor x3, x1; \
	movdqa x0, x4; \
	pxor x2, x0; \
	pxor RNOT, x2; \
	por x1, x4; \
	pxor x3, x4; \
	pand x1, x3; \
	pxor x2, x1; \
	pand x4, x2; \
	pxor x1, x4; \
	por x3, x1; \
	pxor x0, x3; \
	pxor x0, x2; \
	por x4, x0; \
	pxor x4, x2; \
	pxor x0, x1; \
	pxor x1, x4;

#define SI2(x0, x1, x2, x3, x4) \
	pxor x1, x2; \
	movdqa x3, x4; \
	pxor RNOT, x3; \
	por x2, x3; \
	pxor x4, x2; \
	pxor x0, x4; \
	pxor x1, x3; \
	por x2, x1; \
	pxor x0, x2; \
	pxor x4, x1; \
	por x3, x4; \
	pxor x3, x2; \
	pxor x2, x4; \
	pand x1, x2; \
	pxor x3, x2; \
	pxor x4, x3; \
	pxor x0, x4;

#define SI3(x0, x1, x2, x3, x4) \
	pxor x1, x2; \
	movdqa x1, x4; \
	pand x2, x1; \
	pxor x0, x1; \
	por x4, x0; \
	pxor x3, x4; \
	pxor x3, x0; \
	por x1, x3; \
	pxor x2, x1; \
	pxor x3, x1; \
	pxor x2, x0; \
	pxor x3, x2; \
	pand x1, x3; \
	pxor x0, x1; \
	pand x2, x0; \
	pxor x3, x4; \
	pxor x0, x3; \
	pxor x1, x0;

#define SI4(x0, x1, x2, x3, x4) \
	pxor x3, x2; \
	movdqa x0, x4; \
	pand x1, x0; \
	pxor x2, x0; \
	por x3, x2; \
	pxor RNOT, x4; \
	pxor x0, x1; \
	pxor x2, x0; \
	pand x4, x2; \
	pxor x0, x2; \
	por x4, x0; \
	pxor x3, x0; \
	pand x2, x3; \
	pxor x3, x4; \
	pxor x1, x3; \
	pand x0, x1; \
	pxor x1, x4; \
	pxor x3, x0;

#define SI5(x0, x1, x2, x3, x4) \
	movdqa x1, x4; \
	por x2, x1; \
	pxor x4, x2; \
	pxor x3, x1; \
	pand x4, x3; \
	pxor x3, x2; \
	por x0, x3; \
	pxor RNOT, x0; \
	pxor x2, x3; \
	por x0, x2; \
	pxor x1, x4; \
	pxor x4, x2; \
	pand x0, x4; \
	pxor x1, x0; \
	pxor x3, x1; \
	pand x2, x0; \
	pxor x3, x2; \
	pxor x2, x0; \
	pxor x4, x2; \
	pxor x3, x4;

#define SI6(x0, x1, x2, x3, x4) \
	pxor x2, x0; \
	movdqa x0, x4; \
	pand x3, x0; \
	pxor x3, x2; \
	pxor x2, x0; \
	pxor x1, x3; \
	por x4, x2; \
	pxor x3, x2; \
	pand x0, x3; \
	pxor RNOT, x0; \
	pxor x1, x3; \
	pand x2, x1; \
	pxor x0, x4; \
	pxor x4, x3; \
	pxor x2, x4; \
	pxor x1, x0; \
	pxor x0, x2;

#define SI7(x0, x1, x2, x3, x4) \
	movdqa x3, x4; \
	pand x0, x3; \
	pxor x2, x0; \
	por x4, x2; \
	pxor x1, x4; \
	pxor RNOT, x0; \
	por x3, x1; \
	pxor x0, x4; \
	pand x2, x0; \
	pxor x1, x0; \
	pand x2, x1; \
	pxor x2, x3; \
	pxor x3, x4; \
	pand x3, x2; \
	por x0, x3; \
	pxor x4, x1; \
	pxor x4, x3; \
	pand x0, x4; \
	pxor x2, x4;

/*
 * transpose_4x4(x0, x1, x2, x3, t0, t1, t2): transpose the 4x4 matrix of
 * 32-bit words held in x0..x3 in place, so that afterwards register xN
 * holds word N of each of the four original rows.
 *
 * Clobbers t1 and t2.  t0 is accepted for a uniform argument list but is
 * not touched by this macro.
 */
#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	movdqa x0, t2; \
	punpckldq x1, x0; \
	punpckhdq x1, t2; \
	movdqa x2, t1; \
	punpckhdq x3, x2; \
	punpckldq x3, t1; \
	movdqa x0, x1; \
	punpcklqdq t1, x0; \
	punpckhqdq t1, x1; \
	movdqa t2, x3; \
	punpcklqdq x2, t2; \
	punpckhqdq x2, x3; \
	movdqa t2, x2;

/*
 * read_blocks(in, ...): load four consecutive 16-byte blocks from in
 * (unaligned loads) and transpose them so that x0..x3 each hold the same
 * word position of all four blocks.
 */
#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
	movdqu (0*4*4)(in), x0; \
	movdqu (1*4*4)(in), x1; \
	movdqu (2*4*4)(in), x2; \
	movdqu (3*4*4)(in), x3; \
	\
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)

/*
 * write_blocks(out, ...): transpose x0..x3 back to one block per register
 * and store the four 16-byte blocks to out (unaligned stores).
 */
#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	\
	movdqu x0, (0*4*4)(out); \
	movdqu x1, (1*4*4)(out); \
	movdqu x2, (2*4*4)(out); \
	movdqu x3, (3*4*4)(out);

/*
 * xor_blocks(out, ...): like write_blocks, but each block is XORed with
 * the 16 bytes already present at its destination before being stored.
 * Uses t0 as the load temporary.
 */
#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	\
	movdqu (0*4*4)(out), t0; \
	pxor t0, x0; \
	movdqu x0, (0*4*4)(out); \
	movdqu (1*4*4)(out), t0; \
	pxor t0, x1; \
	movdqu x1, (1*4*4)(out); \
	movdqu (2*4*4)(out), t0; \
	pxor t0, x2; \
	movdqu x2, (2*4*4)(out); \
	movdqu (3*4*4)(out), t0; \
	pxor t0, x3; \
	movdqu x3, (3*4*4)(out);

/*
 * __serpent_enc_blk_4way: encrypt four 16-byte blocks in parallel.
 *
 * Uses round keys 0..32 from ctx: an initial key XOR, 31 rounds of
 * S-box + LK, and a final S-box followed by a plain key XOR.
 *
 * Clobbers: %eax, %edx, %xmm0-%xmm7, flags.  %esp is not modified.
 */
ENTRY(__serpent_enc_blk_4way)
	/* input:
	 *	arg_ctx(%esp): ctx, CTX
	 *	arg_dst(%esp): dst
	 *	arg_src(%esp): src
	 *	arg_xor(%esp): bool, if true: xor output
	 */

	/* RNOT = all ones; the S-boxes use it as a NOT mask */
	pcmpeqd RNOT, RNOT;

	movl arg_ctx(%esp), CTX;

	movl arg_src(%esp), %eax;
	read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);

	/*
	 * One round per line.  The register order given to each LK is where
	 * the preceding S-box left its outputs; the same order is then the
	 * input order of the next round's S-box.
	 */
					K(RA, RB, RC, RD, RE, 0);
	S0(RA, RB, RC, RD, RE);		LK(RC, RB, RD, RA, RE, 1);
	S1(RC, RB, RD, RA, RE);		LK(RE, RD, RA, RC, RB, 2);
	S2(RE, RD, RA, RC, RB);		LK(RB, RD, RE, RC, RA, 3);
	S3(RB, RD, RE, RC, RA);		LK(RC, RA, RD, RB, RE, 4);
	S4(RC, RA, RD, RB, RE);		LK(RA, RD, RB, RE, RC, 5);
	S5(RA, RD, RB, RE, RC);		LK(RC, RA, RD, RE, RB, 6);
	S6(RC, RA, RD, RE, RB);		LK(RD, RB, RA, RE, RC, 7);
	S7(RD, RB, RA, RE, RC);		LK(RC, RA, RE, RD, RB, 8);
	S0(RC, RA, RE, RD, RB);		LK(RE, RA, RD, RC, RB, 9);
	S1(RE, RA, RD, RC, RB);		LK(RB, RD, RC, RE, RA, 10);
	S2(RB, RD, RC, RE, RA);		LK(RA, RD, RB, RE, RC, 11);
	S3(RA, RD, RB, RE, RC);		LK(RE, RC, RD, RA, RB, 12);
	S4(RE, RC, RD, RA, RB);		LK(RC, RD, RA, RB, RE, 13);
	S5(RC, RD, RA, RB, RE);		LK(RE, RC, RD, RB, RA, 14);
	S6(RE, RC, RD, RB, RA);		LK(RD, RA, RC, RB, RE, 15);
	S7(RD, RA, RC, RB, RE);		LK(RE, RC, RB, RD, RA, 16);
	S0(RE, RC, RB, RD, RA);		LK(RB, RC, RD, RE, RA, 17);
	S1(RB, RC, RD, RE, RA);		LK(RA, RD, RE, RB, RC, 18);
	S2(RA, RD, RE, RB, RC);		LK(RC, RD, RA, RB, RE, 19);
	S3(RC, RD, RA, RB, RE);		LK(RB, RE, RD, RC, RA, 20);
	S4(RB, RE, RD, RC, RA);		LK(RE, RD, RC, RA, RB, 21);
	S5(RE, RD, RC, RA, RB);		LK(RB, RE, RD, RA, RC, 22);
	S6(RB, RE, RD, RA, RC);		LK(RD, RC, RE, RA, RB, 23);
	S7(RD, RC, RE, RA, RB);		LK(RB, RE, RA, RD, RC, 24);
	S0(RB, RE, RA, RD, RC);		LK(RA, RE, RD, RB, RC, 25);
	S1(RA, RE, RD, RB, RC);		LK(RC, RD, RB, RA, RE, 26);
	S2(RC, RD, RB, RA, RE);		LK(RE, RD, RC, RA, RB, 27);
	S3(RE, RD, RC, RA, RB);		LK(RA, RB, RD, RE, RC, 28);
	S4(RA, RB, RD, RE, RC);		LK(RB, RD, RE, RC, RA, 29);
	S5(RB, RD, RE, RC, RA);		LK(RA, RB, RD, RC, RE, 30);
	S6(RA, RB, RD, RC, RE);		LK(RD, RE, RB, RC, RA, 31);
	S7(RD, RE, RB, RC, RA);		K(RA, RB, RC, RD, RE, 32);

	movl arg_dst(%esp), %eax;

	/* only the low byte of the xor flag is tested */
	cmpb $0, arg_xor(%esp);
	jnz .L__enc_xor4;

	write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);

	ret;

.L__enc_xor4:
	/* xor flag set: dst ^= result instead of dst = result */
	xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);

	ret;
ENDPROC(__serpent_enc_blk_4way)

/*
 * serpent_dec_blk_4way: decrypt four 16-byte blocks in parallel.
 *
 * Walks the round keys in the opposite direction to encryption: key 32
 * first, then inverse S-box + KL for keys 31..1, and a final inverse
 * S-box followed by the XOR of key 0.
 *
 * Clobbers: %eax, %edx, %xmm0-%xmm7, flags.  %esp is not modified.
 */
ENTRY(serpent_dec_blk_4way)
	/* input:
	 *	arg_ctx(%esp): ctx, CTX
	 *	arg_dst(%esp): dst
	 *	arg_src(%esp): src
	 */

	/* RNOT = all ones; the inverse S-boxes use it as a NOT mask */
	pcmpeqd RNOT, RNOT;

	movl arg_ctx(%esp), CTX;

	movl arg_src(%esp), %eax;
	read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);

	/*
	 * One round per line; register orders track where each inverse
	 * S-box leaves its outputs, exactly as in the encryption path.
	 */
					K(RA, RB, RC, RD, RE, 32);
	SI7(RA, RB, RC, RD, RE);	KL(RB, RD, RA, RE, RC, 31);
	SI6(RB, RD, RA, RE, RC);	KL(RA, RC, RE, RB, RD, 30);
	SI5(RA, RC, RE, RB, RD);	KL(RC, RD, RA, RE, RB, 29);
	SI4(RC, RD, RA, RE, RB);	KL(RC, RA, RB, RE, RD, 28);
	SI3(RC, RA, RB, RE, RD);	KL(RB, RC, RD, RE, RA, 27);
	SI2(RB, RC, RD, RE, RA);	KL(RC, RA, RE, RD, RB, 26);
	SI1(RC, RA, RE, RD, RB);	KL(RB, RA, RE, RD, RC, 25);
	SI0(RB, RA, RE, RD, RC);	KL(RE, RC, RA, RB, RD, 24);
	SI7(RE, RC, RA, RB, RD);	KL(RC, RB, RE, RD, RA, 23);
	SI6(RC, RB, RE, RD, RA);	KL(RE, RA, RD, RC, RB, 22);
	SI5(RE, RA, RD, RC, RB);	KL(RA, RB, RE, RD, RC, 21);
	SI4(RA, RB, RE, RD, RC);	KL(RA, RE, RC, RD, RB, 20);
	SI3(RA, RE, RC, RD, RB);	KL(RC, RA, RB, RD, RE, 19);
	SI2(RC, RA, RB, RD, RE);	KL(RA, RE, RD, RB, RC, 18);
	SI1(RA, RE, RD, RB, RC);	KL(RC, RE, RD, RB, RA, 17);
	SI0(RC, RE, RD, RB, RA);	KL(RD, RA, RE, RC, RB, 16);
	SI7(RD, RA, RE, RC, RB);	KL(RA, RC, RD, RB, RE, 15);
	SI6(RA, RC, RD, RB, RE);	KL(RD, RE, RB, RA, RC, 14);
	SI5(RD, RE, RB, RA, RC);	KL(RE, RC, RD, RB, RA, 13);
	SI4(RE, RC, RD, RB, RA);	KL(RE, RD, RA, RB, RC, 12);
	SI3(RE, RD, RA, RB, RC);	KL(RA, RE, RC, RB, RD, 11);
	SI2(RA, RE, RC, RB, RD);	KL(RE, RD, RB, RC, RA, 10);
	SI1(RE, RD, RB, RC, RA);	KL(RA, RD, RB, RC, RE, 9);
	SI0(RA, RD, RB, RC, RE);	KL(RB, RE, RD, RA, RC, 8);
	SI7(RB, RE, RD, RA, RC);	KL(RE, RA, RB, RC, RD, 7);
	SI6(RE, RA, RB, RC, RD);	KL(RB, RD, RC, RE, RA, 6);
	SI5(RB, RD, RC, RE, RA);	KL(RD, RA, RB, RC, RE, 5);
	SI4(RD, RA, RB, RC, RE);	KL(RD, RB, RE, RC, RA, 4);
	SI3(RD, RB, RE, RC, RA);	KL(RE, RD, RA, RC, RB, 3);
	SI2(RE, RD, RA, RC, RB);	KL(RD, RB, RC, RA, RE, 2);
	SI1(RD, RB, RC, RA, RE);	KL(RE, RB, RC, RA, RD, 1);
	SI0(RE, RB, RC, RA, RD);	K(RC, RD, RB, RE, RA, 0);

	/* final state lives in RC, RD, RB, RE (RA is the spare register) */
	movl arg_dst(%esp), %eax;
	write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA);

	ret;
ENDPROC(serpent_dec_blk_4way)