/*
 * Implement AES algorithm in Intel AES-NI instructions.
 *
 * The white paper of AES-NI instructions can be downloaded from:
 *   http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
 *
 * Copyright (C) 2008, Intel Corp.
 *    Author: Huang Ying <ying.huang@intel.com>
 *            Vinodh Gopal <vinodh.gopal@intel.com>
 *            Kahraman Akdemir
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/linkage.h>
#include <asm/inst.h>

.text

/*
 * Register roles.  Syntax is AT&T (source, destination).
 *
 * KEYP, OUTP, INP, LEN and IVP are the first five integer argument
 * registers, in the order the C prototypes below list their parameters
 * (ctx, dst, src, len, iv).  KLEN, T1 and T2 are scratch registers.
 * IN3/IN4 use %xmm8/%xmm9, so this file assembles for 64-bit mode only.
 */
#define STATE1	%xmm0
#define STATE2	%xmm4
#define STATE3	%xmm5
#define STATE4	%xmm6
#define STATE	STATE1
#define IN1	%xmm1
#define IN2	%xmm7
#define IN3	%xmm8
#define IN4	%xmm9
#define IN	IN1
#define KEY	%xmm2
#define IV	%xmm3

#define KEYP	%rdi
#define OUTP	%rsi
#define INP	%rdx
#define LEN	%rcx
#define IVP	%r8
#define KLEN	%r9d
#define T1	%r10
#define TKEYP	T1
#define T2	%r11

/*
 * _key_expansion_128 / _key_expansion_256a: internal helper
 * input:
 *	%xmm0:	previous round key
 *	%xmm1:	AESKEYGENASSIST result for this step
 *	%xmm4:	caller zeroes it once before the first call
 *	%rcx:	address where the new round key is stored
 * output:
 *	%xmm0:	new round key, also stored at (%rcx)
 *	%rcx:	advanced by 16
 * changed:
 *	%xmm1, %xmm4
 */
_key_expansion_128:
_key_expansion_256a:
	pshufd $0b11111111, %xmm1, %xmm1	# broadcast dword 3 of the assist
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0
	movaps %xmm0, (%rcx)
	add $0x10, %rcx
	ret

/*
 * _key_expansion_192a: internal helper
 * input:
 *	%xmm0, %xmm2:	running 192-bit key state
 *	%xmm1:		AESKEYGENASSIST result for this step
 *	%xmm4:		caller zeroes it once before the first call
 *	%rcx:		address where the new key material is stored
 * output:
 *	%xmm0, %xmm2:	updated key state
 *	(%rcx), 16(%rcx): two 16-byte blocks of key schedule
 *	%rcx:		advanced by 32
 * changed:
 *	%xmm1, %xmm3, %xmm4, %xmm5, %xmm6
 */
_key_expansion_192a:
	pshufd $0b01010101, %xmm1, %xmm1	# broadcast dword 1 of the assist
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0

	movaps %xmm2, %xmm5
	movaps %xmm2, %xmm6			# keep the old %xmm2 for the store below
	pslldq $4, %xmm5
	pshufd $0b11111111, %xmm0, %xmm3
	pxor %xmm3, %xmm2
	pxor %xmm5, %xmm2

	movaps %xmm0, %xmm1
	shufps $0b01000100, %xmm0, %xmm6
	movaps %xmm6, (%rcx)
	shufps $0b01001110, %xmm2, %xmm1
	movaps %xmm1, 16(%rcx)
	add $0x20, %rcx
	ret

/*
 * _key_expansion_192b: internal helper
 * Same register contract as _key_expansion_192a, but stores a single
 * 16-byte block (the updated %xmm0) at (%rcx) and advances %rcx by 16.
 * changed:
 *	%xmm1, %xmm3, %xmm4, %xmm5
 */
_key_expansion_192b:
	pshufd $0b01010101, %xmm1, %xmm1
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0

	movaps %xmm2, %xmm5
	pslldq $4, %xmm5
	pshufd $0b11111111, %xmm0, %xmm3
	pxor %xmm3, %xmm2
	pxor %xmm5, %xmm2

	movaps %xmm0, (%rcx)
	add $0x10, %rcx
	ret

/*
 * _key_expansion_256b: internal helper
 * Counterpart of _key_expansion_256a that updates %xmm2 instead of
 * %xmm0.
 * input:
 *	%xmm2:	round key to be updated
 *	%xmm1:	AESKEYGENASSIST result for this step
 *	%xmm4:	caller zeroes it once before the first call
 *	%rcx:	address where the new round key is stored
 * output:
 *	%xmm2:	new round key, also stored at (%rcx)
 *	%rcx:	advanced by 16
 * changed:
 *	%xmm1, %xmm4
 */
_key_expansion_256b:
	pshufd $0b10101010, %xmm1, %xmm1	# broadcast dword 2 of the assist
	shufps $0b00010000, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	shufps $0b10001100, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	pxor %xmm1, %xmm2
	movaps %xmm2, (%rcx)
	add $0x10, %rcx
	ret

/*
 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 *                   unsigned int key_len)
 *
 * Expands the user key into the encryption round keys starting at
 * offset 0 of ctx, derives the decryption round keys starting at offset
 * 240, and stores key_len at offset 480.  Always returns 0.
 *
 * key_len is dispatched on as <24, ==24 or >24 (128/192/256-bit paths);
 * no other validation is done here.  ctx is accessed with movaps, so it
 * must be 16-byte aligned; in_key may be unaligned.
 */
ENTRY(aesni_set_key)
	movups (%rsi), %xmm0		# user key (first 16 bytes)
	movaps %xmm0, (%rdi)
	lea 0x10(%rdi), %rcx		# key addr
	movl %edx, 480(%rdi)
	pxor %xmm4, %xmm4		# xmm4 is assumed 0 in _key_expansion_x
	cmp $24, %dl
	jb .Lenc_key128
	je .Lenc_key192
	movups 0x10(%rsi), %xmm2	# other user key
	movaps %xmm2, (%rcx)
	add $0x10, %rcx
	AESKEYGENASSIST 0x1 %xmm2 %xmm1		# round 1
	call _key_expansion_256a
	AESKEYGENASSIST 0x1 %xmm0 %xmm1
	call _key_expansion_256b
	AESKEYGENASSIST 0x2 %xmm2 %xmm1		# round 2
	call _key_expansion_256a
	AESKEYGENASSIST 0x2 %xmm0 %xmm1
	call _key_expansion_256b
	AESKEYGENASSIST 0x4 %xmm2 %xmm1		# round 3
	call _key_expansion_256a
	AESKEYGENASSIST 0x4 %xmm0 %xmm1
	call _key_expansion_256b
	AESKEYGENASSIST 0x8 %xmm2 %xmm1		# round 4
	call _key_expansion_256a
	AESKEYGENASSIST 0x8 %xmm0 %xmm1
	call _key_expansion_256b
	AESKEYGENASSIST 0x10 %xmm2 %xmm1	# round 5
	call _key_expansion_256a
	AESKEYGENASSIST 0x10 %xmm0 %xmm1
	call _key_expansion_256b
	AESKEYGENASSIST 0x20 %xmm2 %xmm1	# round 6
	call _key_expansion_256a
	AESKEYGENASSIST 0x20 %xmm0 %xmm1
	call _key_expansion_256b
	AESKEYGENASSIST 0x40 %xmm2 %xmm1	# round 7
	call _key_expansion_256a
	jmp .Ldec_key
.Lenc_key192:
	movq 0x10(%rsi), %xmm2		# other user key
	AESKEYGENASSIST 0x1 %xmm2 %xmm1		# round 1
	call _key_expansion_192a
	AESKEYGENASSIST 0x2 %xmm2 %xmm1		# round 2
	call _key_expansion_192b
	AESKEYGENASSIST 0x4 %xmm2 %xmm1		# round 3
	call _key_expansion_192a
	AESKEYGENASSIST 0x8 %xmm2 %xmm1		# round 4
	call _key_expansion_192b
	AESKEYGENASSIST 0x10 %xmm2 %xmm1	# round 5
	call _key_expansion_192a
	AESKEYGENASSIST 0x20 %xmm2 %xmm1	# round 6
	call _key_expansion_192b
	AESKEYGENASSIST 0x40 %xmm2 %xmm1	# round 7
	call _key_expansion_192a
	AESKEYGENASSIST 0x80 %xmm2 %xmm1	# round 8
	call _key_expansion_192b
	jmp .Ldec_key
.Lenc_key128:
	AESKEYGENASSIST 0x1 %xmm0 %xmm1		# round 1
	call _key_expansion_128
	AESKEYGENASSIST 0x2 %xmm0 %xmm1		# round 2
	call _key_expansion_128
	AESKEYGENASSIST 0x4 %xmm0 %xmm1		# round 3
	call _key_expansion_128
	AESKEYGENASSIST 0x8 %xmm0 %xmm1		# round 4
	call _key_expansion_128
	AESKEYGENASSIST 0x10 %xmm0 %xmm1	# round 5
	call _key_expansion_128
	AESKEYGENASSIST 0x20 %xmm0 %xmm1	# round 6
	call _key_expansion_128
	AESKEYGENASSIST 0x40 %xmm0 %xmm1	# round 7
	call _key_expansion_128
	AESKEYGENASSIST 0x80 %xmm0 %xmm1	# round 8
	call _key_expansion_128
	AESKEYGENASSIST 0x1b %xmm0 %xmm1	# round 9
	call _key_expansion_128
	AESKEYGENASSIST 0x36 %xmm0 %xmm1	# round 10
	call _key_expansion_128
.Ldec_key:
	/*
	 * Build the decryption schedule 240 bytes after the encryption
	 * schedule: the same round keys in reverse order, with AESIMC
	 * applied to every key except the first and the last.
	 */
	sub $0x10, %rcx			# %rcx = last encryption round key
	movaps (%rdi), %xmm0
	movaps (%rcx), %xmm1
	movaps %xmm0, 240(%rcx)		# first enc key -> last dec key
	movaps %xmm1, 240(%rdi)		# last enc key -> first dec key
	add $0x10, %rdi
	lea 240-16(%rcx), %rsi		# %rsi walks the dec schedule downwards
.align 4
.Ldec_key_loop:
	movaps (%rdi), %xmm0
	AESIMC %xmm0 %xmm1
	movaps %xmm1, (%rsi)
	add $0x10, %rdi
	sub $0x10, %rsi
	cmp %rcx, %rdi
	jb .Ldec_key_loop
	xor %eax, %eax			# return 0 (32-bit write clears all of %rax)
	ret

/*
 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
 *
 * Encrypts the single 16-byte block at src into dst.
 */
ENTRY(aesni_enc)
	movl 480(KEYP), KLEN		# key length
	movups (INP), STATE		# input
	call _aesni_enc1
	movups STATE, (OUTP)		# output
	ret

/*
 * _aesni_enc1:		internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length (compared with 24 to pick the round count)
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 *
 * TKEYP is biased so that all three key sizes share the tail starting
 * at .Lenc128: longer keys enter earlier and run the extra rounds first.
 */
_aesni_enc1:
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE		# round 0
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .Lenc128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags of the cmp for je
	je .Lenc192
	add $0x20, TKEYP
	movaps -0x60(TKEYP), KEY
	AESENC KEY STATE
	movaps -0x50(TKEYP), KEY
	AESENC KEY STATE
.align 4
.Lenc192:
	movaps -0x40(TKEYP), KEY
	AESENC KEY STATE
	movaps -0x30(TKEYP), KEY
	AESENC KEY STATE
.align 4
.Lenc128:
	movaps -0x20(TKEYP), KEY
	AESENC KEY STATE
	movaps -0x10(TKEYP), KEY
	AESENC KEY STATE
	movaps (TKEYP), KEY
	AESENC KEY STATE
	movaps 0x10(TKEYP), KEY
	AESENC KEY STATE
	movaps 0x20(TKEYP), KEY
	AESENC KEY STATE
	movaps 0x30(TKEYP), KEY
	AESENC KEY STATE
	movaps 0x40(TKEYP), KEY
	AESENC KEY STATE
	movaps 0x50(TKEYP), KEY
	AESENC KEY STATE
	movaps 0x60(TKEYP), KEY
	AESENC KEY STATE
	movaps 0x70(TKEYP), KEY
	AESENCLAST KEY STATE
	ret

/*
 * _aesni_enc4:	internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length (compared with 24 to pick the round count)
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 *
 * Same round structure as _aesni_enc1, applied to four independent
 * blocks with each round key loaded once.
 */
_aesni_enc4:
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE1		# round 0
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .L4enc128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags of the cmp for je
	je .L4enc192
	add $0x20, TKEYP
	movaps -0x60(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps -0x50(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
#.align 4
.L4enc192:
	movaps -0x40(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps -0x30(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
#.align 4
.L4enc128:
	movaps -0x20(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps -0x10(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps (TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x10(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x20(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x30(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x40(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x50(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x60(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x70(TKEYP), KEY
	AESENCLAST KEY STATE1		# last round
	AESENCLAST KEY STATE2
	AESENCLAST KEY STATE3
	AESENCLAST KEY STATE4
	ret

/*
 * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
 *
 * Decrypts the single 16-byte block at src into dst, using the
 * decryption round keys that aesni_set_key placed at ctx + 240.
 */
ENTRY(aesni_dec)
	mov 480(KEYP), KLEN		# key length
	add $240, KEYP			# switch to the decryption round keys
	movups (INP), STATE		# input
	call _aesni_dec1
	movups STATE, (OUTP)		#output
	ret

/*
 * _aesni_dec1:		internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 *
 * KEYP must already point at the decryption round keys.
 */
_aesni_dec1:
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE		# round 0
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .Ldec128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags of the cmp for je
	je .Ldec192
	add $0x20, TKEYP
	movaps -0x60(TKEYP), KEY
	AESDEC KEY STATE
	movaps -0x50(TKEYP), KEY
	AESDEC KEY STATE
.align 4
.Ldec192:
	movaps -0x40(TKEYP), KEY
	AESDEC KEY STATE
	movaps -0x30(TKEYP), KEY
	AESDEC KEY STATE
.align 4
.Ldec128:
	movaps -0x20(TKEYP), KEY
	AESDEC KEY STATE
	movaps -0x10(TKEYP), KEY
	AESDEC KEY STATE
	movaps (TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x10(TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x20(TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x30(TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x40(TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x50(TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x60(TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x70(TKEYP), KEY
	AESDECLAST KEY STATE
	ret

/*
 * _aesni_dec4:	internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 *
 * KEYP must already point at the decryption round keys.
 */
_aesni_dec4:
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE1		# round 0
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .L4dec128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags of the cmp for je
	je .L4dec192
	add $0x20, TKEYP
	movaps -0x60(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps -0x50(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
.align 4
.L4dec192:
	movaps -0x40(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps -0x30(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
.align 4
.L4dec128:
	movaps -0x20(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps -0x10(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps (TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x10(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x20(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x30(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x40(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x50(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x60(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x70(TKEYP), KEY
	AESDECLAST KEY STATE1		# last round
	AESDECLAST KEY STATE2
	AESDECLAST KEY STATE3
	AESDECLAST KEY STATE4
	ret

/*
 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len)
 *
 * ECB-encrypts whole 16-byte blocks from src to dst, four at a time
 * while at least 64 bytes remain and one at a time afterwards.  A
 * trailing partial block (len % 16 bytes) is left untouched.
 */
ENTRY(aesni_ecb_enc)
	test LEN, LEN		# check length
	jz .Lecb_enc_ret
	mov 480(KEYP), KLEN
	cmp $16, LEN
	jb .Lecb_enc_ret
	cmp $64, LEN
	jb .Lecb_enc_loop1
.align 4
.Lecb_enc_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_enc4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_enc_loop4		# unsigned compare: LEN is a size_t
	cmp $16, LEN
	jb .Lecb_enc_ret
.align 4
.Lecb_enc_loop1:
	movups (INP), STATE1
	call _aesni_enc1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_enc_loop1		# unsigned compare: LEN is a size_t
.Lecb_enc_ret:
	ret

/*
 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len);
 *
 * ECB-decrypts whole 16-byte blocks from src to dst, four at a time
 * while at least 64 bytes remain and one at a time afterwards.  A
 * trailing partial block (len % 16 bytes) is left untouched.
 */
ENTRY(aesni_ecb_dec)
	test LEN, LEN
	jz .Lecb_dec_ret
	mov 480(KEYP), KLEN
	add $240, KEYP			# switch to the decryption round keys
	cmp $16, LEN
	jb .Lecb_dec_ret
	cmp $64, LEN
	jb .Lecb_dec_loop1
.align 4
.Lecb_dec_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_dec4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_dec_loop4		# unsigned compare: LEN is a size_t
	cmp $16, LEN
	jb .Lecb_dec_ret
.align 4
.Lecb_dec_loop1:
	movups (INP), STATE1
	call _aesni_dec1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_dec_loop1		# unsigned compare: LEN is a size_t
.Lecb_dec_ret:
	ret

/*
 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-encrypts whole 16-byte blocks from src to dst.  STATE carries the
 * chaining value: it starts as *iv, and after the last block the final
 * ciphertext block is written back to *iv.  Nothing is read or written
 * when len < 16.
 */
ENTRY(aesni_cbc_enc)
	cmp $16, LEN
	jb .Lcbc_enc_ret
	mov 480(KEYP), KLEN
	movups (IVP), STATE	# load iv as initial state
.align 4
.Lcbc_enc_loop:
	movups (INP), IN	# load input
	pxor IN, STATE
	call _aesni_enc1
	movups STATE, (OUTP)	# store output
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_enc_loop		# unsigned compare: LEN is a size_t
	movups STATE, (IVP)
.Lcbc_enc_ret:
	ret

/*
 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-decrypts whole 16-byte blocks from src to dst, four at a time
 * while at least 64 bytes remain and one at a time afterwards.  Each
 * ciphertext block is kept in an IN register before decryption so it
 * can serve as the chaining value for the next block.  The last
 * ciphertext block processed is written back to *iv.  Nothing is read
 * or written when len < 16.
 */
ENTRY(aesni_cbc_dec)
	cmp $16, LEN
	jb .Lcbc_dec_just_ret
	mov 480(KEYP), KLEN
	add $240, KEYP			# switch to the decryption round keys
	movups (IVP), IV
	cmp $64, LEN
	jb .Lcbc_dec_loop1
.align 4
.Lcbc_dec_loop4:
	movups (INP), IN1
	movaps IN1, STATE1
	movups 0x10(INP), IN2
	movaps IN2, STATE2
	movups 0x20(INP), IN3
	movaps IN3, STATE3
	movups 0x30(INP), IN4
	movaps IN4, STATE4
	call _aesni_dec4
	pxor IV, STATE1
	pxor IN1, STATE2
	pxor IN2, STATE3
	pxor IN3, STATE4
	movaps IN4, IV			# last ciphertext block chains onwards
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lcbc_dec_loop4		# unsigned compare: LEN is a size_t
	cmp $16, LEN
	jb .Lcbc_dec_ret
.align 4
.Lcbc_dec_loop1:
	movups (INP), IN
	movaps IN, STATE
	call _aesni_dec1
	pxor IV, STATE
	movups STATE, (OUTP)
	movaps IN, IV
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_dec_loop1		# unsigned compare: LEN is a size_t
.Lcbc_dec_ret:
	movups IV, (IVP)
.Lcbc_dec_just_ret:
	ret