/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * AES-NI + SSE2 implementation of AEGIS-128
 *
 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/frame.h>

/* The five 128-bit words of the AEGIS-128 state. */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4
/*
 * KEY and MSG deliberately alias the same register: KEY is only referenced
 * by crypto_aegis128_aesni_init(), MSG by everything else.
 */
#define KEY	%xmm5
#define MSG	%xmm5
/* Scratch registers. */
#define T0	%xmm6
#define T1	%xmm7

/*
 * Argument registers.  LEN is the 32-bit view (%esi) because the length is
 * declared 'unsigned int' in the C prototypes below; it is always
 * zero-extended explicitly (mov LEN, %r8d) before any 64-bit use.
 */
#define STATEP	%rdi
#define LEN	%esi
#define SRC	%rdx
#define DST	%rcx

/* Two 16-byte constants XORed with the key when the state is initialised. */
.section .rodata.cst16.aegis128_const, "aM", @progbits, 32
.align 16
.Laegis128_const_0:
	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.Laegis128_const_1:
	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd

/* Byte indices 0..15; used by dec_tail to build a "first LEN bytes" mask. */
.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16
.align 16
.Laegis128_counter:
	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f

.text

/*
 * aegis128_update
 * input:
 *   STATE[0-4] - input state
 * output:
 *   STATE[0-4] - output state (shifted positions)
 * changed:
 *   T0
 *
 * Each register receives one AES round of itself XORed with the next
 * register in the ring (STATE3 uses the saved old STATE4).  The result is
 * the updated state rotated by one register: the new first state word ends
 * up in STATE4, the second in STATE0, and so on.  Callers therefore XOR the
 * message into STATE4 after the first call, STATE3 after the second, etc.,
 * and after five calls the registers are back in their original positions.
 */
.macro aegis128_update
	movdqa STATE4, T0
	aesenc STATE0, STATE4
	aesenc STATE1, STATE0
	aesenc STATE2, STATE1
	aesenc STATE3, STATE2
	aesenc T0,     STATE3
.endm

/*
 * __load_partial: internal ABI
 * input:
 *   LEN - bytes
 *   SRC - src
 * output:
 *   MSG  - message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 *
 * Assembles a zero-padded block from SRC.  Bits 0..3 of LEN select a 1-, 2-,
 * 4- and 8-byte piece respectively; each piece is read from the offset
 * obtained by masking LEN down to the bits above it, so the pieces are
 * gathered from the end of the data towards its start and no byte beyond
 * the selected pieces is read.
 */
SYM_FUNC_START_LOCAL(__load_partial)
	xor %r9d, %r9d
	pxor MSG, MSG

	/* odd length: fetch the single trailing byte */
	mov LEN, %r8d
	and $0x1, %r8
	jz .Lld_partial_1

	mov LEN, %r8d
	and $0x1E, %r8
	add SRC, %r8
	mov (%r8), %r9b

.Lld_partial_1:
	/* 2-byte piece: make room below what was gathered so far */
	mov LEN, %r8d
	and $0x2, %r8
	jz .Lld_partial_2

	mov LEN, %r8d
	and $0x1C, %r8
	add SRC, %r8
	shl $0x10, %r9
	mov (%r8), %r9w

.Lld_partial_2:
	/* 4-byte piece */
	mov LEN, %r8d
	and $0x4, %r8
	jz .Lld_partial_4

	mov LEN, %r8d
	and $0x18, %r8
	add SRC, %r8
	shl $32, %r9
	mov (%r8), %r8d
	xor %r8, %r9

.Lld_partial_4:
	/* up to 7 gathered bytes become the low quadword of MSG */
	movq %r9, MSG

	/* 8-byte piece: move the gathered bytes to the high quadword */
	mov LEN, %r8d
	and $0x8, %r8
	jz .Lld_partial_8

	mov LEN, %r8d
	and $0x10, %r8
	add SRC, %r8
	pslldq $8, MSG
	movq (%r8), T0
	pxor T0, MSG

.Lld_partial_8:
	RET
SYM_FUNC_END(__load_partial)

/*
 * __store_partial: internal ABI
 * input:
 *   LEN - bytes
 *   DST - dst
 *   T0   - message block
 * changed:
 *   T0 (shifted right by 8 bytes when at least 8 bytes are stored)
 *   %r8
 *   %r9
 *   %r10
 *
 * Writes the low bytes of T0 to DST in 8-, 4-, 2- and 1-byte pieces, at
 * most one piece of each size, so at most 15 bytes are written.
 */
SYM_FUNC_START_LOCAL(__store_partial)
	mov LEN, %r8d		/* %r8 = bytes left (zero-extended) */
	mov DST, %r9		/* %r9 = write cursor */

	movq T0, %r10		/* %r10 = next bytes to emit */

	cmp $8, %r8
	jb .Lst_partial_8

	mov %r10, (%r9)
	psrldq $8, T0
	movq T0, %r10

	sub $8, %r8
	add $8, %r9

.Lst_partial_8:
	cmp $4, %r8
	jb .Lst_partial_4

	mov %r10d, (%r9)
	shr $32, %r10

	sub $4, %r8
	add $4, %r9

.Lst_partial_4:
	cmp $2, %r8
	jb .Lst_partial_2

	mov %r10w, (%r9)
	shr $0x10, %r10

	sub $2, %r8
	add $2, %r9

.Lst_partial_2:
	cmp $1, %r8
	jb .Lst_partial_1

	mov %r10b, (%r9)

.Lst_partial_1:
	RET
SYM_FUNC_END(__store_partial)

/*
 * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
 *
 * Builds the initial state from the key, the IV and the two constants, runs
 * ten update rounds and stores the five state words to 'state'.
 */
SYM_FUNC_START(crypto_aegis128_aesni_init)
	FRAME_BEGIN

	/* load IV: */
	movdqu (%rdx), T1

	/*
	 * load key:
	 * NOTE(review): movdqa faults on a misaligned address, so this
	 * assumes 'key' is 16-byte aligned - confirm against the caller.
	 */
	movdqa (%rsi), KEY
	pxor KEY, T1		/* T1 = key ^ iv */
	movdqa T1, STATE0
	movdqa KEY, STATE3
	movdqa KEY, STATE4

	/* load the constants: */
	movdqa .Laegis128_const_0(%rip), STATE2
	movdqa .Laegis128_const_1(%rip), STATE1
	pxor STATE2, STATE3	/* STATE3 = key ^ const_0 */
	pxor STATE1, STATE4	/* STATE4 = key ^ const_1 */

	/* update 10 times with KEY / KEY xor IV: */
	aegis128_update; pxor KEY, STATE4
	aegis128_update; pxor T1,  STATE3
	aegis128_update; pxor KEY, STATE2
	aegis128_update; pxor T1,  STATE1
	aegis128_update; pxor KEY, STATE0
	aegis128_update; pxor T1,  STATE4
	aegis128_update; pxor KEY, STATE3
	aegis128_update; pxor T1,  STATE2
	aegis128_update; pxor KEY, STATE1
	aegis128_update; pxor T1,  STATE0

	/* store the state (ten updates = two full rotations, so in order): */
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_init)

/*
 * void crypto_aegis128_aesni_ad(void *state, unsigned int length,
 *                               const void *data);
 *
 * Absorbs every complete 16-byte block of 'data' into the state.  Returns
 * without touching the state when length < 16; a trailing partial block is
 * ignored.  The loops are unrolled five times so that one pass returns the
 * rotating state registers to their original positions; each early exit
 * stores the registers in the order matching the number of updates done.
 *
 * The remaining length is an unsigned 32-bit count, so every length check
 * uses the unsigned condition (jb): a signed check would see a remaining
 * length of 2^31 or more as negative and stop after the first block.
 */
SYM_FUNC_START(crypto_aegis128_aesni_ad)
	FRAME_BEGIN

	cmp $0x10, LEN
	jb .Lad_out

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* use the aligned-load loop only if SRC is 16-byte aligned */
	mov SRC, %r8
	and $0xF, %r8
	jnz .Lad_u_loop

.align 8
.Lad_a_loop:
	movdqa 0x00(SRC), MSG
	aegis128_update
	pxor MSG, STATE4
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_1

	movdqa 0x10(SRC), MSG
	aegis128_update
	pxor MSG, STATE3
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_2

	movdqa 0x20(SRC), MSG
	aegis128_update
	pxor MSG, STATE2
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_3

	movdqa 0x30(SRC), MSG
	aegis128_update
	pxor MSG, STATE1
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_4

	movdqa 0x40(SRC), MSG
	aegis128_update
	pxor MSG, STATE0
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_0

	add $0x50, SRC
	jmp .Lad_a_loop

.align 8
.Lad_u_loop:
	movdqu 0x00(SRC), MSG
	aegis128_update
	pxor MSG, STATE4
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_1

	movdqu 0x10(SRC), MSG
	aegis128_update
	pxor MSG, STATE3
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_2

	movdqu 0x20(SRC), MSG
	aegis128_update
	pxor MSG, STATE2
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_3

	movdqu 0x30(SRC), MSG
	aegis128_update
	pxor MSG, STATE1
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_4

	movdqu 0x40(SRC), MSG
	aegis128_update
	pxor MSG, STATE0
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_0

	add $0x50, SRC
	jmp .Lad_u_loop

	/* store the state: */
.Lad_out_0:
	/* a multiple of five updates: registers are in order */
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_1:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_2:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_3:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_4:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out:
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_ad)

/*
 * encrypt_block a s0 s1 s2 s3 s4 i
 *   a       - 'a' or 'u': aligned (movdqa) or unaligned (movdqu) access
 *   s0..s4  - registers currently holding state words 0..4
 *   i       - block index within the unrolled loop (0..4)
 *
 * Encrypts block i: output = MSG ^ s1 ^ s4 ^ (s2 & s3), then updates the
 * state with the plaintext.  The update leaves the new first state word in
 * the register passed as s4, which is why MSG is XORed into \s4.
 * Leaves through .Lenc_out_\i when fewer than 16 bytes remain; the check is
 * unsigned (jb) because LEN is an unsigned 32-bit count.
 * changed: MSG, T0, T1, LEN
 */
.macro encrypt_block a s0 s1 s2 s3 s4 i
	movdq\a (\i * 0x10)(SRC), MSG
	movdqa MSG, T0
	pxor \s1, T0
	pxor \s4, T0
	movdqa \s2, T1
	pand \s3, T1
	pxor T1, T0
	movdq\a T0, (\i * 0x10)(DST)

	aegis128_update
	pxor MSG, \s4

	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lenc_out_\i
.endm

/*
 * void crypto_aegis128_aesni_enc(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Encrypts every complete 16-byte block of 'src' into 'dst' and updates the
 * state.  Returns without touching anything when length < 16; a trailing
 * partial block is left for crypto_aegis128_aesni_enc_tail().
 */
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc)
	FRAME_BEGIN

	cmp $0x10, LEN
	jb .Lenc_out

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* aligned loop only if both SRC and DST are 16-byte aligned */
	mov  SRC,  %r8
	or   DST,  %r8
	and $0xF, %r8
	jnz .Lenc_u_loop

.align 8
.Lenc_a_loop:
	encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Lenc_a_loop

.align 8
.Lenc_u_loop:
	encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Lenc_u_loop

	/* store the state (order depends on how many updates were done): */
.Lenc_out_0:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_1:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_2:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_3:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_4:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out:
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_enc)

/*
 * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Encrypts one final partial block: the input is loaded zero-padded by
 * __load_partial, 'length' bytes of ciphertext are written by
 * __store_partial, and the state is updated with the zero-padded plaintext.
 */
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail)
	FRAME_BEGIN

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* encrypt message: */
	call __load_partial

	movdqa MSG, T0
	pxor STATE1, T0
	pxor STATE4, T0
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, T0

	call __store_partial

	aegis128_update
	pxor MSG, STATE4

	/* store the state (one update done, hence rotated by one): */
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)

/*
 * decrypt_block a s0 s1 s2 s3 s4 i
 *   a       - 'a' or 'u': aligned (movdqa) or unaligned (movdqu) access
 *   s0..s4  - registers currently holding state words 0..4
 *   i       - block index within the unrolled loop (0..4)
 *
 * Decrypts block i in place in MSG: MSG ^= s1 ^ s4 ^ (s2 & s3), writes it
 * out, then updates the state with the recovered plaintext (XORed into \s4,
 * the register that holds the new first state word after the update).
 * Leaves through .Ldec_out_\i when fewer than 16 bytes remain; the check is
 * unsigned (jb) because LEN is an unsigned 32-bit count.
 * changed: MSG, T0, T1, LEN
 */
.macro decrypt_block a s0 s1 s2 s3 s4 i
	movdq\a (\i * 0x10)(SRC), MSG
	pxor \s1, MSG
	pxor \s4, MSG
	movdqa \s2, T1
	pand \s3, T1
	pxor T1, MSG
	movdq\a MSG, (\i * 0x10)(DST)

	aegis128_update
	pxor MSG, \s4

	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Ldec_out_\i
.endm

/*
 * void crypto_aegis128_aesni_dec(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Decrypts every complete 16-byte block of 'src' into 'dst' and updates the
 * state.  Returns without touching anything when length < 16; a trailing
 * partial block is left for crypto_aegis128_aesni_dec_tail().
 */
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec)
	FRAME_BEGIN

	cmp $0x10, LEN
	jb .Ldec_out

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* aligned loop only if both SRC and DST are 16-byte aligned */
	mov  SRC, %r8
	or   DST, %r8
	and $0xF, %r8
	jnz .Ldec_u_loop

.align 8
.Ldec_a_loop:
	decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Ldec_a_loop

.align 8
.Ldec_u_loop:
	decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Ldec_u_loop

	/* store the state (order depends on how many updates were done): */
.Ldec_out_0:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_1:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_2:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_3:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_4:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out:
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_dec)

/*
 * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Decrypts one final partial block.  After XORing the zero-padded input
 * with the keystream, the bytes of MSG at and beyond 'length' hold
 * keystream rather than zeros, so they are masked off before the state is
 * updated with the plaintext.
 */
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail)
	FRAME_BEGIN

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* decrypt message: */
	call __load_partial

	pxor STATE1, MSG
	pxor STATE4, MSG
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, MSG

	movdqa MSG, T0
	call __store_partial

	/*
	 * mask with byte count:
	 * broadcast the low byte of LEN to all 16 bytes of T0 (four
	 * self-interleaves), then compare it against the byte indices
	 * 0..15: byte i of the mask is 0xff exactly when LEN > i.
	 */
	movd LEN, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	movdqa .Laegis128_counter(%rip), T1
	pcmpgtb T1, T0
	pand T0, MSG

	aegis128_update
	pxor MSG, STATE4

	/* store the state (one update done, hence rotated by one): */
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_dec_tail)

/*
 * void crypto_aegis128_aesni_final(void *state, void *tag_xor,
 *                                  unsigned int assoclen,
 *                                  unsigned int cryptlen);
 *
 * Finalises the state with the two lengths (as bit counts) and XORs the
 * resulting 16-byte tag into the buffer at 'tag_xor'.  The state in memory
 * is only read, never written back.
 */
SYM_FUNC_START(crypto_aegis128_aesni_final)
	FRAME_BEGIN

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/*
	 * prepare length block:
	 * assoclen goes to the low quadword, cryptlen to the high one.
	 * movd zero-extends the 32-bit arguments.
	 */
	movd %edx, MSG
	movd %ecx, T0
	pslldq $8, T0
	pxor T0, MSG
	psllq $3, MSG /* multiply by 8 (to get bit count) */

	pxor STATE3, MSG

	/* update state: */
	aegis128_update; pxor MSG, STATE4
	aegis128_update; pxor MSG, STATE3
	aegis128_update; pxor MSG, STATE2
	aegis128_update; pxor MSG, STATE1
	aegis128_update; pxor MSG, STATE0
	aegis128_update; pxor MSG, STATE4
	aegis128_update; pxor MSG, STATE3

	/*
	 * xor tag:
	 * all five words are folded in, so the rotated register positions
	 * do not matter here.
	 */
	movdqu (%rsi), MSG

	pxor STATE0, MSG
	pxor STATE1, MSG
	pxor STATE2, MSG
	pxor STATE3, MSG
	pxor STATE4, MSG

	movdqu MSG, (%rsi)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_final)