/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

/* included by aes-ce.S and aes-neon.S */

	.text
	.align		4

#ifndef MAX_STRIDE
#define MAX_STRIDE	4
#endif

#if MAX_STRIDE == 4
#define ST4(x...) x
#define ST5(x...)
#else
#define ST4(x...)
#define ST5(x...) x
#endif

aes_encrypt_block4x:
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
ENDPROC(aes_encrypt_block4x)

aes_decrypt_block4x:
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
ENDPROC(aes_decrypt_block4x)

#if MAX_STRIDE == 5
aes_encrypt_block5x:
	encrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
ENDPROC(aes_encrypt_block5x)

aes_decrypt_block5x:
	decrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
ENDPROC(aes_decrypt_block5x)
#endif

	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 */

AES_ENTRY(aes_ecb_encrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	enc_prepare	w3, x2, x5

.LecbencloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lecbenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
ST4(	bl		aes_encrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_encrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbencloopNx
.Lecbenc1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lecbencout
.Lecbencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	encrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbencloop
.Lecbencout:
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_ecb_encrypt)
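
	/*
	 * For reference, a minimal C sketch of what the ECB entry points
	 * compute (not part of the build; aes_encrypt_one() is a
	 * hypothetical single-block helper standing in for encrypt_block):
	 *
	 *	void ecb_encrypt(u8 *out, const u8 *in, const u32 *rk,
	 *			 int rounds, int blocks)
	 *	{
	 *		while (blocks--) {
	 *			memcpy(out, in, 16);
	 *			aes_encrypt_one(out, rk, rounds);
	 *			in += 16;
	 *			out += 16;
	 *		}
	 *	}
	 *
	 * Blocks are independent, which is why the loop above can process
	 * MAX_STRIDE of them at a time with no fixup pass.
	 */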

AES_ENTRY(aes_ecb_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	dec_prepare	w3, x2, x5

.LecbdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lecbdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
ST4(	bl		aes_decrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_decrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbdecloopNx
.Lecbdec1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lecbdecout
.Lecbdecloop:
	ld1		{v0.16b}, [x1], #16		/* get next ct block */
	decrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_ecb_decrypt)


	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 */

AES_ENTRY(aes_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

.Lcbcencloop4x:
	subs		w4, w4, #4
	bmi		.Lcbcenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w3, x2, x6, w7
	eor		v1.16b, v1.16b, v0.16b
	encrypt_block	v1, w3, x2, x6, w7
	eor		v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w3, x2, x6, w7
	eor		v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w3, x2, x6, w7
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v3.16b
	b		.Lcbcencloop4x
.Lcbcenc1x:
	adds		w4, w4, #4
	beq		.Lcbcencout
.Lcbcencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w3, x2, x6, w7
	st1		{v4.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcencloop
.Lcbcencout:
	st1		{v4.16b}, [x5]			/* return iv */
	ret
AES_ENDPROC(aes_cbc_encrypt)
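
	/*
	 * For reference, a C sketch of the CBC chaining implemented above
	 * (aes_encrypt_one() and xor_block() are hypothetical helpers):
	 *
	 *	void cbc_encrypt(u8 *out, const u8 *in, const u32 *rk,
	 *			 int rounds, int blocks, u8 iv[16])
	 *	{
	 *		while (blocks--) {
	 *			xor_block(iv, in);		// iv ^= pt
	 *			aes_encrypt_one(iv, rk, rounds);
	 *			memcpy(out, iv, 16);		// ct is next iv
	 *			in += 16;
	 *			out += 16;
	 *		}
	 *	}
	 *
	 * Each ciphertext block feeds into the next encryption, so the
	 * encrypt side is inherently serial: the 4x loop above only batches
	 * the loads and stores. Decryption has no such dependency, which is
	 * why aes_cbc_decrypt below can use the full MAX_STRIDE interleave.
	 */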

AES_ENTRY(aes_cbc_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{cbciv.16b}, [x5]		/* get iv */
	dec_prepare	w3, x2, x6

.LcbcdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lcbcdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
#if MAX_STRIDE == 5
	ld1		{v4.16b}, [x1], #16		/* get 1 ct block */
	mov		v5.16b, v0.16b
	mov		v6.16b, v1.16b
	mov		v7.16b, v2.16b
	bl		aes_decrypt_block5x
	sub		x1, x1, #32
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v5.16b
	ld1		{v5.16b}, [x1], #16		/* reload 1 ct block */
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v6.16b
	eor		v3.16b, v3.16b, v7.16b
	eor		v4.16b, v4.16b, v5.16b
#else
	mov		v4.16b, v0.16b
	mov		v5.16b, v1.16b
	mov		v6.16b, v2.16b
	bl		aes_decrypt_block4x
	sub		x1, x1, #16
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v4.16b
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v5.16b
	eor		v3.16b, v3.16b, v6.16b
#endif
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LcbcdecloopNx
.Lcbcdec1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lcbcdecout
.Lcbcdecloop:
	ld1		{v1.16b}, [x1], #16		/* get next ct block */
	mov		v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, cbciv.16b	/* xor with iv => pt */
	mov		cbciv.16b, v1.16b		/* ct is next iv */
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	st1		{cbciv.16b}, [x5]		/* return iv */
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_cbc_decrypt)


	/*
	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 */

AES_ENTRY(aes_cbc_cts_encrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
	tbl		v1.16b, {v1.16b}, v4.16b
	encrypt_block	v0, w3, x2, x6, w7

	eor		v1.16b, v1.16b, v0.16b
	tbl		v0.16b, {v0.16b}, v3.16b
	encrypt_block	v1, w3, x2, x6, w7

	add		x4, x0, x4
	st1		{v0.16b}, [x4]			/* overlapping stores */
	st1		{v1.16b}, [x0]
	ret
AES_ENDPROC(aes_cbc_cts_encrypt)

AES_ENTRY(aes_cbc_cts_decrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

	tbl		v2.16b, {v1.16b}, v4.16b
	decrypt_block	v0, w3, x2, x6, w7
	eor		v2.16b, v2.16b, v0.16b

	tbx		v0.16b, {v1.16b}, v4.16b
	tbl		v2.16b, {v2.16b}, v3.16b
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */

	add		x4, x0, x4
	st1		{v2.16b}, [x4]			/* overlapping stores */
	st1		{v0.16b}, [x0]
	ret
AES_ENDPROC(aes_cbc_cts_decrypt)

	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.previous
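
	/*
	 * How the permute table is used, as a sketch: x4 holds the tail
	 * length (bytes - 16), and indexing the table at x4 from either end
	 * yields tbl/tbx index vectors that extract or merge the tail bytes
	 * of a partial block (a tbl index of 0xff produces a zero byte, so
	 * the partial block comes out zero-padded). In C, the encrypt side
	 * of the stealing amounts to (helper names hypothetical):
	 *
	 *	// in[] holds the final 16 + tail bytes, 0 < tail <= 16
	 *	u8 cn1[16], last[16] = {};
	 *
	 *	memcpy(cn1, in, 16);
	 *	memcpy(last, in + 16, tail);	// zero-padded final block
	 *	xor_block(cn1, iv);
	 *	aes_encrypt_one(cn1, rk, rounds);	// C(n-1), pre-steal
	 *	xor_block(last, cn1);
	 *	aes_encrypt_one(last, rk, rounds);
	 *	memcpy(out, last, 16);		// full final block first (CS3)
	 *	memcpy(out + 16, cn1, tail);	// then truncated C(n-1)
	 */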


	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 ctr[])
	 */

AES_ENTRY(aes_ctr_encrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	enc_prepare	w3, x2, x6
	ld1		{vctr.16b}, [x5]

	umov		x6, vctr.d[1]		/* keep swabbed ctr in reg */
	rev		x6, x6
	cmn		w6, w4			/* 32 bit overflow? */
	bcs		.Lctrloop
.LctrloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lctr1x
	add		w7, w6, #1
	mov		v0.16b, vctr.16b
	add		w8, w6, #2
	mov		v1.16b, vctr.16b
	add		w9, w6, #3
	mov		v2.16b, vctr.16b
	rev		w7, w7
	mov		v3.16b, vctr.16b
	rev		w8, w8
ST5(	mov		v4.16b, vctr.16b	)
	mov		v1.s[3], w7
	rev		w9, w9
ST5(	add		w10, w6, #4		)
	mov		v2.s[3], w8
ST5(	rev		w10, w10		)
	mov		v3.s[3], w9
ST5(	mov		v4.s[3], w10		)
	ld1		{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
ST4(	bl		aes_encrypt_block4x	)
ST5(	bl		aes_encrypt_block5x	)
	eor		v0.16b, v5.16b, v0.16b
ST4(	ld1		{v5.16b}, [x1], #16	)
	eor		v1.16b, v6.16b, v1.16b
ST5(	ld1		{v5.16b-v6.16b}, [x1], #32	)
	eor		v2.16b, v7.16b, v2.16b
	eor		v3.16b, v5.16b, v3.16b
ST5(	eor		v4.16b, v6.16b, v4.16b	)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16	)
	add		x6, x6, #MAX_STRIDE
	rev		x7, x6
	ins		vctr.d[1], x7
	cbz		w4, .Lctrout
	b		.LctrloopNx
.Lctr1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lctrout
.Lctrloop:
	mov		v0.16b, vctr.16b
	encrypt_block	v0, w3, x2, x8, w7

	adds		x6, x6, #1		/* increment BE ctr */
	rev		x7, x6
	ins		vctr.d[1], x7
	bcs		.Lctrcarry		/* overflow? */

.Lctrcarrydone:
	subs		w4, w4, #1
	bmi		.Lctrtailblock		/* blocks <0 means tail block */
	ld1		{v3.16b}, [x1], #16
	eor		v3.16b, v0.16b, v3.16b
	st1		{v3.16b}, [x0], #16
	bne		.Lctrloop

.Lctrout:
	st1		{vctr.16b}, [x5]	/* return next CTR value */
	ldp		x29, x30, [sp], #16
	ret

.Lctrtailblock:
	st1		{v0.16b}, [x0]
	b		.Lctrout

.Lctrcarry:
	umov		x7, vctr.d[0]		/* load upper word of ctr */
	rev		x7, x7			/* ... to handle the carry */
	add		x7, x7, #1
	rev		x7, x7
	ins		vctr.d[0], x7
	b		.Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)
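
	/*
	 * The counter handling above, modelled in C as a sketch: the CTR
	 * block is a 128-bit big-endian integer, and its low 64 bits are
	 * kept byte-reversed in x6 so they can be incremented directly.
	 *
	 *	u64 lo = get_unaligned_be64(ctr + 8);
	 *	u64 hi = get_unaligned_be64(ctr);
	 *
	 *	if (++lo == 0)			// carry out of the low half
	 *		hi++;
	 *	put_unaligned_be64(hi, ctr);
	 *	put_unaligned_be64(lo, ctr + 8);
	 *
	 * The bulk path only patches the low 32 bits of each per-block
	 * counter (mov vN.s[3], ...), so it must not be used when those
	 * 32 bits could wrap during the call; the 'cmn w6, w4' check above
	 * diverts such calls to the serial loop, which handles carries one
	 * block at a time via .Lctrcarry.
	 */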


	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
	 */

	.macro		next_tweak, out, in, tmp
	sshr		\tmp\().2d,  \in\().2d,   #63
	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
	add		\out\().2d,  \in\().2d,   \in\().2d
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm

	.macro		xts_load_mask, tmp
	movi		xtsmask.2s, #0x1
	movi		\tmp\().2s, #0x87
	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
	.endm
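
	/*
	 * Equivalent C for next_tweak, as a sketch (t[0] holds the low
	 * 64 bits of the tweak, little-endian):
	 *
	 *	void next_tweak(u64 t[2])
	 *	{
	 *		u64 carry = (u64)((s64)t[1] >> 63) & 0x87;
	 *
	 *		t[1] = (t[1] << 1) | (t[0] >> 63);
	 *		t[0] = (t[0] << 1) ^ carry;
	 *	}
	 *
	 * i.e. a doubling in GF(2^128) modulo the XTS polynomial
	 * x^128 + x^7 + x^2 + x + 1, whose feedback byte is 0x87.
	 * xts_load_mask builds the { 0x1, 0x87 } constant that steers the
	 * carry of each half into the other after the ext-based swap.
	 */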

AES_ENTRY(aes_xts_encrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	cbz		w7, .Lxtsencnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	enc_switch_key	w3, x2, x8
	b		.LxtsencNx

.Lxtsencnotfirst:
	enc_prepare	w3, x2, x8
.LxtsencloopNx:
	next_tweak	v4, v4, v8
.LxtsencNx:
	subs		w4, w4, #4
	bmi		.Lxtsenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_encrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b
	cbz		w4, .Lxtsencout
	xts_reload_mask	v8
	b		.LxtsencloopNx
.Lxtsenc1x:
	adds		w4, w4, #4
	beq		.Lxtsencout
.Lxtsencloop:
	ld1		{v1.16b}, [x1], #16
	eor		v0.16b, v1.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	beq		.Lxtsencout
	next_tweak	v4, v4, v8
	b		.Lxtsencloop
.Lxtsencout:
	st1		{v4.16b}, [x6]
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_xts_encrypt)


AES_ENTRY(aes_xts_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	cbz		w7, .Lxtsdecnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	dec_prepare	w3, x2, x8
	b		.LxtsdecNx

.Lxtsdecnotfirst:
	dec_prepare	w3, x2, x8
.LxtsdecloopNx:
	next_tweak	v4, v4, v8
.LxtsdecNx:
	subs		w4, w4, #4
	bmi		.Lxtsdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_decrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b
	cbz		w4, .Lxtsdecout
	xts_reload_mask	v8
	b		.LxtsdecloopNx
.Lxtsdec1x:
	adds		w4, w4, #4
	beq		.Lxtsdecout
.Lxtsdecloop:
	ld1		{v1.16b}, [x1], #16
	eor		v0.16b, v1.16b, v4.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	beq		.Lxtsdecout
	next_tweak	v4, v4, v8
	b		.Lxtsdecloop
.Lxtsdecout:
	st1		{v4.16b}, [x6]
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_xts_decrypt)

	/*
	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
	 */
AES_ENTRY(aes_mac_update)
	frame_push	6

	mov		x19, x0
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4
	mov		x24, x6

	ld1		{v0.16b}, [x23]			/* get dg */
	enc_prepare	w2, x1, x7
	cbz		w5, .Lmacloop4x

	encrypt_block	v0, w2, x1, x7, w8

.Lmacloop4x:
	subs		w22, w22, #4
	bmi		.Lmac1x
	ld1		{v1.16b-v4.16b}, [x19], #64	/* get next 4 pt blocks */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v4.16b
	cmp		w22, wzr
	csinv		x5, x24, xzr, eq
	cbz		w5, .Lmacout
	encrypt_block	v0, w21, x20, x7, w8
	st1		{v0.16b}, [x23]			/* return dg */
	cond_yield_neon	.Lmacrestart
	b		.Lmacloop4x
.Lmac1x:
	add		w22, w22, #4
.Lmacloop:
	cbz		w22, .Lmacout
	ld1		{v1.16b}, [x19], #16		/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

	subs		w22, w22, #1
	csinv		x5, x24, xzr, eq
	cbz		w5, .Lmacout

.Lmacenc:
	encrypt_block	v0, w21, x20, x7, w8
	b		.Lmacloop

.Lmacout:
	st1		{v0.16b}, [x23]			/* return dg */
	frame_pop
	ret

.Lmacrestart:
	ld1		{v0.16b}, [x23]			/* get dg */
	enc_prepare	w21, x20, x0
	b		.Lmacloop4x
AES_ENDPROC(aes_mac_update)
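
	/*
	 * For reference, a C sketch of the update above (aes_encrypt_one()
	 * and xor_block() are hypothetical helpers). enc_before forces an
	 * initial encryption of the digest, and enc_after controls whether
	 * the final block's xor is encrypted before returning:
	 *
	 *	void mac_update(const u8 *in, const u32 *rk, int rounds,
	 *			int blocks, u8 dg[16], int enc_before,
	 *			int enc_after)
	 *	{
	 *		if (enc_before)
	 *			aes_encrypt_one(dg, rk, rounds);
	 *		while (blocks--) {
	 *			xor_block(dg, in);	// dg ^= next block
	 *			in += 16;
	 *			if (blocks || enc_after)
	 *				aes_encrypt_one(dg, rk, rounds);
	 *		}
	 *	}
	 */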