/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

/* included by aes-ce.S and aes-neon.S */

	.text
	.align		4

#ifndef MAX_STRIDE
#define MAX_STRIDE	4
#endif

#if MAX_STRIDE == 4
#define ST4(x...) x
#define ST5(x...)
#else
#define ST4(x...)
#define ST5(x...) x
#endif
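
/*
 * MAX_STRIDE is the interleave factor: it is expected to be set by the
 * including file (the Crypto Extensions code may choose 5-way
 * interleaving to keep deeply pipelined AES units busy, while the NEON
 * fallback keeps the 4-way default). The ST4()/ST5() wrappers expand
 * their argument only under the matching stride, so both variants can
 * be written inline in the shared code below.
 */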

SYM_FUNC_START_LOCAL(aes_encrypt_block4x)
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block4x)

SYM_FUNC_START_LOCAL(aes_decrypt_block4x)
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block4x)

#if MAX_STRIDE == 5
SYM_FUNC_START_LOCAL(aes_encrypt_block5x)
	encrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block5x)

SYM_FUNC_START_LOCAL(aes_decrypt_block5x)
	decrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block5x)
#endif

	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 */

AES_FUNC_START(aes_ecb_encrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	enc_prepare	w3, x2, x5

.LecbencloopNx:
	subs	w4, w4, #MAX_STRIDE
	bmi	.Lecbenc1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
ST4(	bl	aes_encrypt_block4x		)
ST5(	ld1	{v4.16b}, [x1], #16		)
ST5(	bl	aes_encrypt_block5x		)
	st1	{v0.16b-v3.16b}, [x0], #64
ST5(	st1	{v4.16b}, [x0], #16		)
	b	.LecbencloopNx
.Lecbenc1x:
	adds	w4, w4, #MAX_STRIDE
	beq	.Lecbencout
.Lecbencloop:
	ld1	{v0.16b}, [x1], #16		/* get next pt block */
	encrypt_block	v0, w3, x2, x5, w6
	st1	{v0.16b}, [x0], #16
	subs	w4, w4, #1
	bne	.Lecbencloop
.Lecbencout:
	ldp	x29, x30, [sp], #16
	ret
AES_FUNC_END(aes_ecb_encrypt)


AES_FUNC_START(aes_ecb_decrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	dec_prepare	w3, x2, x5

.LecbdecloopNx:
	subs	w4, w4, #MAX_STRIDE
	bmi	.Lecbdec1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
ST4(	bl	aes_decrypt_block4x		)
ST5(	ld1	{v4.16b}, [x1], #16		)
ST5(	bl	aes_decrypt_block5x		)
	st1	{v0.16b-v3.16b}, [x0], #64
ST5(	st1	{v4.16b}, [x0], #16		)
	b	.LecbdecloopNx
.Lecbdec1x:
	adds	w4, w4, #MAX_STRIDE
	beq	.Lecbdecout
.Lecbdecloop:
	ld1	{v0.16b}, [x1], #16		/* get next ct block */
	decrypt_block	v0, w3, x2, x5, w6
	st1	{v0.16b}, [x0], #16
	subs	w4, w4, #1
	bne	.Lecbdecloop
.Lecbdecout:
	ldp	x29, x30, [sp], #16
	ret
AES_FUNC_END(aes_ecb_decrypt)


	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[]);
	 * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[]);
	 */
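
	/*
	 * ESSIV computes the IV for each sector by encrypting the
	 * provided IV with a second key schedule, rk2. The glue code is
	 * assumed to derive rk2 from a 256-bit digest of the data key,
	 * which is why this first block is always processed with the 14
	 * rounds of AES-256 before switching to the data key itself.
	 */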

AES_FUNC_START(aes_essiv_cbc_encrypt)
	ld1	{v4.16b}, [x5]			/* get iv */

	mov	w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	v4, w8, x6, x7, w9
	enc_switch_key	w3, x2, x6
	b	.Lcbcencloop4x

AES_FUNC_START(aes_cbc_encrypt)
	ld1	{v4.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

.Lcbcencloop4x:
	subs	w4, w4, #4
	bmi	.Lcbcenc1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	eor	v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w3, x2, x6, w7
	eor	v1.16b, v1.16b, v0.16b
	encrypt_block	v1, w3, x2, x6, w7
	eor	v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w3, x2, x6, w7
	eor	v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w3, x2, x6, w7
	st1	{v0.16b-v3.16b}, [x0], #64
	mov	v4.16b, v3.16b
	b	.Lcbcencloop4x
.Lcbcenc1x:
	adds	w4, w4, #4
	beq	.Lcbcencout
.Lcbcencloop:
	ld1	{v0.16b}, [x1], #16		/* get next pt block */
	eor	v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w3, x2, x6, w7
	st1	{v4.16b}, [x0], #16
	subs	w4, w4, #1
	bne	.Lcbcencloop
.Lcbcencout:
	st1	{v4.16b}, [x5]			/* return iv */
	ret
AES_FUNC_END(aes_cbc_encrypt)
AES_FUNC_END(aes_essiv_cbc_encrypt)

AES_FUNC_START(aes_essiv_cbc_decrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	ld1	{cbciv.16b}, [x5]		/* get iv */

	mov	w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	cbciv, w8, x6, x7, w9
	b	.Lessivcbcdecstart

AES_FUNC_START(aes_cbc_decrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	ld1	{cbciv.16b}, [x5]		/* get iv */
.Lessivcbcdecstart:
	dec_prepare	w3, x2, x6

.LcbcdecloopNx:
	subs	w4, w4, #MAX_STRIDE
	bmi	.Lcbcdec1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
#if MAX_STRIDE == 5
	ld1	{v4.16b}, [x1], #16		/* get 1 ct block */
	mov	v5.16b, v0.16b
	mov	v6.16b, v1.16b
	mov	v7.16b, v2.16b
	bl	aes_decrypt_block5x
	sub	x1, x1, #32
	eor	v0.16b, v0.16b, cbciv.16b
	eor	v1.16b, v1.16b, v5.16b
	ld1	{v5.16b}, [x1], #16		/* reload 1 ct block */
	ld1	{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor	v2.16b, v2.16b, v6.16b
	eor	v3.16b, v3.16b, v7.16b
	eor	v4.16b, v4.16b, v5.16b
#else
	mov	v4.16b, v0.16b
	mov	v5.16b, v1.16b
	mov	v6.16b, v2.16b
	bl	aes_decrypt_block4x
	sub	x1, x1, #16
	eor	v0.16b, v0.16b, cbciv.16b
	eor	v1.16b, v1.16b, v4.16b
	ld1	{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor	v2.16b, v2.16b, v5.16b
	eor	v3.16b, v3.16b, v6.16b
#endif
	st1	{v0.16b-v3.16b}, [x0], #64
ST5(	st1	{v4.16b}, [x0], #16		)
	b	.LcbcdecloopNx
.Lcbcdec1x:
	adds	w4, w4, #MAX_STRIDE
	beq	.Lcbcdecout
.Lcbcdecloop:
	ld1	{v1.16b}, [x1], #16		/* get next ct block */
	mov	v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor	v0.16b, v0.16b, cbciv.16b	/* xor with iv => pt */
	mov	cbciv.16b, v1.16b		/* ct is next iv */
	st1	{v0.16b}, [x0], #16
	subs	w4, w4, #1
	bne	.Lcbcdecloop
.Lcbcdecout:
	st1	{cbciv.16b}, [x5]		/* return iv */
	ldp	x29, x30, [sp], #16
	ret
AES_FUNC_END(aes_cbc_decrypt)
AES_FUNC_END(aes_essiv_cbc_decrypt)


	/*
	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 */

AES_FUNC_START(aes_cbc_cts_encrypt)
	adr_l	x8, .Lcts_permute_table
	sub	x4, x4, #16
	add	x9, x8, #32
	add	x8, x8, x4
	sub	x9, x9, x4
	ld1	{v3.16b}, [x8]
	ld1	{v4.16b}, [x9]

	ld1	{v0.16b}, [x1], x4		/* overlapping loads */
	ld1	{v1.16b}, [x1]

	ld1	{v5.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

	eor	v0.16b, v0.16b, v5.16b		/* xor with iv */
	tbl	v1.16b, {v1.16b}, v4.16b
	encrypt_block	v0, w3, x2, x6, w7

	eor	v1.16b, v1.16b, v0.16b
	tbl	v0.16b, {v0.16b}, v3.16b
	encrypt_block	v1, w3, x2, x6, w7

	add	x4, x0, x4
	st1	{v0.16b}, [x4]			/* overlapping stores */
	st1	{v1.16b}, [x0]
	ret
AES_FUNC_END(aes_cbc_cts_encrypt)

AES_FUNC_START(aes_cbc_cts_decrypt)
	adr_l	x8, .Lcts_permute_table
	sub	x4, x4, #16
	add	x9, x8, #32
	add	x8, x8, x4
	sub	x9, x9, x4
	ld1	{v3.16b}, [x8]
	ld1	{v4.16b}, [x9]

	ld1	{v0.16b}, [x1], x4		/* overlapping loads */
	ld1	{v1.16b}, [x1]

	ld1	{v5.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

	decrypt_block	v0, w3, x2, x6, w7
	tbl	v2.16b, {v0.16b}, v3.16b
	eor	v2.16b, v2.16b, v1.16b

	tbx	v0.16b, {v1.16b}, v4.16b
	decrypt_block	v0, w3, x2, x6, w7
	eor	v0.16b, v0.16b, v5.16b		/* xor with iv */

	add	x4, x0, x4
	st1	{v2.16b}, [x4]			/* overlapping stores */
	st1	{v0.16b}, [x0]
	ret
AES_FUNC_END(aes_cbc_cts_decrypt)

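	/*
	 * The permute table below drives the tbl/tbx based ciphertext
	 * stealing above: indexing into it at (bytes - 16) from either
	 * end yields a permutation vector that shifts the final partial
	 * block into place, with the 0xff entries selecting zero bytes
	 * (tbl) or preserving the destination (tbx), so the tail is
	 * handled with overlapping 16-byte loads and stores rather than
	 * a byte-by-byte loop.
	 */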
	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.previous


	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 ctr[])
	 */
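
	/*
	 * The low 64 bits of the counter are kept byte-reversed in x6 so
	 * they can be incremented with ordinary adds; the result is
	 * swabbed back into vctr.d[1] as blocks are produced. If adding
	 * 'blocks' could overflow the bottom 32 bits, the bulk path is
	 * skipped and the serial loop below propagates the carry into
	 * the upper 64 bits one block at a time (.Lctrcarry).
	 */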

AES_FUNC_START(aes_ctr_encrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	enc_prepare	w3, x2, x6
	ld1	{vctr.16b}, [x5]

	umov	x6, vctr.d[1]		/* keep swabbed ctr in reg */
	rev	x6, x6
	cmn	w6, w4			/* 32 bit overflow? */
	bcs	.Lctrloop
.LctrloopNx:
	subs	w4, w4, #MAX_STRIDE
	bmi	.Lctr1x
	add	w7, w6, #1
	mov	v0.16b, vctr.16b
	add	w8, w6, #2
	mov	v1.16b, vctr.16b
	add	w9, w6, #3
	mov	v2.16b, vctr.16b
	rev	w7, w7
	mov	v3.16b, vctr.16b
	rev	w8, w8
ST5(	mov	v4.16b, vctr.16b	)
	mov	v1.s[3], w7
	rev	w9, w9
ST5(	add	w10, w6, #4		)
	mov	v2.s[3], w8
ST5(	rev	w10, w10		)
	mov	v3.s[3], w9
ST5(	mov	v4.s[3], w10		)
	ld1	{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
ST4(	bl	aes_encrypt_block4x	)
ST5(	bl	aes_encrypt_block5x	)
	eor	v0.16b, v5.16b, v0.16b
ST4(	ld1	{v5.16b}, [x1], #16	)
	eor	v1.16b, v6.16b, v1.16b
ST5(	ld1	{v5.16b-v6.16b}, [x1], #32	)
	eor	v2.16b, v7.16b, v2.16b
	eor	v3.16b, v5.16b, v3.16b
ST5(	eor	v4.16b, v6.16b, v4.16b	)
	st1	{v0.16b-v3.16b}, [x0], #64
ST5(	st1	{v4.16b}, [x0], #16	)
	add	x6, x6, #MAX_STRIDE
	rev	x7, x6
	ins	vctr.d[1], x7
	cbz	w4, .Lctrout
	b	.LctrloopNx
.Lctr1x:
	adds	w4, w4, #MAX_STRIDE
	beq	.Lctrout
.Lctrloop:
	mov	v0.16b, vctr.16b
	encrypt_block	v0, w3, x2, x8, w7

	adds	x6, x6, #1		/* increment BE ctr */
	rev	x7, x6
	ins	vctr.d[1], x7
	bcs	.Lctrcarry		/* overflow? */

.Lctrcarrydone:
	subs	w4, w4, #1
	bmi	.Lctrtailblock		/* blocks < 0 means tail block */
	ld1	{v3.16b}, [x1], #16
	eor	v3.16b, v0.16b, v3.16b
	st1	{v3.16b}, [x0], #16
	bne	.Lctrloop

.Lctrout:
	st1	{vctr.16b}, [x5]	/* return next CTR value */
	ldp	x29, x30, [sp], #16
	ret

.Lctrtailblock:
	st1	{v0.16b}, [x0]
	b	.Lctrout

.Lctrcarry:
	umov	x7, vctr.d[0]		/* load upper word of ctr */
	rev	x7, x7			/* ... to handle the carry */
	add	x7, x7, #1
	rev	x7, x7
	ins	vctr.d[0], x7
	b	.Lctrcarrydone
AES_FUNC_END(aes_ctr_encrypt)


	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
	 */
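
	/*
	 * next_tweak multiplies the tweak by x in GF(2^128): the add
	 * doubles each 64-bit lane, while sshr/ext route the bit shifted
	 * out of each lane to the opposite one, where the mask folds it
	 * back in as 0x1 (the cross-lane carry) or 0x87 (the reduction
	 * modulo x^128 + x^7 + x^2 + x + 1). xts_load_mask materializes
	 * the { 0x1, 0x87 } constant from movi immediates, avoiding a
	 * literal pool load.
	 */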

	.macro		next_tweak, out, in, tmp
	sshr		\tmp\().2d,  \in\().2d,   #63
	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
	add		\out\().2d,  \in\().2d,   \in\().2d
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm

	.macro		xts_load_mask, tmp
	movi		xtsmask.2s, #0x1
	movi		\tmp\().2s, #0x87
	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
	.endm

AES_FUNC_START(aes_xts_encrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	ld1	{v4.16b}, [x6]
	xts_load_mask	v8
	cbz	w7, .Lxtsencnotfirst

	enc_prepare	w3, x5, x8
	xts_cts_skip_tw	w7, .LxtsencNx
	encrypt_block	v4, w3, x5, x8, w7	/* first tweak */
	enc_switch_key	w3, x2, x8
	b	.LxtsencNx

.Lxtsencnotfirst:
	enc_prepare	w3, x2, x8
.LxtsencloopNx:
	next_tweak	v4, v4, v8
.LxtsencNx:
	subs	w4, w4, #64
	bmi	.Lxtsenc1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v8
	eor	v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor	v3.16b, v3.16b, v7.16b
	bl	aes_encrypt_block4x
	eor	v3.16b, v3.16b, v7.16b
	eor	v0.16b, v0.16b, v4.16b
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	st1	{v0.16b-v3.16b}, [x0], #64
	mov	v4.16b, v7.16b
	cbz	w4, .Lxtsencret
	xts_reload_mask	v8
	b	.LxtsencloopNx
.Lxtsenc1x:
	adds	w4, w4, #64
	beq	.Lxtsencout
	subs	w4, w4, #16
	bmi	.LxtsencctsNx
.Lxtsencloop:
	ld1	{v0.16b}, [x1], #16
.Lxtsencctsout:
	eor	v0.16b, v0.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7
	eor	v0.16b, v0.16b, v4.16b
	cbz	w4, .Lxtsencout
	subs	w4, w4, #16
	next_tweak	v4, v4, v8
	bmi	.Lxtsenccts
	st1	{v0.16b}, [x0], #16
	b	.Lxtsencloop
.Lxtsencout:
	st1	{v0.16b}, [x0]
.Lxtsencret:
	st1	{v4.16b}, [x6]
	ldp	x29, x30, [sp], #16
	ret

.LxtsencctsNx:
	mov	v0.16b, v3.16b
	sub	x0, x0, #16
.Lxtsenccts:
	adr_l	x8, .Lcts_permute_table

	add	x1, x1, w4, sxtw	/* rewind input pointer */
	add	w4, w4, #16		/* # bytes in final block */
	add	x9, x8, #32
	add	x8, x8, x4
	sub	x9, x9, x4
	add	x4, x0, x4		/* output address of final block */

	ld1	{v1.16b}, [x1]		/* load final block */
	ld1	{v2.16b}, [x8]
	ld1	{v3.16b}, [x9]

	tbl	v2.16b, {v0.16b}, v2.16b
	tbx	v0.16b, {v1.16b}, v3.16b
	st1	{v2.16b}, [x4]		/* overlapping stores */
	mov	w4, wzr
	b	.Lxtsencctsout
AES_FUNC_END(aes_xts_encrypt)

AES_FUNC_START(aes_xts_decrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	/* subtract 16 bytes if we are doing CTS */
	sub	w8, w4, #0x10
	tst	w4, #0xf
	csel	w4, w4, w8, eq

	ld1	{v4.16b}, [x6]
	xts_load_mask	v8
	xts_cts_skip_tw	w7, .Lxtsdecskiptw
	cbz	w7, .Lxtsdecnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7	/* first tweak */
.Lxtsdecskiptw:
	dec_prepare	w3, x2, x8
	b	.LxtsdecNx

.Lxtsdecnotfirst:
	dec_prepare	w3, x2, x8
.LxtsdecloopNx:
	next_tweak	v4, v4, v8
.LxtsdecNx:
	subs	w4, w4, #64
	bmi	.Lxtsdec1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v8
	eor	v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor	v3.16b, v3.16b, v7.16b
	bl	aes_decrypt_block4x
	eor	v3.16b, v3.16b, v7.16b
	eor	v0.16b, v0.16b, v4.16b
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	st1	{v0.16b-v3.16b}, [x0], #64
	mov	v4.16b, v7.16b
	cbz	w4, .Lxtsdecout
	xts_reload_mask	v8
	b	.LxtsdecloopNx
.Lxtsdec1x:
	adds	w4, w4, #64
	beq	.Lxtsdecout
	subs	w4, w4, #16
.Lxtsdecloop:
	ld1	{v0.16b}, [x1], #16
	bmi	.Lxtsdeccts
.Lxtsdecctsout:
	eor	v0.16b, v0.16b, v4.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor	v0.16b, v0.16b, v4.16b
	st1	{v0.16b}, [x0], #16
	cbz	w4, .Lxtsdecout
	subs	w4, w4, #16
	next_tweak	v4, v4, v8
	b	.Lxtsdecloop
.Lxtsdecout:
	st1	{v4.16b}, [x6]
	ldp	x29, x30, [sp], #16
	ret

.Lxtsdeccts:
	adr_l	x8, .Lcts_permute_table

	add	x1, x1, w4, sxtw	/* rewind input pointer */
	add	w4, w4, #16		/* # bytes in final block */
	add	x9, x8, #32
	add	x8, x8, x4
	sub	x9, x9, x4
	add	x4, x0, x4		/* output address of final block */

	next_tweak	v5, v4, v8

	ld1	{v1.16b}, [x1]		/* load final block */
	ld1	{v2.16b}, [x8]
	ld1	{v3.16b}, [x9]

	eor	v0.16b, v0.16b, v5.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor	v0.16b, v0.16b, v5.16b

	tbl	v2.16b, {v0.16b}, v2.16b
	tbx	v0.16b, {v1.16b}, v3.16b

	st1	{v2.16b}, [x4]		/* overlapping stores */
	mov	w4, wzr
	b	.Lxtsdecctsout
AES_FUNC_END(aes_xts_decrypt)

	/*
	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
	 */
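
	/*
	 * dg[] carries the running CBC-MAC digest across calls.
	 * enc_before requests one extra encryption of the digest before
	 * any input is consumed, and enc_after selects whether the
	 * digest is encrypted again after the final block has been
	 * xor'ed in; deferring that last encryption to a later call is
	 * what lets the caller fold in a final subkey first, as
	 * CMAC/XCBC require.
	 */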
AES_FUNC_START(aes_mac_update)
	frame_push	6

	mov	x19, x0
	mov	x20, x1
	mov	x21, x2
	mov	x22, x3
	mov	x23, x4
	mov	x24, x6

	ld1	{v0.16b}, [x23]			/* get dg */
	enc_prepare	w2, x1, x7
	cbz	w5, .Lmacloop4x

	encrypt_block	v0, w2, x1, x7, w8

.Lmacloop4x:
	subs	w22, w22, #4
	bmi	.Lmac1x
	ld1	{v1.16b-v4.16b}, [x19], #64	/* get next 4 pt blocks */
	eor	v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
	encrypt_block	v0, w21, x20, x7, w8
	eor	v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor	v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor	v0.16b, v0.16b, v4.16b
	cmp	w22, wzr
	csinv	x5, x24, xzr, eq
	cbz	w5, .Lmacout
	encrypt_block	v0, w21, x20, x7, w8
	st1	{v0.16b}, [x23]			/* return dg */
	cond_yield_neon	.Lmacrestart
	b	.Lmacloop4x
.Lmac1x:
	add	w22, w22, #4
.Lmacloop:
	cbz	w22, .Lmacout
	ld1	{v1.16b}, [x19], #16		/* get next pt block */
	eor	v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

	subs	w22, w22, #1
	csinv	x5, x24, xzr, eq
	cbz	w5, .Lmacout

.Lmacenc:
	encrypt_block	v0, w21, x20, x7, w8
	b	.Lmacloop

.Lmacout:
	st1	{v0.16b}, [x23]			/* return dg */
	frame_pop
	ret

.Lmacrestart:
	ld1	{v0.16b}, [x23]			/* get dg */
	enc_prepare	w21, x20, x0
	b	.Lmacloop4x
AES_FUNC_END(aes_mac_update)