/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

/* included by aes-ce.S and aes-neon.S */

	.text
	.align		4

#ifndef MAX_STRIDE
#define MAX_STRIDE	4
#endif

#if MAX_STRIDE == 4
#define ST4(x...) x
#define ST5(x...)
#else
#define ST4(x...)
#define ST5(x...) x
#endif
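	/*
	 * Note: ST4() emits its argument only when MAX_STRIDE == 4, and ST5()
	 * only when MAX_STRIDE == 5. The bulk loops below use them to build
	 * either a 4-way or a 5-way interleaved code path from the same source.
	 */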
SYM_FUNC_START_LOCAL(aes_encrypt_block4x)
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block4x)

SYM_FUNC_START_LOCAL(aes_decrypt_block4x)
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block4x)

#if MAX_STRIDE == 5
SYM_FUNC_START_LOCAL(aes_encrypt_block5x)
	encrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block5x)

SYM_FUNC_START_LOCAL(aes_decrypt_block5x)
	decrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block5x)
#endif

	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 */

AES_FUNC_START(aes_ecb_encrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	enc_prepare	w3, x2, x5

.LecbencloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lecbenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
ST4(	bl		aes_encrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_encrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbencloopNx
.Lecbenc1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lecbencout
.Lecbencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	encrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbencloop
.Lecbencout:
	ldp		x29, x30, [sp], #16
	ret
AES_FUNC_END(aes_ecb_encrypt)


AES_FUNC_START(aes_ecb_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	dec_prepare	w3, x2, x5

.LecbdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lecbdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
ST4(	bl		aes_decrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_decrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbdecloopNx
.Lecbdec1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lecbdecout
.Lecbdecloop:
	ld1		{v0.16b}, [x1], #16		/* get next ct block */
	decrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	ldp		x29, x30, [sp], #16
	ret
AES_FUNC_END(aes_ecb_decrypt)


	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[]);
	 * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[]);
	 */

AES_FUNC_START(aes_essiv_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	v4, w8, x6, x7, w9
	enc_switch_key	w3, x2, x6
	b		.Lcbcencloop4x

AES_FUNC_START(aes_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

.Lcbcencloop4x:
	subs		w4, w4, #4
	bmi		.Lcbcenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w3, x2, x6, w7
	eor		v1.16b, v1.16b, v0.16b
	encrypt_block	v1, w3, x2, x6, w7
	eor		v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w3, x2, x6, w7
	eor		v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w3, x2, x6, w7
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v3.16b
	b		.Lcbcencloop4x
.Lcbcenc1x:
	adds		w4, w4, #4
	beq		.Lcbcencout
.Lcbcencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w3, x2, x6, w7
	st1		{v4.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcencloop
.Lcbcencout:
	st1		{v4.16b}, [x5]			/* return iv */
	ret
AES_FUNC_END(aes_cbc_encrypt)
AES_FUNC_END(aes_essiv_cbc_encrypt)
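	/*
	 * A brief, best-effort note on the decrypt path below: the bulk loop
	 * keeps copies of the ciphertext blocks (and the running IV in cbciv)
	 * so that, after the 4x/5x parallel decryption, each plaintext block
	 * can be XORed with the preceding ciphertext block. The ESSIV variant
	 * first encrypts the caller's IV with the second (AES-256) key to
	 * derive the IV that is actually used for CBC.
	 */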
AES_FUNC_START(aes_essiv_cbc_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{cbciv.16b}, [x5]		/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	cbciv, w8, x6, x7, w9
	b		.Lessivcbcdecstart

AES_FUNC_START(aes_cbc_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{cbciv.16b}, [x5]		/* get iv */
.Lessivcbcdecstart:
	dec_prepare	w3, x2, x6

.LcbcdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lcbcdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
#if MAX_STRIDE == 5
	ld1		{v4.16b}, [x1], #16		/* get 1 ct block */
	mov		v5.16b, v0.16b
	mov		v6.16b, v1.16b
	mov		v7.16b, v2.16b
	bl		aes_decrypt_block5x
	sub		x1, x1, #32
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v5.16b
	ld1		{v5.16b}, [x1], #16		/* reload 1 ct block */
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v6.16b
	eor		v3.16b, v3.16b, v7.16b
	eor		v4.16b, v4.16b, v5.16b
#else
	mov		v4.16b, v0.16b
	mov		v5.16b, v1.16b
	mov		v6.16b, v2.16b
	bl		aes_decrypt_block4x
	sub		x1, x1, #16
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v4.16b
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v5.16b
	eor		v3.16b, v3.16b, v6.16b
#endif
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LcbcdecloopNx
.Lcbcdec1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lcbcdecout
.Lcbcdecloop:
	ld1		{v1.16b}, [x1], #16		/* get next ct block */
	mov		v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, cbciv.16b	/* xor with iv => pt */
	mov		cbciv.16b, v1.16b		/* ct is next iv */
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	st1		{cbciv.16b}, [x5]		/* return iv */
	ldp		x29, x30, [sp], #16
	ret
AES_FUNC_END(aes_cbc_decrypt)
AES_FUNC_END(aes_essiv_cbc_decrypt)


	/*
	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 */

AES_FUNC_START(aes_cbc_cts_encrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
	tbl		v1.16b, {v1.16b}, v4.16b
	encrypt_block	v0, w3, x2, x6, w7

	eor		v1.16b, v1.16b, v0.16b
	tbl		v0.16b, {v0.16b}, v3.16b
	encrypt_block	v1, w3, x2, x6, w7

	add		x4, x0, x4
	st1		{v0.16b}, [x4]			/* overlapping stores */
	st1		{v1.16b}, [x0]
	ret
AES_FUNC_END(aes_cbc_cts_encrypt)

AES_FUNC_START(aes_cbc_cts_decrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

	decrypt_block	v0, w3, x2, x6, w7
	tbl		v2.16b, {v0.16b}, v3.16b
	eor		v2.16b, v2.16b, v1.16b

	tbx		v0.16b, {v1.16b}, v4.16b
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */

	add		x4, x0, x4
	st1		{v2.16b}, [x4]			/* overlapping stores */
	st1		{v0.16b}, [x0]
	ret
AES_FUNC_END(aes_cbc_cts_decrypt)

	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.previous
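	/*
	 * Rough sketch of how .Lcts_permute_table is used (by the CBC-CTS,
	 * CTR tail and XTS-CTS paths): the table is 16 bytes of 0xff, the
	 * identity permutation 0x0..0xf, and 16 more bytes of 0xff. Loading
	 * 16 bytes at an offset derived from the length of the short final
	 * block yields tbl/tbx masks that shift or zero-pad that block
	 * (tbl maps an index of 0xff to zero, tbx leaves the byte alone),
	 * which is what allows the overlapping loads and stores instead of
	 * partial accesses beyond the end of the buffer.
	 */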
	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int bytes, u8 ctr[], u8 finalbuf[])
	 */

AES_FUNC_START(aes_ctr_encrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	enc_prepare	w3, x2, x12
	ld1		{vctr.16b}, [x5]

	umov		x12, vctr.d[1]		/* keep swabbed ctr in reg */
	rev		x12, x12

.LctrloopNx:
	add		w7, w4, #15
	sub		w4, w4, #MAX_STRIDE << 4
	lsr		w7, w7, #4
	mov		w8, #MAX_STRIDE
	cmp		w7, w8
	csel		w7, w7, w8, lt
	adds		x12, x12, x7

	mov		v0.16b, vctr.16b
	mov		v1.16b, vctr.16b
	mov		v2.16b, vctr.16b
	mov		v3.16b, vctr.16b
ST5(	mov		v4.16b, vctr.16b	)
	bcs		0f
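	/*
	 * Informal note on the carry handling below: the adds above detects a
	 * wrap of the low 64 bits of the counter. In that case, the
	 * out-of-line code in subsection 1 increments the high half held in
	 * vctr and uses a computed branch into the table of mov instructions
	 * to copy the incremented high half into only the last x12 counter
	 * blocks, i.e. the ones whose low half wrapped. Each table entry is
	 * 8 bytes (BTI landing pad plus mov), hence the 'lsl #3' when
	 * computing the branch target.
	 */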
	.subsection	1
	/* apply carry to outgoing counter */
0:	umov		x8, vctr.d[0]
	rev		x8, x8
	add		x8, x8, #1
	rev		x8, x8
	ins		vctr.d[0], x8

	/* apply carry to N counter blocks for N := x12 */
	cbz		x12, 2f
	adr		x16, 1f
	sub		x16, x16, x12, lsl #3
	br		x16
	hint		34			// bti c
	mov		v0.d[0], vctr.d[0]
	hint		34			// bti c
	mov		v1.d[0], vctr.d[0]
	hint		34			// bti c
	mov		v2.d[0], vctr.d[0]
	hint		34			// bti c
	mov		v3.d[0], vctr.d[0]
ST5(	hint		34				)
ST5(	mov		v4.d[0], vctr.d[0]		)
1:	b		2f
	.previous

2:	rev		x7, x12
	ins		vctr.d[1], x7
	sub		x7, x12, #MAX_STRIDE - 1
	sub		x8, x12, #MAX_STRIDE - 2
	sub		x9, x12, #MAX_STRIDE - 3
	rev		x7, x7
	rev		x8, x8
	mov		v1.d[1], x7
	rev		x9, x9
ST5(	sub		x10, x12, #MAX_STRIDE - 4	)
	mov		v2.d[1], x8
ST5(	rev		x10, x10			)
	mov		v3.d[1], x9
ST5(	mov		v4.d[1], x10			)
	tbnz		w4, #31, .Lctrtail
	ld1		{v5.16b-v7.16b}, [x1], #48
ST4(	bl		aes_encrypt_block4x		)
ST5(	bl		aes_encrypt_block5x		)
	eor		v0.16b, v5.16b, v0.16b
ST4(	ld1		{v5.16b}, [x1], #16		)
	eor		v1.16b, v6.16b, v1.16b
ST5(	ld1		{v5.16b-v6.16b}, [x1], #32	)
	eor		v2.16b, v7.16b, v2.16b
	eor		v3.16b, v5.16b, v3.16b
ST5(	eor		v4.16b, v6.16b, v4.16b		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	cbz		w4, .Lctrout
	b		.LctrloopNx

.Lctrout:
	st1		{vctr.16b}, [x5]	/* return next CTR value */
	ldp		x29, x30, [sp], #16
	ret

.Lctrtail:
	/* XOR up to MAX_STRIDE * 16 - 1 bytes of in/output with v0 ... v3/v4 */
	mov		x16, #16
	ands		x13, x4, #0xf
	csel		x13, x13, x16, ne

ST5(	cmp		w4, #64 - (MAX_STRIDE << 4)	)
ST5(	csel		x14, x16, xzr, gt		)
	cmp		w4, #48 - (MAX_STRIDE << 4)
	csel		x15, x16, xzr, gt
	cmp		w4, #32 - (MAX_STRIDE << 4)
	csel		x16, x16, xzr, gt
	cmp		w4, #16 - (MAX_STRIDE << 4)
	ble		.Lctrtail1x

	adr_l		x12, .Lcts_permute_table
	add		x12, x12, x13

ST5(	ld1		{v5.16b}, [x1], x14		)
	ld1		{v6.16b}, [x1], x15
	ld1		{v7.16b}, [x1], x16

ST4(	bl		aes_encrypt_block4x		)
ST5(	bl		aes_encrypt_block5x		)

	ld1		{v8.16b}, [x1], x13
	ld1		{v9.16b}, [x1]
	ld1		{v10.16b}, [x12]

ST4(	eor		v6.16b, v6.16b, v0.16b		)
ST4(	eor		v7.16b, v7.16b, v1.16b		)
ST4(	tbl		v3.16b, {v3.16b}, v10.16b	)
ST4(	eor		v8.16b, v8.16b, v2.16b		)
ST4(	eor		v9.16b, v9.16b, v3.16b		)

ST5(	eor		v5.16b, v5.16b, v0.16b		)
ST5(	eor		v6.16b, v6.16b, v1.16b		)
ST5(	tbl		v4.16b, {v4.16b}, v10.16b	)
ST5(	eor		v7.16b, v7.16b, v2.16b		)
ST5(	eor		v8.16b, v8.16b, v3.16b		)
ST5(	eor		v9.16b, v9.16b, v4.16b		)

ST5(	st1		{v5.16b}, [x0], x14		)
	st1		{v6.16b}, [x0], x15
	st1		{v7.16b}, [x0], x16
	add		x13, x13, x0
	st1		{v9.16b}, [x13]		// overlapping stores
	st1		{v8.16b}, [x0]
	b		.Lctrout

.Lctrtail1x:
	csel		x0, x0, x6, eq		// use finalbuf if less than a full block
	ld1		{v5.16b}, [x1]
ST5(	mov		v3.16b, v4.16b		)
	encrypt_block	v3, w3, x2, x8, w7
	eor		v5.16b, v5.16b, v3.16b
	st1		{v5.16b}, [x0]
	b		.Lctrout
AES_FUNC_END(aes_ctr_encrypt)


	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
	 */
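	/*
	 * The next_tweak macro below advances the XTS tweak, i.e. it
	 * multiplies it by x in GF(2^128): roughly, both 64-bit halves are
	 * shifted left by one bit, the carry out of the low half is moved
	 * into bit 0 of the high half via the ext/eor pair, and a carry out
	 * of the high half is reduced by XORing the constant 0x87 into the
	 * low byte. xts_load_mask sets up the { 0x1, 0x87 } constants this
	 * needs in xtsmask.
	 */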
	.macro		next_tweak, out, in, tmp
	sshr		\tmp\().2d,  \in\().2d,   #63
	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
	add		\out\().2d,  \in\().2d,   \in\().2d
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm

	.macro		xts_load_mask, tmp
	movi		xtsmask.2s, #0x1
	movi		\tmp\().2s, #0x87
	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
	.endm

AES_FUNC_START(aes_xts_encrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	cbz		w7, .Lxtsencnotfirst

	enc_prepare	w3, x5, x8
	xts_cts_skip_tw	w7, .LxtsencNx
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	enc_switch_key	w3, x2, x8
	b		.LxtsencNx

.Lxtsencnotfirst:
	enc_prepare	w3, x2, x8
.LxtsencloopNx:
	next_tweak	v4, v4, v8
.LxtsencNx:
	subs		w4, w4, #64
	bmi		.Lxtsenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_encrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b
	cbz		w4, .Lxtsencret
	xts_reload_mask	v8
	b		.LxtsencloopNx
.Lxtsenc1x:
	adds		w4, w4, #64
	beq		.Lxtsencout
	subs		w4, w4, #16
	bmi		.LxtsencctsNx
.Lxtsencloop:
	ld1		{v0.16b}, [x1], #16
.Lxtsencctsout:
	eor		v0.16b, v0.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	cbz		w4, .Lxtsencout
	subs		w4, w4, #16
	next_tweak	v4, v4, v8
	bmi		.Lxtsenccts
	st1		{v0.16b}, [x0], #16
	b		.Lxtsencloop
.Lxtsencout:
	st1		{v0.16b}, [x0]
.Lxtsencret:
	st1		{v4.16b}, [x6]
	ldp		x29, x30, [sp], #16
	ret

.LxtsencctsNx:
	mov		v0.16b, v3.16b
	sub		x0, x0, #16
.Lxtsenccts:
	adr_l		x8, .Lcts_permute_table

	add		x1, x1, w4, sxtw	/* rewind input pointer */
	add		w4, w4, #16		/* # bytes in final block */
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	add		x4, x0, x4		/* output address of final block */

	ld1		{v1.16b}, [x1]		/* load final block */
	ld1		{v2.16b}, [x8]
	ld1		{v3.16b}, [x9]

	tbl		v2.16b, {v0.16b}, v2.16b
	tbx		v0.16b, {v1.16b}, v3.16b
	st1		{v2.16b}, [x4]		/* overlapping stores */
	mov		w4, wzr
	b		.Lxtsencctsout
AES_FUNC_END(aes_xts_encrypt)
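	/*
	 * Informal note on the decrypt path below: when the length is not a
	 * multiple of the block size, 16 bytes are set aside up front (the
	 * tst/csel at the top) so that the last full block and the partial
	 * block are handled together in .Lxtsdeccts. There the usual tweak
	 * order is swapped: the last full block is decrypted with the tweak
	 * that follows it (v5), and the reassembled final block is then
	 * decrypted with the preceding tweak (v4), as ciphertext stealing
	 * requires for decryption.
	 */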
AES_FUNC_START(aes_xts_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	/* subtract 16 bytes if we are doing CTS */
	sub		w8, w4, #0x10
	tst		w4, #0xf
	csel		w4, w4, w8, eq

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	xts_cts_skip_tw	w7, .Lxtsdecskiptw
	cbz		w7, .Lxtsdecnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
.Lxtsdecskiptw:
	dec_prepare	w3, x2, x8
	b		.LxtsdecNx

.Lxtsdecnotfirst:
	dec_prepare	w3, x2, x8
.LxtsdecloopNx:
	next_tweak	v4, v4, v8
.LxtsdecNx:
	subs		w4, w4, #64
	bmi		.Lxtsdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_decrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b
	cbz		w4, .Lxtsdecout
	xts_reload_mask	v8
	b		.LxtsdecloopNx
.Lxtsdec1x:
	adds		w4, w4, #64
	beq		.Lxtsdecout
	subs		w4, w4, #16
.Lxtsdecloop:
	ld1		{v0.16b}, [x1], #16
	bmi		.Lxtsdeccts
.Lxtsdecctsout:
	eor		v0.16b, v0.16b, v4.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x0], #16
	cbz		w4, .Lxtsdecout
	subs		w4, w4, #16
	next_tweak	v4, v4, v8
	b		.Lxtsdecloop
.Lxtsdecout:
	st1		{v4.16b}, [x6]
	ldp		x29, x30, [sp], #16
	ret

.Lxtsdeccts:
	adr_l		x8, .Lcts_permute_table

	add		x1, x1, w4, sxtw	/* rewind input pointer */
	add		w4, w4, #16		/* # bytes in final block */
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	add		x4, x0, x4		/* output address of final block */

	next_tweak	v5, v4, v8

	ld1		{v1.16b}, [x1]		/* load final block */
	ld1		{v2.16b}, [x8]
	ld1		{v3.16b}, [x9]

	eor		v0.16b, v0.16b, v5.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v5.16b

	tbl		v2.16b, {v0.16b}, v2.16b
	tbx		v0.16b, {v1.16b}, v3.16b

	st1		{v2.16b}, [x4]		/* overlapping stores */
	mov		w4, wzr
	b		.Lxtsdecctsout
AES_FUNC_END(aes_xts_decrypt)

	/*
	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
	 */
AES_FUNC_START(aes_mac_update)
	ld1		{v0.16b}, [x4]			/* get dg */
	enc_prepare	w2, x1, x7
	cbz		w5, .Lmacloop4x

	encrypt_block	v0, w2, x1, x7, w8

.Lmacloop4x:
	subs		w3, w3, #4
	bmi		.Lmac1x
	ld1		{v1.16b-v4.16b}, [x0], #64	/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v4.16b
	cmp		w3, wzr
	csinv		x5, x6, xzr, eq
	cbz		w5, .Lmacout
	encrypt_block	v0, w2, x1, x7, w8
	st1		{v0.16b}, [x4]			/* return dg */
	cond_yield	.Lmacout, x7, x8
	b		.Lmacloop4x
.Lmac1x:
	add		w3, w3, #4
.Lmacloop:
	cbz		w3, .Lmacout
	ld1		{v1.16b}, [x0], #16		/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

	subs		w3, w3, #1
	csinv		x5, x6, xzr, eq
	cbz		w5, .Lmacout

.Lmacenc:
	encrypt_block	v0, w2, x1, x7, w8
	b		.Lmacloop

.Lmacout:
	st1		{v0.16b}, [x4]			/* return dg */
	mov		w0, w3
	ret
AES_FUNC_END(aes_mac_update)