/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

/* included by aes-ce.S and aes-neon.S */

	.text
	.align		4

#ifndef MAX_STRIDE
#define MAX_STRIDE	4
#endif

#if MAX_STRIDE == 4
#define ST4(x...) x
#define ST5(x...)
#else
#define ST4(x...)
#define ST5(x...) x
#endif

SYM_FUNC_START_LOCAL(aes_encrypt_block4x)
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block4x)

SYM_FUNC_START_LOCAL(aes_decrypt_block4x)
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block4x)

#if MAX_STRIDE == 5
SYM_FUNC_START_LOCAL(aes_encrypt_block5x)
	encrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block5x)

SYM_FUNC_START_LOCAL(aes_decrypt_block5x)
	decrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block5x)
#endif

	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 */

AES_FUNC_START(aes_ecb_encrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	enc_prepare	w3, x2, x5

.LecbencloopNx:
	subs	w4, w4, #MAX_STRIDE
	bmi	.Lecbenc1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
ST4(	bl	aes_encrypt_block4x		)
ST5(	ld1	{v4.16b}, [x1], #16		)
ST5(	bl	aes_encrypt_block5x		)
	st1	{v0.16b-v3.16b}, [x0], #64
ST5(	st1	{v4.16b}, [x0], #16		)
	b	.LecbencloopNx
.Lecbenc1x:
	adds	w4, w4, #MAX_STRIDE
	beq	.Lecbencout
.Lecbencloop:
	ld1	{v0.16b}, [x1], #16		/* get next pt block */
	encrypt_block	v0, w3, x2, x5, w6
	st1	{v0.16b}, [x0], #16
	subs	w4, w4, #1
	bne	.Lecbencloop
.Lecbencout:
	ldp	x29, x30, [sp], #16
	ret
AES_FUNC_END(aes_ecb_encrypt)

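	/*
	 * Note: both ECB routines consume MAX_STRIDE blocks per iteration of
	 * the Nx loop and fall back to one block at a time for the
	 * remainder.  E.g. with MAX_STRIDE == 4 and blocks == 11, the Nx
	 * loop body runs twice (8 blocks) and the 1x loop handles the
	 * remaining 3.
	 */
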
AES_FUNC_START(aes_ecb_decrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	dec_prepare	w3, x2, x5

.LecbdecloopNx:
	subs	w4, w4, #MAX_STRIDE
	bmi	.Lecbdec1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
ST4(	bl	aes_decrypt_block4x		)
ST5(	ld1	{v4.16b}, [x1], #16		)
ST5(	bl	aes_decrypt_block5x		)
	st1	{v0.16b-v3.16b}, [x0], #64
ST5(	st1	{v4.16b}, [x0], #16		)
	b	.LecbdecloopNx
.Lecbdec1x:
	adds	w4, w4, #MAX_STRIDE
	beq	.Lecbdecout
.Lecbdecloop:
	ld1	{v0.16b}, [x1], #16		/* get next ct block */
	decrypt_block	v0, w3, x2, x5, w6
	st1	{v0.16b}, [x0], #16
	subs	w4, w4, #1
	bne	.Lecbdecloop
.Lecbdecout:
	ldp	x29, x30, [sp], #16
	ret
AES_FUNC_END(aes_ecb_decrypt)


	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[]);
	 * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[]);
	 */

AES_FUNC_START(aes_essiv_cbc_encrypt)
	ld1	{v4.16b}, [x5]			/* get iv */

	mov	w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	v4, w8, x6, x7, w9
	enc_switch_key	w3, x2, x6
	b	.Lcbcencloop4x

AES_FUNC_START(aes_cbc_encrypt)
	ld1	{v4.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

.Lcbcencloop4x:
	subs	w4, w4, #4
	bmi	.Lcbcenc1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	eor	v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w3, x2, x6, w7
	eor	v1.16b, v1.16b, v0.16b
	encrypt_block	v1, w3, x2, x6, w7
	eor	v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w3, x2, x6, w7
	eor	v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w3, x2, x6, w7
	st1	{v0.16b-v3.16b}, [x0], #64
	mov	v4.16b, v3.16b
	b	.Lcbcencloop4x
.Lcbcenc1x:
	adds	w4, w4, #4
	beq	.Lcbcencout
.Lcbcencloop:
	ld1	{v0.16b}, [x1], #16		/* get next pt block */
	eor	v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w3, x2, x6, w7
	st1	{v4.16b}, [x0], #16
	subs	w4, w4, #1
	bne	.Lcbcencloop
.Lcbcencout:
	st1	{v4.16b}, [x5]			/* return iv */
	ret
AES_FUNC_END(aes_cbc_encrypt)
AES_FUNC_END(aes_essiv_cbc_encrypt)

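	/*
	 * CBC encryption above is inherently serial: each block must be
	 * encrypted before it can be xor'ed into the next.  Decryption, by
	 * contrast, can run MAX_STRIDE blocks in parallel: the routines
	 * below stash copies of the ciphertext blocks before decrypting
	 * them in place, then xor each result with the preceding ciphertext
	 * block (or with the IV, kept in the cbciv register, for the first
	 * block of the call).
	 */
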
AES_FUNC_START(aes_essiv_cbc_decrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	ld1	{cbciv.16b}, [x5]		/* get iv */

	mov	w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	cbciv, w8, x6, x7, w9
	b	.Lessivcbcdecstart

AES_FUNC_START(aes_cbc_decrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	ld1	{cbciv.16b}, [x5]		/* get iv */
.Lessivcbcdecstart:
	dec_prepare	w3, x2, x6

.LcbcdecloopNx:
	subs	w4, w4, #MAX_STRIDE
	bmi	.Lcbcdec1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
#if MAX_STRIDE == 5
	ld1	{v4.16b}, [x1], #16		/* get 1 ct block */
	mov	v5.16b, v0.16b
	mov	v6.16b, v1.16b
	mov	v7.16b, v2.16b
	bl	aes_decrypt_block5x
	sub	x1, x1, #32
	eor	v0.16b, v0.16b, cbciv.16b
	eor	v1.16b, v1.16b, v5.16b
	ld1	{v5.16b}, [x1], #16		/* reload 1 ct block */
	ld1	{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor	v2.16b, v2.16b, v6.16b
	eor	v3.16b, v3.16b, v7.16b
	eor	v4.16b, v4.16b, v5.16b
#else
	mov	v4.16b, v0.16b
	mov	v5.16b, v1.16b
	mov	v6.16b, v2.16b
	bl	aes_decrypt_block4x
	sub	x1, x1, #16
	eor	v0.16b, v0.16b, cbciv.16b
	eor	v1.16b, v1.16b, v4.16b
	ld1	{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor	v2.16b, v2.16b, v5.16b
	eor	v3.16b, v3.16b, v6.16b
#endif
	st1	{v0.16b-v3.16b}, [x0], #64
ST5(	st1	{v4.16b}, [x0], #16		)
	b	.LcbcdecloopNx
.Lcbcdec1x:
	adds	w4, w4, #MAX_STRIDE
	beq	.Lcbcdecout
.Lcbcdecloop:
	ld1	{v1.16b}, [x1], #16		/* get next ct block */
	mov	v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor	v0.16b, v0.16b, cbciv.16b	/* xor with iv => pt */
	mov	cbciv.16b, v1.16b		/* ct is next iv */
	st1	{v0.16b}, [x0], #16
	subs	w4, w4, #1
	bne	.Lcbcdecloop
.Lcbcdecout:
	st1	{cbciv.16b}, [x5]		/* return iv */
	ldp	x29, x30, [sp], #16
	ret
AES_FUNC_END(aes_cbc_decrypt)
AES_FUNC_END(aes_essiv_cbc_decrypt)


	/*
	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 */

AES_FUNC_START(aes_cbc_cts_encrypt)
	adr_l	x8, .Lcts_permute_table
	sub	x4, x4, #16
	add	x9, x8, #32
	add	x8, x8, x4
	sub	x9, x9, x4
	ld1	{v3.16b}, [x8]
	ld1	{v4.16b}, [x9]

	ld1	{v0.16b}, [x1], x4		/* overlapping loads */
	ld1	{v1.16b}, [x1]

	ld1	{v5.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

	eor	v0.16b, v0.16b, v5.16b		/* xor with iv */
	tbl	v1.16b, {v1.16b}, v4.16b
	encrypt_block	v0, w3, x2, x6, w7

	eor	v1.16b, v1.16b, v0.16b
	tbl	v0.16b, {v0.16b}, v3.16b
	encrypt_block	v1, w3, x2, x6, w7

	add	x4, x0, x4
	st1	{v0.16b}, [x4]			/* overlapping stores */
	st1	{v1.16b}, [x0]
	ret
AES_FUNC_END(aes_cbc_cts_encrypt)

AES_FUNC_START(aes_cbc_cts_decrypt)
	adr_l	x8, .Lcts_permute_table
	sub	x4, x4, #16
	add	x9, x8, #32
	add	x8, x8, x4
	sub	x9, x9, x4
	ld1	{v3.16b}, [x8]
	ld1	{v4.16b}, [x9]

	ld1	{v0.16b}, [x1], x4		/* overlapping loads */
	ld1	{v1.16b}, [x1]

	ld1	{v5.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

	decrypt_block	v0, w3, x2, x6, w7
	tbl	v2.16b, {v0.16b}, v3.16b
	eor	v2.16b, v2.16b, v1.16b

	tbx	v0.16b, {v1.16b}, v4.16b
	decrypt_block	v0, w3, x2, x6, w7
	eor	v0.16b, v0.16b, v5.16b		/* xor with iv */

	add	x4, x0, x4
	st1	{v2.16b}, [x4]			/* overlapping stores */
	st1	{v0.16b}, [x0]
	ret
AES_FUNC_END(aes_cbc_cts_decrypt)

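	/*
	 * The CTS routines above lean on .Lcts_permute_table (defined in
	 * .rodata below) and two properties of the permute instructions: a
	 * tbl index of 0xff produces a zero byte, while tbx leaves the
	 * destination byte unchanged.  Loading a mask from the table at
	 * offset (bytes - 16) therefore shifts a block so only the bytes
	 * belonging to the final partial block survive (zeroes elsewhere),
	 * and the complementary mask at offset 32 - (bytes - 16) extracts
	 * or merges the stolen bytes for the overlapping load/store trick.
	 */
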
	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.previous


	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int bytes, u8 ctr[], u8 finalbuf[])
	 */

AES_FUNC_START(aes_ctr_encrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	enc_prepare	w3, x2, x12
	ld1	{vctr.16b}, [x5]

	umov	x12, vctr.d[1]		/* keep swabbed ctr in reg */
	rev	x12, x12

.LctrloopNx:
	add	w7, w4, #15
	sub	w4, w4, #MAX_STRIDE << 4
	lsr	w7, w7, #4
	mov	w8, #MAX_STRIDE
	cmp	w7, w8
	csel	w7, w7, w8, lt
	adds	x12, x12, x7

	mov	v0.16b, vctr.16b
	mov	v1.16b, vctr.16b
	mov	v2.16b, vctr.16b
	mov	v3.16b, vctr.16b
ST5(	mov	v4.16b, vctr.16b	)
	bcs	0f

	.subsection	1
	/* apply carry to outgoing counter */
0:	umov	x8, vctr.d[0]
	rev	x8, x8
	add	x8, x8, #1
	rev	x8, x8
	ins	vctr.d[0], x8

	/* apply carry to N counter blocks for N := x12 */
	adr	x16, 1f
	sub	x16, x16, x12, lsl #3
	br	x16
	hint	34			// bti c
	mov	v0.d[0], vctr.d[0]
	hint	34			// bti c
	mov	v1.d[0], vctr.d[0]
	hint	34			// bti c
	mov	v2.d[0], vctr.d[0]
	hint	34			// bti c
	mov	v3.d[0], vctr.d[0]
ST5(	hint	34			)
ST5(	mov	v4.d[0], vctr.d[0]	)
1:	b	2f
	.previous

2:	rev	x7, x12
	ins	vctr.d[1], x7
	sub	x7, x12, #MAX_STRIDE - 1
	sub	x8, x12, #MAX_STRIDE - 2
	sub	x9, x12, #MAX_STRIDE - 3
	rev	x7, x7
	rev	x8, x8
	mov	v1.d[1], x7
	rev	x9, x9
ST5(	sub	x10, x12, #MAX_STRIDE - 4	)
	mov	v2.d[1], x8
ST5(	rev	x10, x10		)
	mov	v3.d[1], x9
ST5(	mov	v4.d[1], x10		)
	tbnz	w4, #31, .Lctrtail
	ld1	{v5.16b-v7.16b}, [x1], #48
ST4(	bl	aes_encrypt_block4x	)
ST5(	bl	aes_encrypt_block5x	)
	eor	v0.16b, v5.16b, v0.16b
ST4(	ld1	{v5.16b}, [x1], #16	)
	eor	v1.16b, v6.16b, v1.16b
ST5(	ld1	{v5.16b-v6.16b}, [x1], #32	)
	eor	v2.16b, v7.16b, v2.16b
	eor	v3.16b, v5.16b, v3.16b
ST5(	eor	v4.16b, v6.16b, v4.16b	)
	st1	{v0.16b-v3.16b}, [x0], #64
ST5(	st1	{v4.16b}, [x0], #16	)
	cbz	w4, .Lctrout
	b	.LctrloopNx

.Lctrout:
	st1	{vctr.16b}, [x5]	/* return next CTR value */
	ldp	x29, x30, [sp], #16
	ret
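
	/*
	 * Tail handling: x13 becomes the length of the final (possibly
	 * partial) block, and x15/x16 (plus x14 when MAX_STRIDE == 5) are
	 * set to either 16 or 0 depending on how many full blocks remain,
	 * so the loads and stores below step through exactly the bytes that
	 * are left.  E.g. with MAX_STRIDE == 4 and 35 bytes remaining:
	 * x13 = 3, x15 = 0 and x16 = 16, so v6 is a dummy slot whose store
	 * is immediately overwritten, v7/v8 cover the two full blocks, and
	 * v8/v9 make overlapping accesses that reconstruct the 3-byte tail.
	 */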
.Lctrtail:
	/* XOR up to MAX_STRIDE * 16 - 1 bytes of in/output with v0 ... v3/v4 */
	mov	x16, #16
	ands	x13, x4, #0xf
	csel	x13, x13, x16, ne

ST5(	cmp	w4, #64 - (MAX_STRIDE << 4)	)
ST5(	csel	x14, x16, xzr, gt	)
	cmp	w4, #48 - (MAX_STRIDE << 4)
	csel	x15, x16, xzr, gt
	cmp	w4, #32 - (MAX_STRIDE << 4)
	csel	x16, x16, xzr, gt
	cmp	w4, #16 - (MAX_STRIDE << 4)
	ble	.Lctrtail1x

	adr_l	x12, .Lcts_permute_table
	add	x12, x12, x13

ST5(	ld1	{v5.16b}, [x1], x14	)
	ld1	{v6.16b}, [x1], x15
	ld1	{v7.16b}, [x1], x16

ST4(	bl	aes_encrypt_block4x	)
ST5(	bl	aes_encrypt_block5x	)

	ld1	{v8.16b}, [x1], x13
	ld1	{v9.16b}, [x1]
	ld1	{v10.16b}, [x12]

ST4(	eor	v6.16b, v6.16b, v0.16b	)
ST4(	eor	v7.16b, v7.16b, v1.16b	)
ST4(	tbl	v3.16b, {v3.16b}, v10.16b	)
ST4(	eor	v8.16b, v8.16b, v2.16b	)
ST4(	eor	v9.16b, v9.16b, v3.16b	)

ST5(	eor	v5.16b, v5.16b, v0.16b	)
ST5(	eor	v6.16b, v6.16b, v1.16b	)
ST5(	tbl	v4.16b, {v4.16b}, v10.16b	)
ST5(	eor	v7.16b, v7.16b, v2.16b	)
ST5(	eor	v8.16b, v8.16b, v3.16b	)
ST5(	eor	v9.16b, v9.16b, v4.16b	)

ST5(	st1	{v5.16b}, [x0], x14	)
	st1	{v6.16b}, [x0], x15
	st1	{v7.16b}, [x0], x16
	add	x13, x13, x0
	st1	{v9.16b}, [x13]		// overlapping stores
	st1	{v8.16b}, [x0]
	b	.Lctrout

.Lctrtail1x:
	csel	x0, x0, x6, eq		// use finalbuf if less than a full block
	ld1	{v5.16b}, [x1]
ST5(	mov	v3.16b, v4.16b		)
	encrypt_block	v3, w3, x2, x8, w7
	eor	v5.16b, v5.16b, v3.16b
	st1	{v5.16b}, [x0]
	b	.Lctrout
AES_FUNC_END(aes_ctr_encrypt)


	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
	 */

	.macro		next_tweak, out, in, tmp
	sshr		\tmp\().2d,  \in\().2d,   #63
	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
	add		\out\().2d,  \in\().2d,   \in\().2d
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm

	.macro		xts_load_mask, tmp
	movi		xtsmask.2s, #0x1
	movi		\tmp\().2s, #0x87
	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
	.endm

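	/*
	 * next_tweak computes T' = (T << 1) ^ (0x87 if bit 127 of T was
	 * set), i.e. multiplication by x in GF(2^128) modulo
	 * x^128 + x^7 + x^2 + x + 1.  xts_load_mask sets xtsmask to 0x1 in
	 * the low 64-bit lane and 0x87 in the high one; sshr/and then
	 * derive the per-lane carry terms, add doubles both lanes, and ext
	 * swaps the carries across lanes so the high lane's overflow feeds
	 * 0x87 back into the low lane while the low lane's carry becomes
	 * bit 0 of the high lane.
	 */
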
AES_FUNC_START(aes_xts_encrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	ld1	{v4.16b}, [x6]
	xts_load_mask	v8
	cbz	w7, .Lxtsencnotfirst

	enc_prepare	w3, x5, x8
	xts_cts_skip_tw	w7, .LxtsencNx
	encrypt_block	v4, w3, x5, x8, w7	/* first tweak */
	enc_switch_key	w3, x2, x8
	b	.LxtsencNx

.Lxtsencnotfirst:
	enc_prepare	w3, x2, x8
.LxtsencloopNx:
	next_tweak	v4, v4, v8
.LxtsencNx:
	subs	w4, w4, #64
	bmi	.Lxtsenc1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v8
	eor	v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor	v3.16b, v3.16b, v7.16b
	bl	aes_encrypt_block4x
	eor	v3.16b, v3.16b, v7.16b
	eor	v0.16b, v0.16b, v4.16b
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	st1	{v0.16b-v3.16b}, [x0], #64
	mov	v4.16b, v7.16b
	cbz	w4, .Lxtsencret
	xts_reload_mask	v8
	b	.LxtsencloopNx
.Lxtsenc1x:
	adds	w4, w4, #64
	beq	.Lxtsencout
	subs	w4, w4, #16
	bmi	.LxtsencctsNx
.Lxtsencloop:
	ld1	{v0.16b}, [x1], #16
.Lxtsencctsout:
	eor	v0.16b, v0.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7
	eor	v0.16b, v0.16b, v4.16b
	cbz	w4, .Lxtsencout
	subs	w4, w4, #16
	next_tweak	v4, v4, v8
	bmi	.Lxtsenccts
	st1	{v0.16b}, [x0], #16
	b	.Lxtsencloop
.Lxtsencout:
	st1	{v0.16b}, [x0]
.Lxtsencret:
	st1	{v4.16b}, [x6]
	ldp	x29, x30, [sp], #16
	ret

.LxtsencctsNx:
	mov	v0.16b, v3.16b
	sub	x0, x0, #16
.Lxtsenccts:
	adr_l	x8, .Lcts_permute_table

	add	x1, x1, w4, sxtw	/* rewind input pointer */
	add	w4, w4, #16		/* # bytes in final block */
	add	x9, x8, #32
	add	x8, x8, x4
	sub	x9, x9, x4
	add	x4, x0, x4		/* output address of final block */

	ld1	{v1.16b}, [x1]		/* load final block */
	ld1	{v2.16b}, [x8]
	ld1	{v3.16b}, [x9]

	tbl	v2.16b, {v0.16b}, v2.16b
	tbx	v0.16b, {v1.16b}, v3.16b
	st1	{v2.16b}, [x4]		/* overlapping stores */
	mov	w4, wzr
	b	.Lxtsencctsout
AES_FUNC_END(aes_xts_encrypt)

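	/*
	 * XTS decryption with ciphertext stealing applies the last two
	 * tweaks in the opposite order from encryption: the routine below
	 * reserves the final 16 bytes up front (the sub/tst/csel sequence
	 * at entry), decrypts the last full ciphertext block with the later
	 * tweak (v5, computed in .Lxtsdeccts), and only then reassembles
	 * and decrypts the stolen block with the earlier tweak (v4).
	 */
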
AES_FUNC_START(aes_xts_decrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	/* subtract 16 bytes if we are doing CTS */
	sub	w8, w4, #0x10
	tst	w4, #0xf
	csel	w4, w4, w8, eq

	ld1	{v4.16b}, [x6]
	xts_load_mask	v8
	xts_cts_skip_tw	w7, .Lxtsdecskiptw
	cbz	w7, .Lxtsdecnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7	/* first tweak */
.Lxtsdecskiptw:
	dec_prepare	w3, x2, x8
	b	.LxtsdecNx

.Lxtsdecnotfirst:
	dec_prepare	w3, x2, x8
.LxtsdecloopNx:
	next_tweak	v4, v4, v8
.LxtsdecNx:
	subs	w4, w4, #64
	bmi	.Lxtsdec1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v8
	eor	v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor	v3.16b, v3.16b, v7.16b
	bl	aes_decrypt_block4x
	eor	v3.16b, v3.16b, v7.16b
	eor	v0.16b, v0.16b, v4.16b
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	st1	{v0.16b-v3.16b}, [x0], #64
	mov	v4.16b, v7.16b
	cbz	w4, .Lxtsdecout
	xts_reload_mask	v8
	b	.LxtsdecloopNx
.Lxtsdec1x:
	adds	w4, w4, #64
	beq	.Lxtsdecout
	subs	w4, w4, #16
.Lxtsdecloop:
	ld1	{v0.16b}, [x1], #16
	bmi	.Lxtsdeccts
.Lxtsdecctsout:
	eor	v0.16b, v0.16b, v4.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor	v0.16b, v0.16b, v4.16b
	st1	{v0.16b}, [x0], #16
	cbz	w4, .Lxtsdecout
	subs	w4, w4, #16
	next_tweak	v4, v4, v8
	b	.Lxtsdecloop
.Lxtsdecout:
	st1	{v4.16b}, [x6]
	ldp	x29, x30, [sp], #16
	ret

.Lxtsdeccts:
	adr_l	x8, .Lcts_permute_table

	add	x1, x1, w4, sxtw	/* rewind input pointer */
	add	w4, w4, #16		/* # bytes in final block */
	add	x9, x8, #32
	add	x8, x8, x4
	sub	x9, x9, x4
	add	x4, x0, x4		/* output address of final block */

	next_tweak	v5, v4, v8

	ld1	{v1.16b}, [x1]		/* load final block */
	ld1	{v2.16b}, [x8]
	ld1	{v3.16b}, [x9]

	eor	v0.16b, v0.16b, v5.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor	v0.16b, v0.16b, v5.16b

	tbl	v2.16b, {v0.16b}, v2.16b
	tbx	v0.16b, {v1.16b}, v3.16b

	st1	{v2.16b}, [x4]		/* overlapping stores */
	mov	w4, wzr
	b	.Lxtsdecctsout
AES_FUNC_END(aes_xts_decrypt)

	/*
	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
	 */
AES_FUNC_START(aes_mac_update)
	ld1	{v0.16b}, [x4]			/* get dg */
	enc_prepare	w2, x1, x7
	cbz	w5, .Lmacloop4x

	encrypt_block	v0, w2, x1, x7, w8

.Lmacloop4x:
	subs	w3, w3, #4
	bmi	.Lmac1x
	ld1	{v1.16b-v4.16b}, [x0], #64	/* get next pt block */
	eor	v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
	encrypt_block	v0, w2, x1, x7, w8
	eor	v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w2, x1, x7, w8
	eor	v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w2, x1, x7, w8
	eor	v0.16b, v0.16b, v4.16b
	cmp	w3, wzr
	csinv	x5, x6, xzr, eq
	cbz	w5, .Lmacout
	encrypt_block	v0, w2, x1, x7, w8
	st1	{v0.16b}, [x4]			/* return dg */
	cond_yield	.Lmacout, x7
	b	.Lmacloop4x
.Lmac1x:
	add	w3, w3, #4
.Lmacloop:
	cbz	w3, .Lmacout
	ld1	{v1.16b}, [x0], #16		/* get next pt block */
	eor	v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

	subs	w3, w3, #1
	csinv	x5, x6, xzr, eq
	cbz	w5, .Lmacout

.Lmacenc:
	encrypt_block	v0, w2, x1, x7, w8
	b	.Lmacloop

.Lmacout:
	st1	{v0.16b}, [x4]			/* return dg */
	mov	w0, w3
	ret
AES_FUNC_END(aes_mac_update)
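
	/*
	 * Note on aes_mac_update(): this is the CBC-MAC core behind the
	 * CMAC/XCBC/CBCMAC transforms (see the callers in aes-glue.c).  dg
	 * is xor'ed with each input block and encrypted in place;
	 * enc_before forces an initial encryption of dg, and enc_after
	 * decides whether the final block is encrypted before dg is stored
	 * back.  The return value in w0 is the number of blocks still to
	 * process when cond_yield forces an early exit, so the caller can
	 * resume where it left off.
	 */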