/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

/* included by aes-ce.S and aes-neon.S */

	.text
	.align		4

/*
 * Helper: encrypt the four blocks held in v0-v3 with the round keys at x2
 * (w3 = number of rounds); x8/w7 are scratch for the expanded macro.
 * Reached via bl, so callers must have saved x30 beforehand.
 */
aes_encrypt_block4x:
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
ENDPROC(aes_encrypt_block4x)

/* Helper: decrypt the four blocks in v0-v3; same register contract as above. */
aes_decrypt_block4x:
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
ENDPROC(aes_decrypt_block4x)

	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 */

AES_ENTRY(aes_ecb_encrypt)
	stp		x29, x30, [sp, #-16]!	/* frame: we bl to the 4x helper */
	mov		x29, sp

	enc_prepare	w3, x2, x5

.LecbencloopNx:
	subs		w4, w4, #4		/* at least 4 blocks left? */
	bmi		.Lecbenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	bl		aes_encrypt_block4x
	st1		{v0.16b-v3.16b}, [x0], #64
	b		.LecbencloopNx
.Lecbenc1x:
	adds		w4, w4, #4		/* undo the bias; 0 => done */
	beq		.Lecbencout
.Lecbencloop:
	ld1		{v0.16b}, [x1], #16	/* get next pt block */
	encrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbencloop
.Lecbencout:
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_ecb_encrypt)


AES_ENTRY(aes_ecb_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	dec_prepare	w3, x2, x5

.LecbdecloopNx:
	subs		w4, w4, #4
	bmi		.Lecbdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	bl		aes_decrypt_block4x
	st1		{v0.16b-v3.16b}, [x0], #64
	b		.LecbdecloopNx
.Lecbdec1x:
	adds		w4, w4, #4
	beq		.Lecbdecout
.Lecbdecloop:
	ld1		{v0.16b}, [x1], #16	/* get next ct block */
	decrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_ecb_decrypt)


	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 */

AES_ENTRY(aes_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

	/*
	 * CBC encryption is serial (each block chains off the previous
	 * ciphertext), so the 4x path below only batches the loads and
	 * stores, not the encryptions themselves.  Leaf function: no
	 * bl here, hence no stack frame.
	 */
.Lcbcencloop4x:
	subs		w4, w4, #4
	bmi		.Lcbcenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w3, x2, x6, w7
	eor		v1.16b, v1.16b, v0.16b
	encrypt_block	v1, w3, x2, x6, w7
	eor		v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w3, x2, x6, w7
	eor		v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w3, x2, x6, w7
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v3.16b			/* last ct is next iv */
	b		.Lcbcencloop4x
.Lcbcenc1x:
	adds		w4, w4, #4
	beq		.Lcbcencout
.Lcbcencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w3, x2, x6, w7
	st1		{v4.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcencloop
.Lcbcencout:
	st1		{v4.16b}, [x5]			/* return iv */
	ret
AES_ENDPROC(aes_cbc_encrypt)


AES_ENTRY(aes_cbc_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{v7.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

.LcbcdecloopNx:
	subs		w4, w4, #4
	bmi		.Lcbcdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	/* keep copies of ct blocks 0-2: they are the chaining values */
	mov		v4.16b, v0.16b
	mov		v5.16b, v1.16b
	mov		v6.16b, v2.16b
	bl		aes_decrypt_block4x
	sub		x1, x1, #16
	eor		v0.16b, v0.16b, v7.16b		/* xor with iv / prev ct */
	eor		v1.16b, v1.16b, v4.16b
	ld1		{v7.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v5.16b
	eor		v3.16b, v3.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	b		.LcbcdecloopNx
.Lcbcdec1x:
	adds		w4, w4, #4
	beq		.Lcbcdecout
.Lcbcdecloop:
	ld1		{v1.16b}, [x1], #16		/* get next ct block */
	mov		v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v7.16b		/* xor with iv => pt */
	mov		v7.16b, v1.16b			/* ct is next iv */
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	st1		{v7.16b}, [x5]			/* return iv */
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_cbc_decrypt)


	/*
	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 */

AES_ENTRY(aes_cbc_cts_encrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16		/* x4 := bytes - 16 */
	add		x9, x8, #32
	add		x8, x8, x4		/* index permute table by x4 */
	sub		x9, x9, x4
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4	/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]		/* get iv */
	enc_prepare	w3, x2, x6

	eor		v0.16b, v0.16b, v5.16b	/* xor with iv */
	tbl		v1.16b, {v1.16b}, v4.16b
	encrypt_block	v0, w3, x2, x6, w7

	eor		v1.16b, v1.16b, v0.16b
	tbl		v0.16b, {v0.16b}, v3.16b
	encrypt_block	v1, w3, x2, x6, w7

	add		x4, x0, x4
	st1		{v0.16b}, [x4]		/* overlapping stores */
	st1		{v1.16b}, [x0]
	ret
AES_ENDPROC(aes_cbc_cts_encrypt)

AES_ENTRY(aes_cbc_cts_decrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16		/* x4 := bytes - 16 */
	add		x9, x8, #32
	add		x8, x8, x4		/* index permute table by x4 */
	sub		x9, x9, x4
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4	/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]		/* get iv */
	dec_prepare	w3, x2, x6

	tbl		v2.16b, {v1.16b}, v4.16b
	decrypt_block	v0, w3, x2, x6, w7
	eor		v2.16b, v2.16b, v0.16b

	tbx		v0.16b, {v1.16b}, v4.16b	/* tbx keeps dst lanes where idx=0xff */
	tbl		v2.16b, {v2.16b}, v3.16b
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v5.16b	/* xor with iv */

	add		x4, x0, x4
	st1		{v2.16b}, [x4]		/* overlapping stores */
	st1		{v0.16b}, [x0]
	ret
AES_ENDPROC(aes_cbc_cts_decrypt)

	/*
	 * Sliding permute table: indexing into it at offset (bytes - 16)
	 * yields a tbl/tbx control vector selecting the live bytes of the
	 * final partial block (0xff lanes yield zero under tbl).
	 */
	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
	.byte		0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.previous


	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 ctr[])
	 */

AES_ENTRY(aes_ctr_encrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	enc_prepare	w3, x2, x6
	ld1		{v4.16b}, [x5]		/* v4 = counter block */

	umov		x6, v4.d[1]		/* keep swabbed ctr in reg */
	rev		x6, x6
	cmn		w6, w4			/* 32 bit overflow? */
	bcs		.Lctrloop		/* yes: use the 1x path, which
						 * carries into the high word */
.LctrloopNx:
	subs		w4, w4, #4
	bmi		.Lctr1x
	add		w7, w6, #1		/* ctr values for blocks 1-3 */
	mov		v0.16b, v4.16b
	add		w8, w6, #2
	mov		v1.16b, v4.16b
	add		w9, w6, #3
	mov		v2.16b, v4.16b
	rev		w7, w7			/* back to big-endian */
	mov		v3.16b, v4.16b
	rev		w8, w8
	mov		v1.s[3], w7
	rev		w9, w9
	mov		v2.s[3], w8
	mov		v3.s[3], w9
	ld1		{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
	bl		aes_encrypt_block4x
	eor		v0.16b, v5.16b, v0.16b
	ld1		{v5.16b}, [x1], #16	/* get 1 input block */
	eor		v1.16b, v6.16b, v1.16b
	eor		v2.16b, v7.16b, v2.16b
	eor		v3.16b, v5.16b, v3.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	add		x6, x6, #4
	rev		x7, x6
	ins		v4.d[1], x7		/* update counter block */
	cbz		w4, .Lctrout
	b		.LctrloopNx
.Lctr1x:
	adds		w4, w4, #4
	beq		.Lctrout
.Lctrloop:
	mov		v0.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7

	adds		x6, x6, #1		/* increment BE ctr */
	rev		x7, x6
	ins		v4.d[1], x7
	bcs		.Lctrcarry		/* overflow? */

.Lctrcarrydone:
	subs		w4, w4, #1
	bmi		.Lctrtailblock		/* blocks <0 means tail block */
	ld1		{v3.16b}, [x1], #16
	eor		v3.16b, v0.16b, v3.16b
	st1		{v3.16b}, [x0], #16
	bne		.Lctrloop

.Lctrout:
	st1		{v4.16b}, [x5]		/* return next CTR value */
	ldp		x29, x30, [sp], #16
	ret

.Lctrtailblock:
	st1		{v0.16b}, [x0]		/* tail: emit raw keystream block */
	b		.Lctrout

.Lctrcarry:
	umov		x7, v4.d[0]		/* load upper word of ctr */
	rev		x7, x7			/* ... to handle the carry */
	add		x7, x7, #1
	rev		x7, x7
	ins		v4.d[0], x7
	b		.Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)


	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
	 */

	/*
	 * Advance the XTS tweak: double \in in GF(2^128), i.e. shift left by
	 * one bit and, if the top bit was set, xor in the feedback value held
	 * in xtsmask (loaded by xts_load_mask below).
	 */
	.macro		next_tweak, out, in, tmp
	sshr		\tmp\().2d, \in\().2d, #63
	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
	add		\out\().2d, \in\().2d, \in\().2d
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm

	/* materialise the { 0x1, 0x87 } tweak feedback constant in xtsmask */
	.macro		xts_load_mask, tmp
	movi		xtsmask.2s, #0x1
	movi		\tmp\().2s, #0x87
	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
	.endm

AES_ENTRY(aes_xts_encrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{v4.16b}, [x6]		/* v4 = iv / tweak */
	xts_load_mask	v8
	cbz		w7, .Lxtsencnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7	/* first tweak */
	enc_switch_key	w3, x2, x8
	b		.LxtsencNx

.Lxtsencnotfirst:
	enc_prepare	w3, x2, x8
.LxtsencloopNx:
	next_tweak	v4, v4, v8
.LxtsencNx:
	subs		w4, w4, #4
	bmi		.Lxtsenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_encrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b		/* carry last tweak forward */
	cbz		w4, .Lxtsencout
	xts_reload_mask	v8
	b		.LxtsencloopNx
.Lxtsenc1x:
	adds		w4, w4, #4
	beq		.Lxtsencout
.Lxtsencloop:
	ld1		{v1.16b}, [x1], #16
	eor		v0.16b, v1.16b, v4.16b	/* xor pt with tweak */
	encrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b	/* xor ct with tweak */

	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	beq		.Lxtsencout
	next_tweak	v4, v4, v8
	b		.Lxtsencloop
.Lxtsencout:
	st1		{v4.16b}, [x6]		/* return updated tweak */
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_xts_encrypt)


AES_ENTRY(aes_xts_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{v4.16b}, [x6]		/* v4 = iv / tweak */
	xts_load_mask	v8
	cbz		w7, .Lxtsdecnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7	/* first tweak */
	dec_prepare	w3, x2, x8
	b		.LxtsdecNx

.Lxtsdecnotfirst:
	dec_prepare	w3, x2, x8
.LxtsdecloopNx:
	next_tweak	v4, v4, v8
.LxtsdecNx:
	subs		w4, w4, #4
	bmi		.Lxtsdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_decrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b		/* carry last tweak forward */
	cbz		w4, .Lxtsdecout
	xts_reload_mask	v8
	b		.LxtsdecloopNx
.Lxtsdec1x:
	adds		w4, w4, #4
	beq		.Lxtsdecout
.Lxtsdecloop:
	ld1		{v1.16b}, [x1], #16
	eor		v0.16b, v1.16b, v4.16b	/* xor ct with tweak */
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b	/* xor pt with tweak */
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	beq		.Lxtsdecout
	next_tweak	v4, v4, v8
	b		.Lxtsdecloop
.Lxtsdecout:
	st1		{v4.16b}, [x6]		/* return updated tweak */
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_xts_decrypt)

	/*
	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
	 */
AES_ENTRY(aes_mac_update)
	frame_push	6		/* frame + 6 callee-saved regs: this
					 * routine may yield the NEON unit */

	/* stash the arguments so they survive cond_yield_neon below */
	mov		x19, x0		/* in */
	mov		x20, x1		/* rk */
	mov		x21, x2		/* rounds */
	mov		x22, x3		/* blocks */
	mov		x23, x4		/* dg */
	mov		x24, x6		/* enc_after */

	ld1		{v0.16b}, [x23]		/* get dg */
	enc_prepare	w2, x1, x7
	cbz		w5, .Lmacloop4x		/* skip if !enc_before */

	encrypt_block	v0, w2, x1, x7, w8

.Lmacloop4x:
	subs		w22, w22, #4
	bmi		.Lmac1x
	ld1		{v1.16b-v4.16b}, [x19], #64	/* get next 4 pt blocks */
	eor		v0.16b, v0.16b, v1.16b	/* ..and xor with dg */
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v4.16b
	cmp		w22, wzr
	csinv		x5, x24, xzr, eq	/* x5 = blocks left ? ~0 : enc_after */
	cbz		w5, .Lmacout
	encrypt_block	v0, w21, x20, x7, w8
	st1		{v0.16b}, [x23]		/* return dg */
	cond_yield_neon	.Lmacrestart
	b		.Lmacloop4x
.Lmac1x:
	add		w22, w22, #4
.Lmacloop:
	cbz		w22, .Lmacout
	ld1		{v1.16b}, [x19], #16	/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b	/* ..and xor with dg */

	subs		w22, w22, #1
	csinv		x5, x24, xzr, eq	/* x5 = blocks left ? ~0 : enc_after */
	cbz		w5, .Lmacout

.Lmacenc:
	encrypt_block	v0, w21, x20, x7, w8
	b		.Lmacloop

.Lmacout:
	st1		{v0.16b}, [x23]		/* return dg */
	frame_pop
	ret

.Lmacrestart:
	/* resumed after a voluntary yield: reload dg and re-prepare the key */
	ld1		{v0.16b}, [x23]		/* get dg */
	enc_prepare	w21, x20, x0
	b		.Lmacloop4x
AES_ENDPROC(aes_mac_update)