/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* included by aes-ce.S and aes-neon.S */

/*
 * Notes for readers of this file:
 *
 * - AES_ENTRY/AES_ENDPROC, ENDPROC, CPU_LE/CPU_BE, frame_push/frame_pop,
 *   cond_yield_neon, enc_prepare/dec_prepare/enc_switch_key and
 *   encrypt_block/decrypt_block[4x] are macros that are not defined in
 *   this file.
 * - Every routine copies its arguments into x19 and up right after
 *   frame_push and only uses those copies inside its loops.
 * - NOTE(review): cond_yield_neon <label> is assumed to be a point where
 *   the NEON register contents may be lost, with execution resuming at
 *   <label>; confirm against its definition. The code is laid out to
 *   match: chaining state is written to memory before each cond_yield_neon
 *   and every restart label reloads it and re-runs enc/dec_prepare.
 */

	.text
	.align		4

	/*
	 * Out-of-line 4-way helpers, reached with "bl" from the routines
	 * below. They work in place on v0-v3 and take the round count and
	 * round key pointer from w22/x21, which is where all of their callers
	 * keep them. x8 and w7 are passed to the macro as scratch registers.
	 */
aes_encrypt_block4x:
	encrypt_block4x	v0, v1, v2, v3, w22, x21, x8, w7
	ret
ENDPROC(aes_encrypt_block4x)

aes_decrypt_block4x:
	decrypt_block4x	v0, v1, v2, v3, w22, x21, x8, w7
	ret
ENDPROC(aes_decrypt_block4x)

	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 *
	 * Register use inside both routines:
	 *   x19 = out, x20 = in, x21 = rk, w22 = rounds, w23 = blocks left
	 */

AES_ENTRY(aes_ecb_encrypt)
	frame_push	5

	mov		x19, x0
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4

.Lecbencrestart:
	enc_prepare	w22, x21, x5

.LecbencloopNx:
	subs		w23, w23, #4			/* at least 4 blocks left? */
	bmi		.Lecbenc1x
	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 pt blocks */
	bl		aes_encrypt_block4x
	st1		{v0.16b-v3.16b}, [x19], #64
	cond_yield_neon	.Lecbencrestart
	b		.LecbencloopNx
.Lecbenc1x:
	adds		w23, w23, #4			/* undo the subtraction */
	beq		.Lecbencout			/* nothing left */
.Lecbencloop:
	ld1		{v0.16b}, [x20], #16		/* get next pt block */
	encrypt_block	v0, w22, x21, x5, w6
	st1		{v0.16b}, [x19], #16
	subs		w23, w23, #1
	bne		.Lecbencloop
.Lecbencout:
	frame_pop
	ret
AES_ENDPROC(aes_ecb_encrypt)


AES_ENTRY(aes_ecb_decrypt)
	frame_push	5

	mov		x19, x0
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4

.Lecbdecrestart:
	dec_prepare	w22, x21, x5

.LecbdecloopNx:
	subs		w23, w23, #4			/* at least 4 blocks left? */
	bmi		.Lecbdec1x
	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 ct blocks */
	bl		aes_decrypt_block4x
	st1		{v0.16b-v3.16b}, [x19], #64
	cond_yield_neon	.Lecbdecrestart
	b		.LecbdecloopNx
.Lecbdec1x:
	adds		w23, w23, #4			/* undo the subtraction */
	beq		.Lecbdecout			/* nothing left */
.Lecbdecloop:
	ld1		{v0.16b}, [x20], #16		/* get next ct block */
	decrypt_block	v0, w22, x21, x5, w6
	st1		{v0.16b}, [x19], #16
	subs		w23, w23, #1
	bne		.Lecbdecloop
.Lecbdecout:
	frame_pop
	ret
AES_ENDPROC(aes_ecb_decrypt)


	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 *
	 * Register use inside both routines:
	 *   x19 = out, x20 = in, x21 = rk, w22 = rounds, w23 = blocks left,
	 *   x24 = iv
	 * The chaining value lives in v4 (encrypt) or v7 (decrypt) and is
	 * written back through x24 before every cond_yield_neon and on exit.
	 */

AES_ENTRY(aes_cbc_encrypt)
	frame_push	6

	mov		x19, x0
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4
	mov		x24, x5

.Lcbcencrestart:
	ld1		{v4.16b}, [x24]			/* get iv */
	enc_prepare	w22, x21, x6

.Lcbcencloop4x:
	subs		w23, w23, #4
	bmi		.Lcbcenc1x
	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 pt blocks */
	/* each block is xored with the previous ciphertext, so the four
	 * encryptions are done one after another */
	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w22, x21, x6, w7
	eor		v1.16b, v1.16b, v0.16b
	encrypt_block	v1, w22, x21, x6, w7
	eor		v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w22, x21, x6, w7
	eor		v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w22, x21, x6, w7
	st1		{v0.16b-v3.16b}, [x19], #64
	mov		v4.16b, v3.16b			/* last ct is next iv */
	st1		{v4.16b}, [x24]			/* return iv */
	cond_yield_neon	.Lcbcencrestart
	b		.Lcbcencloop4x
.Lcbcenc1x:
	adds		w23, w23, #4
	beq		.Lcbcencout
.Lcbcencloop:
	ld1		{v0.16b}, [x20], #16		/* get next pt block */
	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w22, x21, x6, w7
	st1		{v4.16b}, [x19], #16
	subs		w23, w23, #1
	bne		.Lcbcencloop
.Lcbcencout:
	st1		{v4.16b}, [x24]			/* return iv */
	frame_pop
	ret
AES_ENDPROC(aes_cbc_encrypt)


AES_ENTRY(aes_cbc_decrypt)
	frame_push	6

	mov		x19, x0
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4
	mov		x24, x5

.Lcbcdecrestart:
	ld1		{v7.16b}, [x24]			/* get iv */
	dec_prepare	w22, x21, x6

.LcbcdecloopNx:
	subs		w23, w23, #4
	bmi		.Lcbcdec1x
	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 ct blocks */
	mov		v4.16b, v0.16b			/* keep ct 0-2 for the xor */
	mov		v5.16b, v1.16b
	mov		v6.16b, v2.16b
	bl		aes_decrypt_block4x
	sub		x20, x20, #16			/* step back to ct block 3 */
	eor		v0.16b, v0.16b, v7.16b
	eor		v1.16b, v1.16b, v4.16b
	ld1		{v7.16b}, [x20], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v5.16b
	eor		v3.16b, v3.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x19], #64
	st1		{v7.16b}, [x24]			/* return iv */
	cond_yield_neon	.Lcbcdecrestart
	b		.LcbcdecloopNx
.Lcbcdec1x:
	adds		w23, w23, #4
	beq		.Lcbcdecout
.Lcbcdecloop:
	ld1		{v1.16b}, [x20], #16		/* get next ct block */
	mov		v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w22, x21, x6, w7
	eor		v0.16b, v0.16b, v7.16b		/* xor with iv => pt */
	mov		v7.16b, v1.16b			/* ct is next iv */
	st1		{v0.16b}, [x19], #16
	subs		w23, w23, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	st1		{v7.16b}, [x24]			/* return iv */
	frame_pop
	ret
AES_ENDPROC(aes_cbc_decrypt)


	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 ctr[])
	 *
	 * Register use:
	 *   x19 = out, x20 = in, x21 = rk, w22 = rounds, w23 = blocks left,
	 *   x24 = ctr
	 *   v4  = current counter block
	 *   x6  = byte-reversed copy of v4.d[1], so that it can be incremented
	 *         with ordinary integer arithmetic
	 *
	 * A negative block count makes the single-block loop write one block
	 * of raw key stream to out and return without updating ctr[]
	 * (see .Lctrtailblock).
	 */

AES_ENTRY(aes_ctr_encrypt)
	frame_push	6

	mov		x19, x0
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4
	mov		x24, x5

.Lctrrestart:
	enc_prepare	w22, x21, x6		/* x6 is only scratch here */
	ld1		{v4.16b}, [x24]

	umov		x6, v4.d[1]		/* keep swabbed ctr in reg */
	rev		x6, x6
.LctrloopNx:
	subs		w23, w23, #4
	bmi		.Lctr1x
	/*
	 * The 4-way path only patches the last 32-bit lane of the counter
	 * block, so leave it to the single-block loop whenever adding 4
	 * would carry out of the low 32 bits.
	 */
	cmn		w6, #4			/* 32 bit overflow? */
	bcs		.Lctr1x
	/*
	 * Build ctr+1 .. ctr+3 in general purpose registers and insert them
	 * byte-reversed into lane 3 of copies of v4. This produces the same
	 * counter blocks as adding a {1,2,3,0} vector would, without needing
	 * a 128-bit literal load into a SIMD register.
	 */
	add		w7, w6, #1
	mov		v0.16b, v4.16b
	add		w8, w6, #2
	mov		v1.16b, v4.16b
	add		w9, w6, #3
	mov		v2.16b, v4.16b
	rev		w7, w7
	mov		v3.16b, v4.16b
	rev		w8, w8
	mov		v1.s[3], w7
	rev		w9, w9
	mov		v2.s[3], w8
	mov		v3.s[3], w9
	ld1		{v5.16b-v7.16b}, [x20], #48	/* get 3 input blocks */
	bl		aes_encrypt_block4x
	eor		v0.16b, v5.16b, v0.16b
	ld1		{v5.16b}, [x20], #16		/* get 1 input block */
	eor		v1.16b, v6.16b, v1.16b
	eor		v2.16b, v7.16b, v2.16b
	eor		v3.16b, v5.16b, v3.16b
	st1		{v0.16b-v3.16b}, [x19], #64
	add		x6, x6, #4			/* advance ctr by 4 */
	rev		x7, x6
	ins		v4.d[1], x7
	cbz		w23, .Lctrout			/* .Lctrout stores ctr */
	st1		{v4.16b}, [x24]		/* return next CTR value */
	cond_yield_neon	.Lctrrestart
	b		.LctrloopNx
.Lctr1x:
	adds		w23, w23, #4
	beq		.Lctrout
.Lctrloop:
	mov		v0.16b, v4.16b
	encrypt_block	v0, w22, x21, x8, w7

	adds		x6, x6, #1		/* increment BE ctr */
	rev		x7, x6			/* rev/ins leave the flags alone */
	ins		v4.d[1], x7
	bcs		.Lctrcarry		/* overflow? */

.Lctrcarrydone:
	subs		w23, w23, #1
	bmi		.Lctrtailblock		/* blocks <0 means tail block */
	ld1		{v3.16b}, [x20], #16
	eor		v3.16b, v0.16b, v3.16b
	st1		{v3.16b}, [x19], #16
	bne		.Lctrloop		/* flags still from the subs */

.Lctrout:
	st1		{v4.16b}, [x24]		/* return next CTR value */
.Lctrret:
	frame_pop
	ret

.Lctrtailblock:
	st1		{v0.16b}, [x19]		/* key stream only, no xor */
	b		.Lctrret

.Lctrcarry:
	umov		x7, v4.d[0]		/* load upper word of ctr  */
	rev		x7, x7			/* ... to handle the carry */
	add		x7, x7, #1
	rev		x7, x7
	ins		v4.d[0], x7
	b		.Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)
	.ltorg					/* no-op while the literal pool is empty */


	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
	 *
	 * Register use inside both routines:
	 *   x19 = out, x20 = in, x21 = rk1, w22 = rounds, w23 = blocks left,
	 *   x24 = iv
	 *   v4  = current tweak, v7 = .Lxts_mul_x constant, v8 = scratch
	 *
	 * If 'first' is non-zero, iv[] is first encrypted with rk2 to form the
	 * initial tweak. Otherwise iv[] is taken to hold the last tweak that
	 * was used and is advanced once before use. On exit, and before each
	 * cond_yield_neon, the last tweak used is written back to iv[].
	 */

	/*
	 * next_tweak: \out = \in advanced by one XTS multiply-by-x step.
	 * Both 64-bit lanes are doubled; \tmp holds, per lane, the \const
	 * value if that lane's top bit was set, and is swapped across lanes
	 * before being xored in, so each lane's carry lands in the other lane.
	 * \out may be the same register as \in or \const.
	 */
	.macro		next_tweak, out, in, const, tmp
	sshr		\tmp\().2d,  \in\().2d,   #63
	and		\tmp\().16b, \tmp\().16b, \const\().16b
	add		\out\().2d,  \in\().2d,   \in\().2d
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm

.Lxts_mul_x:
CPU_LE(	.quad		1, 0x87		)
CPU_BE(	.quad		0x87, 1		)

AES_ENTRY(aes_xts_encrypt)
	frame_push	6

	mov		x19, x0
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4
	mov		x24, x6

	ld1		{v4.16b}, [x24]
	cbz		w7, .Lxtsencnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	enc_switch_key	w3, x2, x8
	ldr		q7, .Lxts_mul_x
	b		.LxtsencNx

.Lxtsencrestart:
	ld1		{v4.16b}, [x24]
.Lxtsencnotfirst:
	enc_prepare	w22, x21, x8
.LxtsencloopNx:
	ldr		q7, .Lxts_mul_x			/* v7 is reused as a tweak below */
	next_tweak	v4, v4, v7, v8
.LxtsencNx:
	subs		w23, w23, #4
	bmi		.Lxtsenc1x
	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v7, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v7, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v7, v8			/* 4th tweak overwrites the constant */
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_encrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x19], #64
	mov		v4.16b, v7.16b			/* last tweak used */
	cbz		w23, .Lxtsencout
	st1		{v4.16b}, [x24]
	cond_yield_neon	.Lxtsencrestart
	b		.LxtsencloopNx
.Lxtsenc1x:
	adds		w23, w23, #4
	beq		.Lxtsencout
.Lxtsencloop:
	ld1		{v1.16b}, [x20], #16
	eor		v0.16b, v1.16b, v4.16b
	encrypt_block	v0, w22, x21, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x19], #16
	subs		w23, w23, #1
	beq		.Lxtsencout
	next_tweak	v4, v4, v7, v8			/* only if more blocks follow */
	b		.Lxtsencloop
.Lxtsencout:
	st1		{v4.16b}, [x24]
	frame_pop
	ret
AES_ENDPROC(aes_xts_encrypt)


AES_ENTRY(aes_xts_decrypt)
	frame_push	6

	mov		x19, x0
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4
	mov		x24, x6

	ld1		{v4.16b}, [x24]
	cbz		w7, .Lxtsdecnotfirst

	enc_prepare	w3, x5, x8			/* the tweak is always encrypted */
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	dec_prepare	w3, x2, x8
	ldr		q7, .Lxts_mul_x
	b		.LxtsdecNx

.Lxtsdecrestart:
	ld1		{v4.16b}, [x24]
.Lxtsdecnotfirst:
	dec_prepare	w22, x21, x8
.LxtsdecloopNx:
	ldr		q7, .Lxts_mul_x			/* v7 is reused as a tweak below */
	next_tweak	v4, v4, v7, v8
.LxtsdecNx:
	subs		w23, w23, #4
	bmi		.Lxtsdec1x
	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v7, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v7, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v7, v8			/* 4th tweak overwrites the constant */
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_decrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x19], #64
	mov		v4.16b, v7.16b			/* last tweak used */
	cbz		w23, .Lxtsdecout
	st1		{v4.16b}, [x24]
	cond_yield_neon	.Lxtsdecrestart
	b		.LxtsdecloopNx
.Lxtsdec1x:
	adds		w23, w23, #4
	beq		.Lxtsdecout
.Lxtsdecloop:
	ld1		{v1.16b}, [x20], #16
	eor		v0.16b, v1.16b, v4.16b
	decrypt_block	v0, w22, x21, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x19], #16
	subs		w23, w23, #1
	beq		.Lxtsdecout
	next_tweak	v4, v4, v7, v8			/* only if more blocks follow */
	b		.Lxtsdecloop
.Lxtsdecout:
	st1		{v4.16b}, [x24]
	frame_pop
	ret
AES_ENDPROC(aes_xts_decrypt)

	/*
	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
	 *
	 * Register use:
	 *   x19 = in, x20 = rk, w21 = rounds, w22 = blocks left, x23 = dg,
	 *   x24 = enc_after, v0 = running digest
	 *
	 * Each input block is xored into the digest, which is then encrypted.
	 * The encryption after the very last block is skipped when enc_after
	 * is zero. If enc_before is non-zero, the incoming digest is encrypted
	 * once before any input is consumed.
	 */
AES_ENTRY(aes_mac_update)
	frame_push	6

	mov		x19, x0
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4
	mov		x24, x6

	ld1		{v0.16b}, [x23]			/* get dg */
	enc_prepare	w2, x1, x7
	cbz		w5, .Lmacloop4x			/* enc_before == 0? */

	encrypt_block	v0, w2, x1, x7, w8

.Lmacloop4x:
	subs		w22, w22, #4
	bmi		.Lmac1x
	ld1		{v1.16b-v4.16b}, [x19], #64	/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v4.16b
	/* x5 = enc_after if no blocks are left, all ones otherwise */
	cmp		w22, wzr
	csinv		x5, x24, xzr, eq
	cbz		w5, .Lmacout
	encrypt_block	v0, w21, x20, x7, w8
	st1		{v0.16b}, [x23]			/* return dg */
	cond_yield_neon	.Lmacrestart
	b		.Lmacloop4x
.Lmac1x:
	add		w22, w22, #4
.Lmacloop:
	cbz		w22, .Lmacout
	ld1		{v1.16b}, [x19], #16		/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

	/* x5 = enc_after if this was the last block, all ones otherwise */
	subs		w22, w22, #1
	csinv		x5, x24, xzr, eq
	cbz		w5, .Lmacout

.Lmacenc:
	encrypt_block	v0, w21, x20, x7, w8
	b		.Lmacloop

.Lmacout:
	st1		{v0.16b}, [x23]			/* return dg */
	frame_pop
	ret

.Lmacrestart:
	ld1		{v0.16b}, [x23]			/* get dg */
	enc_prepare	w21, x20, x0			/* x0 is free: 'in' lives in x19 */
	b		.Lmacloop4x
AES_ENDPROC(aes_mac_update)