/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* included by aes-ce.S and aes-neon.S */

	.text
	.align	4

	/*
	 * 4-way interleaved block helpers.  encrypt_block4x/decrypt_block4x
	 * are macros supplied by the including file (aes-ce.S or aes-neon.S).
	 * In/out: v0-v3 = four AES blocks; w3 = rounds, x2 = round-key ptr.
	 * x8/w7 are passed as scratch for the macro expansion.
	 */
aes_encrypt_block4x:
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
ENDPROC(aes_encrypt_block4x)

aes_decrypt_block4x:
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
ENDPROC(aes_decrypt_block4x)

	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 */

AES_ENTRY(aes_ecb_encrypt)
	stp	x29, x30, [sp, #-16]!	/* non-leaf: save FP/LR (bl below) */
	mov	x29, sp

	enc_prepare	w3, x2, x5

.LecbencloopNx:
	subs	w4, w4, #4
	bmi	.Lecbenc1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	bl	aes_encrypt_block4x
	st1	{v0.16b-v3.16b}, [x0], #64
	b	.LecbencloopNx
.Lecbenc1x:
	adds	w4, w4, #4
	beq	.Lecbencout
.Lecbencloop:
	ld1	{v0.16b}, [x1], #16		/* get next pt block */
	encrypt_block	v0, w3, x2, x5, w6
	st1	{v0.16b}, [x0], #16
	subs	w4, w4, #1
	bne	.Lecbencloop
.Lecbencout:
	ldp	x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_ecb_encrypt)


AES_ENTRY(aes_ecb_decrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	dec_prepare	w3, x2, x5

.LecbdecloopNx:
	subs	w4, w4, #4
	bmi	.Lecbdec1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	bl	aes_decrypt_block4x
	st1	{v0.16b-v3.16b}, [x0], #64
	b	.LecbdecloopNx
.Lecbdec1x:
	adds	w4, w4, #4
	beq	.Lecbdecout
.Lecbdecloop:
	ld1	{v0.16b}, [x1], #16		/* get next ct block */
	decrypt_block	v0, w3, x2, x5, w6
	st1	{v0.16b}, [x0], #16
	subs	w4, w4, #1
	bne	.Lecbdecloop
.Lecbdecout:
	ldp	x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_ecb_decrypt)


	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 */

	/*
	 * CBC encryption is inherently serial (each block chains into the
	 * next), so the 4x path below only unrolls; it cannot interleave.
	 */
AES_ENTRY(aes_cbc_encrypt)
	ld1	{v4.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

.Lcbcencloop4x:
	subs	w4, w4, #4
	bmi	.Lcbcenc1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	eor	v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w3, x2, x6, w7
	eor	v1.16b, v1.16b, v0.16b
	encrypt_block	v1, w3, x2, x6, w7
	eor	v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w3, x2, x6, w7
	eor	v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w3, x2, x6, w7
	st1	{v0.16b-v3.16b}, [x0], #64
	mov	v4.16b, v3.16b			/* last ct is next iv */
	b	.Lcbcencloop4x
.Lcbcenc1x:
	adds	w4, w4, #4
	beq	.Lcbcencout
.Lcbcencloop:
	ld1	{v0.16b}, [x1], #16		/* get next pt block */
	eor	v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w3, x2, x6, w7
	st1	{v4.16b}, [x0], #16
	subs	w4, w4, #1
	bne	.Lcbcencloop
.Lcbcencout:
	st1	{v4.16b}, [x5]			/* return iv */
	ret
AES_ENDPROC(aes_cbc_encrypt)


AES_ENTRY(aes_cbc_decrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	ld1	{v7.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

.LcbcdecloopNx:
	subs	w4, w4, #4
	bmi	.Lcbcdec1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	/* keep copies of the ct: each block is the next one's chaining iv */
	mov	v4.16b, v0.16b
	mov	v5.16b, v1.16b
	mov	v6.16b, v2.16b
	bl	aes_decrypt_block4x
	sub	x1, x1, #16
	eor	v0.16b, v0.16b, v7.16b
	eor	v1.16b, v1.16b, v4.16b
	ld1	{v7.16b}, [x1], #16		/* reload 1 ct block */
	eor	v2.16b, v2.16b, v5.16b
	eor	v3.16b, v3.16b, v6.16b
	st1	{v0.16b-v3.16b}, [x0], #64
	b	.LcbcdecloopNx
.Lcbcdec1x:
	adds	w4, w4, #4
	beq	.Lcbcdecout
.Lcbcdecloop:
	ld1	{v1.16b}, [x1], #16		/* get next ct block */
	mov	v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor	v0.16b, v0.16b, v7.16b		/* xor with iv => pt */
	mov	v7.16b, v1.16b			/* ct is next iv */
	st1	{v0.16b}, [x0], #16
	subs	w4, w4, #1
	bne	.Lcbcdecloop
.Lcbcdecout:
	st1	{v7.16b}, [x5]			/* return iv */
	ldp	x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_cbc_decrypt)


	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 ctr[])
	 */

AES_ENTRY(aes_ctr_encrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	enc_prepare	w3, x2, x6
	ld1	{v4.16b}, [x5]

	umov	x6, v4.d[1]		/* keep swabbed ctr in reg */
	rev	x6, x6
	cmn	w6, w4			/* 32 bit overflow? */
	bcs	.Lctrloop		/* fall back to 1x path if so */
.LctrloopNx:
	subs	w4, w4, #4
	bmi	.Lctr1x
	ldr	q8, =0x30000000200000001	/* addends 1,2,3[,0] */
	dup	v7.4s, w6
	mov	v0.16b, v4.16b
	add	v7.4s, v7.4s, v8.4s
	mov	v1.16b, v4.16b
	rev32	v8.16b, v7.16b
	mov	v2.16b, v4.16b
	mov	v3.16b, v4.16b
	mov	v1.s[3], v8.s[0]
	mov	v2.s[3], v8.s[1]
	mov	v3.s[3], v8.s[2]
	ld1	{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
	bl	aes_encrypt_block4x
	eor	v0.16b, v5.16b, v0.16b
	ld1	{v5.16b}, [x1], #16		/* get 1 input block  */
	eor	v1.16b, v6.16b, v1.16b
	eor	v2.16b, v7.16b, v2.16b
	eor	v3.16b, v5.16b, v3.16b
	st1	{v0.16b-v3.16b}, [x0], #64
	add	x6, x6, #4
	rev	x7, x6
	ins	v4.d[1], x7
	cbz	w4, .Lctrout
	b	.LctrloopNx
.Lctr1x:
	adds	w4, w4, #4
	beq	.Lctrout
.Lctrloop:
	mov	v0.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7

	adds	x6, x6, #1		/* increment BE ctr */
	rev	x7, x6
	ins	v4.d[1], x7
	bcs	.Lctrcarry		/* overflow? */

.Lctrcarrydone:
	subs	w4, w4, #1
	bmi	.Lctrtailblock		/* blocks <0 means tail block */
	ld1	{v3.16b}, [x1], #16
	eor	v3.16b, v0.16b, v3.16b
	st1	{v3.16b}, [x0], #16
	bne	.Lctrloop

.Lctrout:
	st1	{v4.16b}, [x5]		/* return next CTR value */
	ldp	x29, x30, [sp], #16
	ret

.Lctrtailblock:
	st1	{v0.16b}, [x0]		/* emit raw keystream for partial block */
	ldp	x29, x30, [sp], #16
	ret

.Lctrcarry:
	umov	x7, v4.d[0]		/* load upper word of ctr  */
	rev	x7, x7			/* ... to handle the carry */
	add	x7, x7, #1
	rev	x7, x7
	ins	v4.d[0], x7
	b	.Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)
	.ltorg				/* literal pool for the ldr q8, = above */


	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
	 */

	/* next XTS tweak = in * x in GF(2^128), mod poly in \const */
	.macro		next_tweak, out, in, const, tmp
	sshr		\tmp\().2d,  \in\().2d,   #63
	and		\tmp\().16b, \tmp\().16b, \const\().16b
	add		\out\().2d,  \in\().2d,   \in\().2d
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm

/* XTS reduction constant: x^128 = x^7 + x^2 + x + 1 (0x87) */
.Lxts_mul_x:
CPU_LE(	.quad		1, 0x87		)
CPU_BE(	.quad		0x87, 1		)

AES_ENTRY(aes_xts_encrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	ld1	{v4.16b}, [x6]
	cbz	w7, .Lxtsencnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	enc_switch_key	w3, x2, x8
	ldr	q7, .Lxts_mul_x
	b	.LxtsencNx

.Lxtsencnotfirst:
	enc_prepare	w3, x2, x8
.LxtsencloopNx:
	ldr	q7, .Lxts_mul_x
	next_tweak	v4, v4, v7, v8
.LxtsencNx:
	subs	w4, w4, #4
	bmi	.Lxtsenc1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v7, v8
	eor	v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v7, v8
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v7, v8
	eor	v3.16b, v3.16b, v7.16b
	bl	aes_encrypt_block4x
	eor	v3.16b, v3.16b, v7.16b
	eor	v0.16b, v0.16b, v4.16b
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	st1	{v0.16b-v3.16b}, [x0], #64
	mov	v4.16b, v7.16b
	cbz	w4, .Lxtsencout
	b	.LxtsencloopNx
.Lxtsenc1x:
	adds	w4, w4, #4
	beq	.Lxtsencout
.Lxtsencloop:
	ld1	{v1.16b}, [x1], #16
	eor	v0.16b, v1.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7
	eor	v0.16b, v0.16b, v4.16b
	st1	{v0.16b}, [x0], #16
	subs	w4, w4, #1
	beq	.Lxtsencout
	next_tweak	v4, v4, v7, v8
	b	.Lxtsencloop
.Lxtsencout:
	st1	{v4.16b}, [x6]
	ldp	x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_xts_encrypt)


AES_ENTRY(aes_xts_decrypt)
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	ld1	{v4.16b}, [x6]
	cbz	w7, .Lxtsdecnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	dec_prepare	w3, x2, x8
	ldr	q7, .Lxts_mul_x
	b	.LxtsdecNx

.Lxtsdecnotfirst:
	dec_prepare	w3, x2, x8
.LxtsdecloopNx:
	ldr	q7, .Lxts_mul_x
	next_tweak	v4, v4, v7, v8
.LxtsdecNx:
	subs	w4, w4, #4
	bmi	.Lxtsdec1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v7, v8
	eor	v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v7, v8
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v7, v8
	eor	v3.16b, v3.16b, v7.16b
	bl	aes_decrypt_block4x
	eor	v3.16b, v3.16b, v7.16b
	eor	v0.16b, v0.16b, v4.16b
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	st1	{v0.16b-v3.16b}, [x0], #64
	mov	v4.16b, v7.16b
	cbz	w4, .Lxtsdecout
	b	.LxtsdecloopNx
.Lxtsdec1x:
	adds	w4, w4, #4
	beq	.Lxtsdecout
.Lxtsdecloop:
	ld1	{v1.16b}, [x1], #16
	eor	v0.16b, v1.16b, v4.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor	v0.16b, v0.16b, v4.16b
	st1	{v0.16b}, [x0], #16
	subs	w4, w4, #1
	beq	.Lxtsdecout
	next_tweak	v4, v4, v7, v8
	b	.Lxtsdecloop
.Lxtsdecout:
	st1	{v4.16b}, [x6]
	ldp	x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_xts_decrypt)

	/*
	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
	 */
AES_ENTRY(aes_mac_update)
	ld1	{v0.16b}, [x4]			/* get dg */
	enc_prepare	w2, x1, x7
	cbz	w5, .Lmacloop4x

	encrypt_block	v0, w2, x1, x7, w8

.Lmacloop4x:
	subs	w3, w3, #4
	bmi	.Lmac1x
	ld1	{v1.16b-v4.16b}, [x0], #64	/* get next pt block */
	eor	v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
	encrypt_block	v0, w2, x1, x7, w8
	eor	v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w2, x1, x7, w8
	eor	v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w2, x1, x7, w8
	eor	v0.16b, v0.16b, v4.16b
	cmp	w3, wzr
	/* skip final encryption iff this was the last block and !enc_after */
	csinv	x5, x6, xzr, eq
	cbz	w5, .Lmacout
	encrypt_block	v0, w2, x1, x7, w8
	b	.Lmacloop4x
.Lmac1x:
	add	w3, w3, #4
.Lmacloop:
	cbz	w3, .Lmacout
	ld1	{v1.16b}, [x0], #16		/* get next pt block */
	eor	v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

	subs	w3, w3, #1
	csinv	x5, x6, xzr, eq
	cbz	w5, .Lmacout

	encrypt_block	v0, w2, x1, x7, w8
	b	.Lmacloop

.Lmacout:
	st1	{v0.16b}, [x4]			/* return dg */
	ret
AES_ENDPROC(aes_mac_update)