/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

#define AES_ENTRY(func)		ENTRY(neon_ ## func)
#define AES_ENDPROC(func)	ENDPROC(neon_ ## func)

	/* multiply by polynomial 'x' in GF(2^8) */
	.macro		mul_by_x, out, in, temp, const
	sshr		\temp, \in, #7
	add		\out, \in, \in
	and		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm

	/* preload the entire Sbox */
	.macro		prepare, sbox, shiftrows, temp
	adr		\temp, \sbox
	movi		v12.16b, #0x40
	ldr		q13, \shiftrows
	movi		v14.16b, #0x1b
	ld1		{v16.16b-v19.16b}, [\temp], #64
	ld1		{v20.16b-v23.16b}, [\temp], #64
	ld1		{v24.16b-v27.16b}, [\temp], #64
	ld1		{v28.16b-v31.16b}, [\temp]
	.endm

	/* do preload for encryption */
	.macro		enc_prepare, ignore0, ignore1, temp
	prepare		.LForward_Sbox, .LForward_ShiftRows, \temp
	.endm

	.macro		enc_switch_key, ignore0, ignore1, temp
	/* do nothing */
	.endm

	/* do preload for decryption */
	.macro		dec_prepare, ignore0, ignore1, temp
	prepare		.LReverse_Sbox, .LReverse_ShiftRows, \temp
	.endm

	/* apply SubBytes transformation using the preloaded Sbox */
	.macro		sub_bytes, in
	sub		v9.16b, \in\().16b, v12.16b
	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
	sub		v10.16b, v9.16b, v12.16b
	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v11.16b, v10.16b, v12.16b
	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

	/* apply MixColumns transformation */
	.macro		mix_columns, in
	mul_by_x	v10.16b, \in\().16b, v9.16b, v14.16b
	rev32		v8.8h, \in\().8h
	eor		\in\().16b, v10.16b, \in\().16b
	shl		v9.4s, v8.4s, #24
	shl		v11.4s, \in\().4s, #24
	sri		v9.4s, v8.4s, #8
	sri		v11.4s, \in\().4s, #8
	eor		v9.16b, v9.16b, v8.16b
	eor		v10.16b, v10.16b, v9.16b
	eor		\in\().16b, v10.16b, v11.16b
	.endm

	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	.macro		inv_mix_columns, in
	mul_by_x	v11.16b, \in\().16b, v10.16b, v14.16b
	mul_by_x	v11.16b, v11.16b, v10.16b, v14.16b
	eor		\in\().16b, \in\().16b, v11.16b
	rev32		v11.8h, v11.8h
	eor		\in\().16b, \in\().16b, v11.16b
	mix_columns	\in
	.endm

	.macro		do_block, enc, in, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
	sub_bytes	\in
	ld1		{v15.4s}, [\rkp], #16
	subs		\i, \i, #1
	beq		2222f
	.if		\enc == 1
	mix_columns	\in
	.else
	inv_mix_columns	\in
	.endif
	b		1111b
2222:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	.endm

	.macro		encrypt_block, in, rounds, rk, rkp, i
	do_block	1, \in, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block, in, rounds, rk, rkp, i
	do_block	0, \in, \rounds, \rk, \rkp, \i
	.endm
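
	/*
	 * Round structure of do_block, taking AES-128 as an example
	 * (rounds == 10, i.e. 11 round keys): each of the \rounds loop
	 * iterations XORs in one round key and applies ShiftRows and
	 * SubBytes; all but the last iteration also apply MixColumns
	 * (or its inverse), and the final round key is XORed in at
	 * label 2222, matching the MixColumns-free last round of the
	 * AES specification.
	 */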
	/*
	 * Interleaved versions: functionally equivalent to the
	 * ones above, but applied to 2 or 4 AES states in parallel.
	 */

	.macro		sub_bytes_2x, in0, in1
	sub		v8.16b, \in0\().16b, v12.16b
	sub		v9.16b, \in1\().16b, v12.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, v8.16b, v12.16b
	sub		v11.16b, v9.16b, v12.16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v10.16b, v12.16b
	sub		v9.16b, v11.16b, v12.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v10.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v11.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	.endm

	.macro		sub_bytes_4x, in0, in1, in2, in3
	sub		v8.16b, \in0\().16b, v12.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v12.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, \in2\().16b, v12.16b
	tbl		\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
	sub		v11.16b, \in3\().16b, v12.16b
	tbl		\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v8.16b, v12.16b
	tbx		\in2\().16b, {v20.16b-v23.16b}, v10.16b
	sub		v9.16b, v9.16b, v12.16b
	tbx		\in3\().16b, {v20.16b-v23.16b}, v11.16b
	sub		v10.16b, v10.16b, v12.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v8.16b
	sub		v11.16b, v11.16b, v12.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v9.16b
	sub		v8.16b, v8.16b, v12.16b
	tbx		\in2\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v9.16b, v12.16b
	tbx		\in3\().16b, {v24.16b-v27.16b}, v11.16b
	sub		v10.16b, v10.16b, v12.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	sub		v11.16b, v11.16b, v12.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	tbx		\in2\().16b, {v28.16b-v31.16b}, v10.16b
	tbx		\in3\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

	.macro		mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
	sshr		\tmp0\().16b, \in0\().16b, #7
	add		\out0\().16b, \in0\().16b, \in0\().16b
	sshr		\tmp1\().16b, \in1\().16b, #7
	and		\tmp0\().16b, \tmp0\().16b, \const\().16b
	add		\out1\().16b, \in1\().16b, \in1\().16b
	and		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	.macro		mix_columns_2x, in0, in1
	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v14
	rev32		v10.8h, \in0\().8h
	rev32		v11.8h, \in1\().8h
	eor		\in0\().16b, v8.16b, \in0\().16b
	eor		\in1\().16b, v9.16b, \in1\().16b
	shl		v12.4s, v10.4s, #24
	shl		v13.4s, v11.4s, #24
	eor		v8.16b, v8.16b, v10.16b
	sri		v12.4s, v10.4s, #8
	shl		v10.4s, \in0\().4s, #24
	eor		v9.16b, v9.16b, v11.16b
	sri		v13.4s, v11.4s, #8
	shl		v11.4s, \in1\().4s, #24
	sri		v10.4s, \in0\().4s, #8
	eor		\in0\().16b, v8.16b, v12.16b
	sri		v11.4s, \in1\().4s, #8
	eor		\in1\().16b, v9.16b, v13.16b
	eor		\in0\().16b, v10.16b, \in0\().16b
	eor		\in1\().16b, v11.16b, \in1\().16b
	.endm

	.macro		inv_mix_cols_2x, in0, in1
	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v14
	mul_by_x_2x	v8, v9, v8, v9, v10, v11, v14
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	rev32		v8.8h, v8.8h
	rev32		v9.8h, v9.8h
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	mix_columns_2x	\in0, \in1
	.endm
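
	/*
	 * The two mul_by_x_2x passes above compute 4x per byte; 5x is
	 * then obtained as 4x ^ x, and the rev32 pairs in the 4x value
	 * of the byte two rows over within each column, so the
	 * eor/rev32/eor sequence pre-multiplies each column by
	 * { 5, 0, 4, 0 }. This works because, with GF(2^8) coefficients
	 * modulo x^4 + 1,
	 *   (03.x^3 + 01.x^2 + 01.x + 02) . (04.x^2 + 05)
	 *     == 0b.x^3 + 0d.x^2 + 09.x + 0e,
	 * i.e. the forward MixColumns polynomial times { 5, 0, 4, 0 }
	 * equals the inverse MixColumns polynomial, so mix_columns_2x
	 * can finish the job.
	 */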
	.macro		inv_mix_cols_4x, in0, in1, in2, in3
	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v14
	mul_by_x_2x	v10, v11, \in2, \in3, v12, v13, v14
	mul_by_x_2x	v8, v9, v8, v9, v12, v13, v14
	mul_by_x_2x	v10, v11, v10, v11, v12, v13, v14
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	eor		\in2\().16b, \in2\().16b, v10.16b
	eor		\in3\().16b, \in3\().16b, v11.16b
	rev32		v8.8h, v8.8h
	rev32		v9.8h, v9.8h
	rev32		v10.8h, v10.8h
	rev32		v11.8h, v11.8h
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	eor		\in2\().16b, \in2\().16b, v10.16b
	eor		\in3\().16b, \in3\().16b, v11.16b
	mix_columns_2x	\in0, \in1
	mix_columns_2x	\in2, \in3
	.endm

	.macro		do_block_2x, enc, in0, in1, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	sub_bytes_2x	\in0, \in1
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	ld1		{v15.4s}, [\rkp], #16
	subs		\i, \i, #1
	beq		2222f
	.if		\enc == 1
	mix_columns_2x	\in0, \in1
	ldr		q13, .LForward_ShiftRows
	.else
	inv_mix_cols_2x	\in0, \in1
	ldr		q13, .LReverse_ShiftRows
	.endif
	movi		v12.16b, #0x40
	b		1111b
2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	.endm

	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	sub_bytes_4x	\in0, \in1, \in2, \in3
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
	ld1		{v15.4s}, [\rkp], #16
	subs		\i, \i, #1
	beq		2222f
	.if		\enc == 1
	mix_columns_2x	\in0, \in1
	mix_columns_2x	\in2, \in3
	ldr		q13, .LForward_ShiftRows
	.else
	inv_mix_cols_4x	\in0, \in1, \in2, \in3
	ldr		q13, .LReverse_ShiftRows
	.endif
	movi		v12.16b, #0x40
	b		1111b
2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	.endm

	.macro		encrypt_block2x, in0, in1, rounds, rk, rkp, i
	do_block_2x	1, \in0, \in1, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block2x, in0, in1, rounds, rk, rkp, i
	do_block_2x	0, \in0, \in1, \rounds, \rk, \rkp, \i
	.endm

	.macro		encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

#include "aes-modes.S"
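
	/*
	 * The ShiftRows tables below are index vectors for tbl: output
	 * byte i of the state is taken from input byte table[i]. With
	 * the state in column-major order, rotating row r left by r
	 * positions gives the forward sequence 0, 5, 10, 15, 4, 9, 14,
	 * 3, ... seen in .LForward_ShiftRows. The CPU_BE variants hold
	 * the same 16 bytes in reverse order, compensating for the lane
	 * order that the 'ldr q13' literal load produces on big-endian.
	 */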
	.text
	.align		4
.LForward_ShiftRows:
CPU_LE(	.byte		0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3	)
CPU_LE(	.byte		0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb	)
CPU_BE(	.byte		0xb, 0x6, 0x1, 0xc, 0x7, 0x2, 0xd, 0x8	)
CPU_BE(	.byte		0x3, 0xe, 0x9, 0x4, 0xf, 0xa, 0x5, 0x0	)

.LReverse_ShiftRows:
CPU_LE(	.byte		0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb	)
CPU_LE(	.byte		0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3	)
CPU_BE(	.byte		0x3, 0x6, 0x9, 0xc, 0xf, 0x2, 0x5, 0x8	)
CPU_BE(	.byte		0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0	)

.LForward_Sbox:
	.byte		0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
	.byte		0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
	.byte		0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
	.byte		0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
	.byte		0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
	.byte		0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
	.byte		0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
	.byte		0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
	.byte		0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
	.byte		0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
	.byte		0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
	.byte		0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
	.byte		0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
	.byte		0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
	.byte		0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
	.byte		0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
	.byte		0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
	.byte		0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
	.byte		0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
	.byte		0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
	.byte		0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
	.byte		0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
	.byte		0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
	.byte		0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
	.byte		0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
	.byte		0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
	.byte		0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
	.byte		0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
	.byte		0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
	.byte		0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
	.byte		0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
	.byte		0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
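
/*
 * .LReverse_Sbox below is the inverse permutation of .LForward_Sbox,
 * e.g. Forward_Sbox[0x00] == 0x63 and Reverse_Sbox[0x63] == 0x00.
 */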
.LReverse_Sbox:
	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
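
/*
 * The 256-byte Sboxes above are consumed 64 bytes at a time by the
 * sub_bytes macros: tbl looks up indices 0x00-0x3f in v16-v19 and
 * zeroes out-of-range lanes, and each subsequent tbx pass leaves
 * already-translated lanes untouched while the index vector, lowered
 * by 0x40 (v12) per step, selects the next quadrant from v20-v23,
 * v24-v27 and v28-v31.
 */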