/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

#define AES_ENTRY(func)		ENTRY(neon_ ## func)
#define AES_ENDPROC(func)	ENDPROC(neon_ ## func)

	/*
	 * Syntax: GNU as (AArch64), run through the C preprocessor.
	 *
	 * Fixed NEON register roles shared by the macros in this file:
	 *
	 *   v8  - v11	scratch, overwritten by the sub_bytes and
	 *		mix_columns macro families
	 *   v12	0x1b in every byte lane (set by prepare); passed as
	 *		the reduction constant to the mul_by_x macros
	 *   v13	ShiftRows permutation vector (loaded by prepare)
	 *   v14	.Lror32by8 permutation vector (loaded by prepare)
	 *   v15	current round key on entry to each round, then
	 *		reloaded with 0x40 in every byte lane for the Sbox
	 *		lookup, then the next round key
	 *   v16 - v31	the 256-byte Sbox, 64 bytes per group of four
	 *		registers (loaded by prepare)
	 */

	/*
	 * multiply by polynomial 'x' in GF(2^8)
	 *
	 * The arithmetic shift right by 7 produces 0xff in every lane whose
	 * top bit is set and 0x00 otherwise, so ANDing with const selects the
	 * reduction value only for lanes that overflow on the left shift.
	 * Callers in this file pass v12 (0x1b per lane) as const.
	 * temp is clobbered.
	 */
	.macro		mul_by_x, out, in, temp, const
	sshr		\temp, \in, #7
	shl		\out, \in, #1
	and		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm

	/*
	 * multiply by polynomial 'x^2' in GF(2^8)
	 *
	 * The two bits shifted out at the top are multiplied (carry-less)
	 * by const and folded back into the shifted value.
	 * temp is clobbered.
	 */
	.macro		mul_by_x2, out, in, temp, const
	ushr		\temp, \in, #6
	shl		\out, \in, #2
	pmul		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm

	/*
	 * preload the entire Sbox
	 *
	 * sbox:	label of a 256-byte substitution table
	 * shiftrows:	label of the 16-byte ShiftRows permutation
	 * temp:	general purpose scratch register, clobbered
	 *
	 * Sets v12, v13, v14 and v16 - v31 as described at the top of
	 * this file.
	 */
	.macro		prepare, sbox, shiftrows, temp
	movi		v12.16b, #0x1b
	ldr_l		q13, \shiftrows, \temp
	ldr_l		q14, .Lror32by8, \temp
	adr_l		\temp, \sbox
	ld1		{v16.16b-v19.16b}, [\temp], #64
	ld1		{v20.16b-v23.16b}, [\temp], #64
	ld1		{v24.16b-v27.16b}, [\temp], #64
	ld1		{v28.16b-v31.16b}, [\temp]
	.endm

	/* do preload for encryption */
	.macro		enc_prepare, ignore0, ignore1, temp
	prepare		.LForward_Sbox, .LForward_ShiftRows, \temp
	.endm

	.macro		enc_switch_key, ignore0, ignore1, temp
	/* do nothing */
	.endm

	/* do preload for decryption */
	.macro		dec_prepare, ignore0, ignore1, temp
	prepare		.LReverse_Sbox, .LReverse_ShiftRows, \temp
	.endm

	/*
	 * apply SubBytes transformation using the preloaded Sbox
	 *
	 * Expects v15 to hold 0x40 in every byte lane (do_block sets this
	 * up before invoking the macro). The 256-entry table is consulted
	 * 64 entries at a time: tbl yields zero for an index outside its
	 * 64-byte table, and each following tbx only replaces lanes whose
	 * index, rebased by a further 0x40, falls inside the next quarter.
	 * Clobbers v9 - v11.
	 */
	.macro		sub_bytes, in
	sub		v9.16b, \in\().16b, v15.16b
	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
	sub		v10.16b, v9.16b, v15.16b
	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v11.16b, v10.16b, v15.16b
	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

	/*
	 * apply MixColumns transformation
	 *
	 * enc == 0 selects the inverse transformation, implemented as an
	 * extra pre-multiplication followed by the forward transformation.
	 * Uses v12 and v14 as set up by prepare; clobbers v8 and v9.
	 */
	.macro		mix_columns, in, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2	v8.16b, \in\().16b, v9.16b, v12.16b
	eor		\in\().16b, \in\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in\().16b, \in\().16b, v8.16b
	.endif

	mul_by_x	v9.16b, \in\().16b, v8.16b, v12.16b
	rev32		v8.8h, \in\().8h
	eor		v8.16b, v8.16b, v9.16b
	eor		\in\().16b, \in\().16b, v8.16b
	tbl		\in\().16b, {\in\().16b}, v14.16b
	eor		\in\().16b, \in\().16b, v8.16b
	.endm

	/*
	 * process a single AES state
	 *
	 * enc:		1 for encryption, 0 for decryption (forwarded to
	 *		mix_columns)
	 * in:		vector register holding the state, updated in place
	 * rounds:	number of rounds
	 * rk:		pointer to the first round key (not modified)
	 * rkp:		scratch pointer, advanced through the round keys
	 * i:		scratch round counter
	 *
	 * Each iteration applies AddRoundKey, ShiftRows and SubBytes. The
	 * next round key is fetched before the branch; ld1 leaves the flags
	 * from subs intact, so the last iteration skips MixColumns and
	 * falls into the final AddRoundKey at 2222.
	 */
	.macro		do_block, enc, in, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	movi		v15.16b, #0x40		/* quarter-table bias for sub_bytes */
	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
	sub_bytes	\in
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	beq		2222f
	mix_columns	\in, \enc
	b		1111b
2222:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	.endm

	.macro		encrypt_block, in, rounds, rk, rkp, i
	do_block	1, \in, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block, in, rounds, rk, rkp, i
	do_block	0, \in, \rounds, \rk, \rkp, \i
	.endm

	/*
	 * Interleaved versions: functionally equivalent to the
	 * ones above, but applied to 2 or 4 AES states in parallel.
	 */

	/* SubBytes on two states; expects v15 == 0x40 per lane, clobbers v8 - v11 */
	.macro		sub_bytes_2x, in0, in1
	sub		v8.16b, \in0\().16b, v15.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v15.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, v8.16b, v15.16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	sub		v11.16b, v9.16b, v15.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v11.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	.endm

	/* SubBytes on four states; expects v15 == 0x40 per lane, clobbers v8 - v11 */
	.macro		sub_bytes_4x, in0, in1, in2, in3
	sub		v8.16b, \in0\().16b, v15.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v15.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, \in2\().16b, v15.16b
	tbl		\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
	sub		v11.16b, \in3\().16b, v15.16b
	tbl		\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v20.16b-v23.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v20.16b-v23.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v24.16b-v27.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	tbx		\in2\().16b, {v28.16b-v31.16b}, v10.16b
	tbx		\in3\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

	/*
	 * two-state variant of mul_by_x; operands are bare register names
	 * (the .16b arrangement is appended here). tmp0/tmp1 are clobbered.
	 */
	.macro		mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
	sshr		\tmp0\().16b, \in0\().16b, #7
	shl		\out0\().16b, \in0\().16b, #1
	sshr		\tmp1\().16b, \in1\().16b, #7
	and		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #1
	and		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	/*
	 * two-state variant of mul_by_x2; operands are bare register names
	 * (the .16b arrangement is appended here). tmp0/tmp1 are clobbered.
	 */
	.macro		mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
	ushr		\tmp0\().16b, \in0\().16b, #6
	shl		\out0\().16b, \in0\().16b, #2
	ushr		\tmp1\().16b, \in1\().16b, #6
	pmul		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #2
	pmul		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	/*
	 * MixColumns on two states; enc == 0 selects the inverse
	 * transformation. Uses v12 and v14; clobbers v8 - v11.
	 */
	.macro		mix_columns_2x, in0, in1, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2_2x	v8, v9, \in0, \in1, v10, v11, v12
	eor		\in0\().16b, \in0\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in1\().16b, \in1\().16b, v9.16b
	rev32		v9.8h, v9.8h
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	.endif

	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v12
	rev32		v10.8h, \in0\().8h
	rev32		v11.8h, \in1\().8h
	eor		v10.16b, v10.16b, v8.16b
	eor		v11.16b, v11.16b, v9.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	tbl		\in0\().16b, {\in0\().16b}, v14.16b
	tbl		\in1\().16b, {\in1\().16b}, v14.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	.endm

	/* two-state variant of do_block; same round structure and operands */
	.macro		do_block_2x, enc, in0, in1, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	movi		v15.16b, #0x40		/* quarter-table bias for sub_bytes_2x */
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	sub_bytes_2x	\in0, \in1
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	beq		2222f
	mix_columns_2x	\in0, \in1, \enc
	b		1111b
2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	.endm

	/*
	 * four-state variant of do_block; MixColumns is applied as two
	 * invocations of the two-state macro.
	 */
	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	movi		v15.16b, #0x40		/* quarter-table bias for sub_bytes_4x */
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
	sub_bytes_4x	\in0, \in1, \in2, \in3
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	beq		2222f
	mix_columns_2x	\in0, \in1, \enc
	mix_columns_2x	\in2, \in3, \enc
	b		1111b
2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	.endm

	.macro		encrypt_block2x, in0, in1, rounds, rk, rkp, i
	do_block_2x	1, \in0, \in1, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block2x, in0, in1, rounds, rk, rkp, i
	do_block_2x	0, \in0, \in1, \rounds, \rk, \rkp, \i
	.endm

	.macro		encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

#include "aes-modes.S"

	/*
	 * Lookup tables. Each Sbox is 256 bytes and is read by prepare as
	 * four consecutive 64-byte loads; the section is aligned to 64 bytes.
	 */
	.section	".rodata", "a"
	.align		6
.LForward_Sbox:
	.byte		0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
	.byte		0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
	.byte		0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
	.byte		0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
	.byte		0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
	.byte		0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
	.byte		0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
	.byte		0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
	.byte		0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
	.byte		0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
	.byte		0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
	.byte		0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
	.byte		0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
	.byte		0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
	.byte		0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
	.byte		0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
	.byte		0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
	.byte		0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
	.byte		0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
	.byte		0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
	.byte		0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
	.byte		0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
	.byte		0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
	.byte		0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
	.byte		0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
	.byte		0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
	.byte		0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
	.byte		0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
	.byte		0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
	.byte		0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
	.byte		0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
	.byte		0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16

.LReverse_Sbox:
	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d

	/* byte permutation vectors consumed by tbl (loaded into v13 / v14) */
.LForward_ShiftRows:
	.octa		0x0b06010c07020d08030e09040f0a0500

.LReverse_ShiftRows:
	.octa		0x0306090c0f0205080b0e0104070a0d00

.Lror32by8:
	.octa		0x0c0f0e0d080b0a090407060500030201