/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

#define AES_ENTRY(func)		ENTRY(neon_ ## func)
#define AES_ENDPROC(func)	ENDPROC(neon_ ## func)

	xtsmask		.req	v7
	cbciv		.req	v7
	vctr		.req	v4

	.macro		xts_reload_mask, tmp
	xts_load_mask	\tmp
	.endm

	/* multiply by polynomial 'x' in GF(2^8) */
	.macro		mul_by_x, out, in, temp, const
	sshr		\temp, \in, #7
	shl		\out, \in, #1
	and		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm

	/* multiply by polynomial 'x^2' in GF(2^8) */
	.macro		mul_by_x2, out, in, temp, const
	ushr		\temp, \in, #6
	shl		\out, \in, #2
	pmul		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm

	/* preload the entire Sbox */
	.macro		prepare, sbox, shiftrows, temp
	movi		v12.16b, #0x1b
	ldr_l		q13, \shiftrows, \temp
	ldr_l		q14, .Lror32by8, \temp
	adr_l		\temp, \sbox
	ld1		{v16.16b-v19.16b}, [\temp], #64
	ld1		{v20.16b-v23.16b}, [\temp], #64
	ld1		{v24.16b-v27.16b}, [\temp], #64
	ld1		{v28.16b-v31.16b}, [\temp]
	.endm

	/* do preload for encryption */
	.macro		enc_prepare, ignore0, ignore1, temp
	prepare		.LForward_Sbox, .LForward_ShiftRows, \temp
	.endm

	.macro		enc_switch_key, ignore0, ignore1, temp
	/* do nothing */
	.endm

	/* do preload for decryption */
	.macro		dec_prepare, ignore0, ignore1, temp
	prepare		.LReverse_Sbox, .LReverse_ShiftRows, \temp
	.endm

	/* apply SubBytes transformation using the preloaded Sbox */
	.macro		sub_bytes, in
	sub		v9.16b, \in\().16b, v15.16b
	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
	sub		v10.16b, v9.16b, v15.16b
	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v11.16b, v10.16b, v15.16b
	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

	/* apply MixColumns transformation */
	.macro		mix_columns, in, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2	v8.16b, \in\().16b, v9.16b, v12.16b
	eor		\in\().16b, \in\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in\().16b, \in\().16b, v8.16b
	.endif

	mul_by_x	v9.16b, \in\().16b, v8.16b, v12.16b
	rev32		v8.8h, \in\().8h
	eor		v8.16b, v8.16b, v9.16b
	eor		\in\().16b, \in\().16b, v8.16b
	tbl		\in\().16b, {\in\().16b}, v14.16b
	eor		\in\().16b, \in\().16b, v8.16b
	.endm

	.macro		do_block, enc, in, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
	sub_bytes	\in
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	beq		2222f
	mix_columns	\in, \enc
	b		1111b
2222:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	.endm

	.macro		encrypt_block, in, rounds, rk, rkp, i
	do_block	1, \in, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block, in, rounds, rk, rkp, i
	do_block	0, \in, \rounds, \rk, \rkp, \i
	.endm

	/*
	 * Interleaved versions: functionally equivalent to the
	 * ones above, but applied to 4 AES states in parallel.
	 */
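	/*
	 * The Sbox sits in v16-v31 as four consecutive 64-byte tbl tables.
	 * tbl zeroes any lane whose index is out of range, while tbx
	 * leaves it untouched, so a full 256-byte lookup only needs the
	 * indices rebased by 0x40 per chunk: the 'sub ..., v15.16b' steps
	 * below rely on v15 holding #0x40 (set up in do_block/do_block_4x).
	 * Interleaving four independent states lets the long tbl/tbx
	 * dependency chains of the different blocks overlap.
	 */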
	.macro		sub_bytes_4x, in0, in1, in2, in3
	sub		v8.16b, \in0\().16b, v15.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v15.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, \in2\().16b, v15.16b
	tbl		\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
	sub		v11.16b, \in3\().16b, v15.16b
	tbl		\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v20.16b-v23.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v20.16b-v23.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v24.16b-v27.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	tbx		\in2\().16b, {v28.16b-v31.16b}, v10.16b
	tbx		\in3\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

	.macro		mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
	sshr		\tmp0\().16b, \in0\().16b, #7
	shl		\out0\().16b, \in0\().16b, #1
	sshr		\tmp1\().16b, \in1\().16b, #7
	and		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #1
	and		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	.macro		mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
	ushr		\tmp0\().16b, \in0\().16b, #6
	shl		\out0\().16b, \in0\().16b, #2
	ushr		\tmp1\().16b, \in1\().16b, #6
	pmul		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #2
	pmul		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	.macro		mix_columns_2x, in0, in1, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2_2x	v8, v9, \in0, \in1, v10, v11, v12
	eor		\in0\().16b, \in0\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in1\().16b, \in1\().16b, v9.16b
	rev32		v9.8h, v9.8h
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	.endif

	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v12
	rev32		v10.8h, \in0\().8h
	rev32		v11.8h, \in1\().8h
	eor		v10.16b, v10.16b, v8.16b
	eor		v11.16b, v11.16b, v9.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	tbl		\in0\().16b, {\in0\().16b}, v14.16b
	tbl		\in1\().16b, {\in1\().16b}, v14.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	.endm
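	/*
	 * One AES round per loop iteration: v15 carries the round key into
	 * AddRoundKey, is then recycled as the #0x40 rebasing constant for
	 * sub_bytes, and is reloaded with the next round key before the
	 * branch; the final round (the beq exit) skips MixColumns, as the
	 * cipher specification requires.
	 */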
	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
	sub_bytes_4x	\in0, \in1, \in2, \in3
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	beq		2222f
	mix_columns_2x	\in0, \in1, \enc
	mix_columns_2x	\in2, \in3, \enc
	b		1111b
2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	.endm

	.macro		encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

#include "aes-modes.S"

	.section	".rodata", "a"
	.align		6
.LForward_Sbox:
	.byte		0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
	.byte		0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
	.byte		0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
	.byte		0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
	.byte		0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
	.byte		0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
	.byte		0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
	.byte		0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
	.byte		0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
	.byte		0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
	.byte		0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
	.byte		0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
	.byte		0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
	.byte		0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
	.byte		0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
	.byte		0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
	.byte		0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
	.byte		0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
	.byte		0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
	.byte		0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
	.byte		0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
	.byte		0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
	.byte		0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
	.byte		0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
	.byte		0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
	.byte		0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
	.byte		0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
	.byte		0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
	.byte		0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
	.byte		0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
	.byte		0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
	.byte		0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
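	/*
	 * Inverse of .LForward_Sbox: .LReverse_Sbox[.LForward_Sbox[x]] == x
	 * for every byte value x, so decryption reuses the same prepare/
	 * sub_bytes lookup machinery with the reverse tables preloaded.
	 */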
.LReverse_Sbox:
	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d

.LForward_ShiftRows:
	.octa		0x0b06010c07020d08030e09040f0a0500

.LReverse_ShiftRows:
	.octa		0x0306090c0f0205080b0e0104070a0d00

.Lror32by8:
	.octa		0x0c0f0e0d080b0a090407060500030201
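	/*
	 * The .octa values above are tbl index vectors: the two ShiftRows
	 * tables gather each byte of the column-major AES state from its
	 * (Inv)ShiftRows source position, and .Lror32by8 rotates each
	 * 32-bit column right by eight bits, which is how mix_columns and
	 * mix_columns_2x express their byte rotations.
	 */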