/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		5

	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	ttab		.req	ip

	t0		.req	lr
	t1		.req	r2
	t2		.req	r3

	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm

	.macro		__load, out, in, idx, sz, op
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
	.else
	ldr\op		\out, [ttab, \in, lsl #\sz]
	.endif
	.endm

	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0, \sz, \op
	__load		t0, t0, 1, \sz, \op

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0, \sz, \op
	__select	t2, \in2, 2
	__load		t1, t1, 1, \sz, \op
	__load		t2, t2, 2, \sz, \op

	eor		\out0, \out0, t0, ror #24

	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2, \sz, \op
	__load		t0, t0, 3, \sz, \op
	__load		\t4, \t4, 3, \sz, \op

	.ifnb		\oldcpsr
	/*
	 * This is the final round, and we're done with all data-dependent
	 * table lookups, so we can safely re-enable interrupts.
	 */
	restore_irqs	\oldcpsr
	.endif

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm

	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
	.endm

	.macro		__rev, out, in
	.if		__LINUX_ARM_ARCH__ < 6
	lsl		t0, \in, #24
	and		t1, \in, #0xff00
	and		t2, \in, #0xff0000
	orr		\out, t0, \in, lsr #24
	orr		\out, \out, t1, lsl #8
	orr		\out, \out, t2, lsr #8
	.else
	rev		\out, \in
	.endif
	.endm

	.macro		__adrl, out, sym, c
	.if		__LINUX_ARM_ARCH__ < 7
	ldr\c		\out, =\sym
	.else
	movw\c		\out, #:lower16:\sym
	movt\c		\out, #:upper16:\sym
	.endif
	.endm
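
/*
 * Overview of do_crypt below (a summary added as a reading aid;
 * 'rounds' is 10, 12 or 14 for AES-128/192/256):
 *  - load the first round key and the input block, and XOR them
 *    together (the initial AddRoundKey),
 *  - disable IRQs and prefetch the whole lookup table (see the comment
 *    inside the macro),
 *  - perform rounds - 1 full table-based rounds, ping-ponging the
 *    state columns between r4-r7 and r8-r11; the 'tst rounds, #2'
 *    entry test lets the same two-rounds-per-iteration loop handle
 *    both rounds % 4 == 0 and rounds % 4 == 2,
 *  - perform the final round with byte-sized loads ('b' suffix), since
 *    that round omits MixColumns and needs only plain (inverse) S-box
 *    bytes,
 *  - byte-swap the state on big-endian, and store it to 'out'.
 */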
	.macro		do_crypt, round, ttab, ltab, bsz
	push		{r3-r11, lr}

	// Load keys first, to reduce latency in case they're not cached yet.
	ldm		rk!, {r8-r11}

	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

#ifdef CONFIG_CPU_BIG_ENDIAN
	__rev		r4, r4
	__rev		r5, r5
	__rev		r6, r6
	__rev		r7, r7
#endif

	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	__adrl		ttab, \ttab
	/*
	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table
	 * into L1 cache, assuming cacheline size >= 32.  This is a hardening
	 * measure intended to make cache-timing attacks more difficult.
	 * They may not be fully prevented, however; see the paper
	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
	 * ("Cache-timing attacks on AES") for a discussion of the many
	 * difficulties involved in writing truly constant-time AES software.
	 */
	save_and_disable_irqs	t0
	.set		i, 0
	.rept		1024 / 128
	ldr		r8, [ttab, #i + 0]
	ldr		r9, [ttab, #i + 32]
	ldr		r10, [ttab, #i + 64]
	ldr		r11, [ttab, #i + 96]
	.set		i, i + 128
	.endr
	push		{t0}		// oldcpsr

	tst		rounds, #2
	bne		1f

0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	bls		2f
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	b		0b

2:	.ifb		\ltab
	add		ttab, ttab, #1
	.else
	__adrl		ttab, \ltab
	// Prefetch inverse S-box for final round; see explanation above
	.set		i, 0
	.rept		256 / 64
	ldr		t0, [ttab, #i + 0]
	ldr		t1, [ttab, #i + 32]
	.set		i, i + 64
	.endr
	.endif

	pop		{rounds}	// oldcpsr
	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
	__rev		r4, r4
	__rev		r5, r5
	__rev		r6, r6
	__rev		r7, r7
#endif

	ldr		out, [sp]

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]

	pop		{r3-r11, pc}

	.align		3
	.ltorg
	.endm

ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)

	.align		5
ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
ENDPROC(__aes_arm_decrypt)
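
/*
 * The 256-byte inverse S-box consumed by the final decryption round.
 * The encryption path needs no equivalent table: byte 1 of every
 * crypto_ft_tab entry happens to hold the plain S-box value, which is
 * what the 'add ttab, ttab, #1' above relies on.  No byte of a
 * crypto_it_tab entry equals the inverse S-box value, hence this
 * separate table.  It is aligned to the cache line size so that the
 * 32-byte-stride prefetch loop above pulls all of it into L1.
 */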
	.section	".rodata", "a"
	.align		L1_CACHE_SHIFT
	.type		__aes_arm_inverse_sbox, %object
__aes_arm_inverse_sbox:
	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
	.size		__aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox
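
/*
 * C-side usage sketch, following the register assignments above
 * (rk = r0, rounds = r1, in = r2, out = r3).  This is illustrative
 * only; the authoritative prototypes and round-count computation live
 * in the accompanying C glue code:
 *
 *	asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds,
 *					  const u8 *in, u8 *out);
 *	asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds,
 *					  const u8 *in, u8 *out);
 *
 *	struct crypto_aes_ctx ctx;
 *
 *	aes_expandkey(&ctx, key, key_len);
 *	__aes_arm_encrypt(ctx.key_enc, 6 + key_len / 4, in, out);
 *	__aes_arm_decrypt(ctx.key_dec, 6 + key_len / 4, in, out);
 */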