/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		5

	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	ttab		.req	ip

	t0		.req	lr
	t1		.req	r2
	t2		.req	r3

	/*
	 * Select byte \idx of \in.  ARMv7+ extracts the byte into bits 7..0
	 * using ubfx; older architectures only mask it in place, and __load
	 * below compensates by folding the shift into its addressing mode.
	 */
	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm

	/*
	 * Load a table entry indexed by the byte produced by __select.
	 * \sz is log2 of the table entry size; \op is an optional 'b' suffix
	 * for the byte-sized lookups of the final round.
	 */
	.macro		__load, out, in, idx, sz, op
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
	.else
	ldr\op		\out, [ttab, \in, lsl #\sz]
	.endif
	.endm

	/*
	 * Perform half of a table-based AES round: compute output columns
	 * \out0 and \out1 from input columns \in0..\in3 and two round key
	 * words loaded from [rk].  \enc selects the encryption or decryption
	 * byte ordering.
	 */
	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0, \sz, \op
	__load		t0, t0, 1, \sz, \op

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0, \sz, \op
	__select	t2, \in2, 2
	__load		t1, t1, 1, \sz, \op
	__load		t2, t2, 2, \sz, \op

	eor		\out0, \out0, t0, ror #24

	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2, \sz, \op
	__load		t0, t0, 3, \sz, \op
	__load		\t4, \t4, 3, \sz, \op

	.ifnb		\oldcpsr
	/*
	 * This is the final round and we're done with all data-dependent table
	 * lookups, so we can safely re-enable interrupts.
	 */
	restore_irqs	\oldcpsr
	.endif

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm

	// One full forward (encryption) round producing all four columns
	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
	.endm

	// One full inverse (decryption) round producing all four columns
	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
	.endm

	// Byte-swap a 32-bit word, open-coding 'rev' on pre-ARMv6 CPUs
	.macro		__rev, out, in
	.if		__LINUX_ARM_ARCH__ < 6
	lsl		t0, \in, #24
	and		t1, \in, #0xff00
	and		t2, \in, #0xff0000
	orr		\out, t0, \in, lsr #24
	orr		\out, \out, t1, lsl #8
	orr		\out, \out, t2, lsr #8
	.else
	rev		\out, \in
	.endif
	.endm

	// Load the address of \sym, using a movw/movt pair where available
	.macro		__adrl, out, sym, c
	.if		__LINUX_ARM_ARCH__ < 7
	ldr\c		\out, =\sym
	.else
	movw\c		\out, #:lower16:\sym
	movt\c		\out, #:upper16:\sym
	.endif
	.endm

	/*
	 * Encrypt or decrypt a single 16-byte block.  On entry, rk points to
	 * the round key schedule, rounds holds the number of rounds, and
	 * in/out point to the input and output blocks.
	 */
	.macro		do_crypt, round, ttab, ltab, bsz
	push		{r3-r11, lr}

	// Load keys first, to reduce latency in case they're not cached yet.
	ldm		rk!, {r8-r11}

	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

#ifdef CONFIG_CPU_BIG_ENDIAN
	__rev		r4, r4
	__rev		r5, r5
	__rev		r6, r6
	__rev		r7, r7
#endif

	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	__adrl		ttab, \ttab
	/*
	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
	 * intended to make cache-timing attacks more difficult.  They may not
	 * be fully prevented, however; see the paper
	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
	 * ("Cache-timing attacks on AES") for a discussion of the many
	 * difficulties involved in writing truly constant-time AES software.
	 */
	save_and_disable_irqs	t0
	.set		i, 0
	.rept		1024 / 128
	ldr		r8, [ttab, #i + 0]
	ldr		r9, [ttab, #i + 32]
	ldr		r10, [ttab, #i + 64]
	ldr		r11, [ttab, #i + 96]
	.set		i, i + 128
	.endr
	push		{t0}		// oldcpsr

	tst		rounds, #2
	bne		1f

0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	bls		2f
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	b		0b

2:	.ifb		\ltab
	add		ttab, ttab, #1
	.else
	__adrl		ttab, \ltab
	// Prefetch inverse S-box for final round; see explanation above
	.set		i, 0
	.rept		256 / 64
	ldr		t0, [ttab, #i + 0]
	ldr		t1, [ttab, #i + 32]
	.set		i, i + 64
	.endr
	.endif

	pop		{rounds}	// oldcpsr
	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
	__rev		r4, r4
	__rev		r5, r5
	__rev		r6, r6
	__rev		r7, r7
#endif

	ldr		out, [sp]

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]

	pop		{r3-r11, pc}

	.align		3
	.ltorg
	.endm

ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)

	.align		5
ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
ENDPROC(__aes_arm_decrypt)
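
/*
 * Usage sketch: the register aliases at the top of this file (rk = r0,
 * rounds = r1, in = r2, out = r3) suggest C-side prototypes along the
 * lines of
 *
 *	asmlinkage void __aes_arm_encrypt(u32 rk[], int rounds,
 *					  const u8 *in, u8 *out);
 *	asmlinkage void __aes_arm_decrypt(u32 rk[], int rounds,
 *					  const u8 *in, u8 *out);
 *
 * with 'rounds' being 10, 12 or 14 for 128-, 192- or 256-bit keys, and rk
 * pointing to the expanded encryption or decryption key schedule.  The
 * calling C glue lives outside this file; the exact declarations there may
 * differ.
 */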