/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Table-based AES encrypt/decrypt primitives for 32-bit ARM, ABI:
 *   r0 = round key array, r1 = round count, r2 = input, r3 = output.
 *
 * Interrupts are disabled around the data-dependent table lookups, and the
 * lookup tables are prefetched into L1, as a cache-timing hardening measure
 * (see the comment in do_crypt below).
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		5

	rk		.req	r0	// round key pointer (post-incremented)
	rounds		.req	r1	// round counter
	in		.req	r2	// input block pointer
	out		.req	r3	// output block pointer (also saved on stack)
	ttab		.req	ip	// lookup table base

	// NOTE: t1/t2 alias in/out - those registers are only reused as
	// scratch once the input has been consumed and 'out' spilled to
	// the stack by do_crypt.
	t0		.req	lr
	t1		.req	r2
	t2		.req	r3

	/*
	 * __select - extract byte \idx of word \in into \out.
	 *
	 * Pre-v7 (no ubfx): mask the byte but leave it in place; __load
	 * compensates by folding the right shift into its addressing mode.
	 */
	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm

	/*
	 * __load - table lookup: \out = ttab[\in], where \in was produced by
	 * __select with the same \idx.  \sz is log2 of the entry size
	 * (2 for the 32-bit ft/it tables, 0 for the byte-wide sbox), \op is
	 * an optional ldr size suffix ('b' for the final-round byte loads).
	 */
	.macro		__load, out, in, idx, sz, op
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	// \in still holds the byte shifted left by 8*\idx; scale it down here
	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
	.else
	ldr\op		\out, [ttab, \in, lsl #\sz]
	.endif
	.endm

	/*
	 * __hround - half an AES round: compute two output columns
	 * (\out0, \out1) from the four input columns \in0-\in3, using
	 * rotated table lookups (the ror #24/#16/#8 combine the per-byte
	 * table results into full columns) and xor in two round key words.
	 * \enc selects the forward vs inverse byte schedule.
	 */
	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0, \sz, \op
	__load		t0, t0, 1, \sz, \op

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0, \sz, \op
	__select	t2, \in2, 2
	__load		t1, t1, 1, \sz, \op
	__load		t2, t2, 2, \sz, \op

	eor		\out0, \out0, t0, ror #24

	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2, \sz, \op
	__load		t0, t0, 3, \sz, \op
	__load		\t4, \t4, 3, \sz, \op

	.ifnb		\oldcpsr
	/*
	 * This is the final round and we're done with all data-dependent table
	 * lookups, so we can safely re-enable interrupts.
	 */
	restore_irqs	\oldcpsr
	.endif

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm

	// fround - one full forward (encryption) round: two half rounds
	// covering all four output columns.  \oldcpsr is only passed for the
	// final round, to re-enable interrupts inside the last __hround.
	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
	.endm

	// iround - one full inverse (decryption) round; note the reversed
	// column schedule relative to fround.
	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
	.endm

	/*
	 * do_crypt - common driver for encryption and decryption.
	 *  \round: fround or iround
	 *  \ttab:  main 1 KiB lookup table (ft or it)
	 *  \ltab:  final-round byte table, or empty to derive it from \ttab+1
	 *  \bsz:   log2 entry size passed to the final round
	 */
	.macro		do_crypt, round, ttab, ltab, bsz
	push		{r3-r11, lr}

	// Load keys first, to reduce latency in case they're not cached yet.
	ldm		rk!, {r8-r11}

	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

	// AddRoundKey with the first four key words
	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	mov_l		ttab, \ttab
	/*
	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
	 * intended to make cache-timing attacks more difficult.  They may not
	 * be fully prevented, however; see the paper
	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
	 * ("Cache-timing attacks on AES") for a discussion of the many
	 * difficulties involved in writing truly constant-time AES software.
	 */
	save_and_disable_irqs	t0
	.set		i, 0
	.rept		1024 / 128
	ldr		r8, [ttab, #i + 0]
	ldr		r9, [ttab, #i + 32]
	ldr		r10, [ttab, #i + 64]
	ldr		r11, [ttab, #i + 96]
	.set		i, i + 128
	.endr
	push		{t0}		// oldcpsr

	// rounds with bit 1 clear take the extra round pair at 0: first
	tst		rounds, #2
	bne		1f

0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	bls		2f
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	b		0b

2:	.ifb		\ltab
	// no separate final-round table: its bytes live at \ttab + 1
	add		ttab, ttab, #1
	.else
	mov_l		ttab, \ltab
	// Prefetch inverse S-box for final round; see explanation above
	.set		i, 0
	.rept		256 / 64
	ldr		t0, [ttab, #i + 0]
	ldr		t1, [ttab, #i + 32]
	.set		i, i + 64
	.endr
	.endif

	pop		{rounds}	// oldcpsr
	// final round uses byte loads ('b') and re-enables IRQs via oldcpsr
	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

	ldr		out, [sp]	// reload 'out' (r3 was reused as t2)

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]

	pop		{r3-r11, pc}

	.align		3
	.ltorg
	.endm

/*
 * void __aes_arm_encrypt(const u32 rk[], int rounds, const u8 *in, u8 *out)
 * (prototype inferred from the .req aliases above - confirm against the
 * C glue code)
 */
ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)

	.align		5
/*
 * void __aes_arm_decrypt(const u32 rk[], int rounds, const u8 *in, u8 *out)
 * Uses the inverse table plus a separate inverse S-box for the final round.
 */
ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
ENDPROC(__aes_arm_decrypt)