/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text

	rk		.req	x0
	out		.req	x1
	in		.req	x2
	rounds		.req	x3
	tt		.req	x2

	.macro		__pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift
	.ifc		\op\shift, b0
	ubfiz		\reg0, \in0, #2, #8
	ubfiz		\reg1, \in1e, #2, #8
	.else
	ubfx		\reg0, \in0, #\shift, #8
	ubfx		\reg1, \in1e, #\shift, #8
	.endif

	/*
	 * AArch64 cannot do byte size indexed loads from a table containing
	 * 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a
	 * valid instruction. So perform the shift explicitly first for the
	 * high bytes (the low byte is shifted implicitly by using ubfiz rather
	 * than ubfx above)
	 */
	.ifnc		\op, b
	ldr		\reg0, [tt, \reg0, uxtw #2]
	ldr		\reg1, [tt, \reg1, uxtw #2]
	.else
	.if		\shift > 0
	lsl		\reg0, \reg0, #2
	lsl		\reg1, \reg1, #2
	.endif
	ldrb		\reg0, [tt, \reg0, uxtw]
	ldrb		\reg1, [tt, \reg1, uxtw]
	.endif
	.endm

	.macro		__pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift
	ubfx		\reg0, \in0, #\shift, #8
	ubfx		\reg1, \in1d, #\shift, #8
	ldr\op		\reg0, [tt, \reg0, uxtw #\sz]
	ldr\op		\reg1, [tt, \reg1, uxtw #\sz]
	.endm

	.macro		__hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op
	ldp		\out0, \out1, [rk], #8

	__pair\enc	\sz, \op, w12, w13, \in0, \in1, \in3, 0
	__pair\enc	\sz, \op, w14, w15, \in1, \in2, \in0, 8
	__pair\enc	\sz, \op, w16, w17, \in2, \in3, \in1, 16
	__pair\enc	\sz, \op, \t0, \t1, \in3, \in0, \in2, 24

	eor		\out0, \out0, w12
	eor		\out1, \out1, w13
	eor		\out0, \out0, w14, ror #24
	eor		\out1, \out1, w15, ror #24
	eor		\out0, \out0, w16, ror #16
	eor		\out1, \out1, w17, ror #16
	eor		\out0, \out0, \t0, ror #8
	eor		\out1, \out1, \t1, ror #8
	.endm

	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
	.endm

	.macro		do_crypt, round, ttab, ltab, bsz
	ldp		w4, w5, [in]
	ldp		w6, w7, [in, #8]
	ldp		w8, w9, [rk], #16
	ldp		w10, w11, [rk, #-8]

CPU_BE(	rev		w4, w4		)
CPU_BE(	rev		w5, w5		)
CPU_BE(	rev		w6, w6		)
CPU_BE(	rev		w7, w7		)

	eor		w4, w4, w8
	eor		w5, w5, w9
	eor		w6, w6, w10
	eor		w7, w7, w11

	adr_l		tt, \ttab

	tbnz		rounds, #1, 1f

0:	\round		w8, w9, w10, w11, w4, w5, w6, w7
	\round		w4, w5, w6, w7, w8, w9, w10, w11

1:	subs		rounds, rounds, #4
	\round		w8, w9, w10, w11, w4, w5, w6, w7
	b.ls		3f
2:	\round		w4, w5, w6, w7, w8, w9, w10, w11
	b		0b
3:	adr_l		tt, \ltab
	\round		w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b

CPU_BE(	rev		w4, w4		)
CPU_BE(	rev		w5, w5		)
CPU_BE(	rev		w6, w6		)
CPU_BE(	rev		w7, w7		)

	stp		w4, w5, [out]
	stp		w6, w7, [out, #8]
	ret
	.endm

ENTRY(__aes_arm64_encrypt)
	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
ENDPROC(__aes_arm64_encrypt)

	.align		5
ENTRY(__aes_arm64_decrypt)
	do_crypt	iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0
ENDPROC(__aes_arm64_decrypt)

	.section	".rodata", "a"
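
/*
 * Plain inverse S-box, used as the last-round lookup table (ltab) for
 * decryption: the final AES round omits InvMixColumns, so do_crypt is
 * invoked with bsz == 0 and performs byte-sized lookups into this table.
 */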
"a" 135 .align L1_CACHE_SHIFT 136 .type __aes_arm64_inverse_sbox, %object 137__aes_arm64_inverse_sbox: 138 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 139 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb 140 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 141 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb 142 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d 143 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e 144 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 145 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 146 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 147 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 148 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda 149 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 150 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a 151 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 152 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 153 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b 154 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea 155 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 156 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 157 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e 158 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 159 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b 160 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 161 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 162 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 163 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f 164 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d 165 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef 166 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 167 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 168 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 169 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d 170 .size __aes_arm64_inverse_sbox, . - __aes_arm64_inverse_sbox 171