/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Bit sliced AES using NEON instructions
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

/*
 * The algorithm implemented here is described in detail by the paper
 * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
 * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf)
 *
 * This implementation is based primarily on the OpenSSL implementation
 * for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.fpu		neon

	/*
	 * Symbolic names for the two core registers the round code relies on:
	 * the remaining-round counter lives in ip, the pointer into the
	 * converted key schedule in r4.
	 */
	rounds		.req	ip
	bskey		.req	r4

	/*
	 * qNl / qNh name the low and high D-register halves of Q register N,
	 * so that a macro taking a Q register argument can form its halves
	 * as \reg\()l and \reg\()h.
	 */
	q0l		.req	d0
	q0h		.req	d1
	q1l		.req	d2
	q1h		.req	d3
	q2l		.req	d4
	q2h		.req	d5
	q3l		.req	d6
	q3h		.req	d7
	q4l		.req	d8
	q4h		.req	d9
	q5l		.req	d10
	q5h		.req	d11
	q6l		.req	d12
	q6h		.req	d13
	q7l		.req	d14
	q7h		.req	d15
	q8l		.req	d16
	q8h		.req	d17
	q9l		.req	d18
	q9h		.req	d19
	q10l		.req	d20
	q10h		.req	d21
	q11l		.req	d22
	q11h		.req	d23
	q12l		.req	d24
	q12h		.req	d25
	q13l		.req	d26
	q13h		.req	d27
	q14l		.req	d28
	q14h		.req	d29
	q15l		.req	d30
	q15h		.req	d31

	/*
	 * __tbl - 16-byte table lookup on Q registers.
	 *
	 * \out receives the bytes of \tbl selected by the byte indices held
	 * in \in, done as one vtbl.8 per D half of \out / \in.
	 *
	 * If \out and \tbl name the same register, the low-half lookup would
	 * overwrite part of the table before the high-half lookup reads it.
	 * In that case the table is first copied to \tmp and the second
	 * lookup reads the copy; omitting \tmp is then a build-time .error.
	 */
	.macro		__tbl, out, tbl, in, tmp
	.ifc		\out, \tbl
	.ifb		\tmp
	.error		__tbl needs temp register if out == tbl
	.endif
	vmov		\tmp, \out
	.endif
	vtbl.8		\out\()l, {\tbl}, \in\()l
	.ifc		\out, \tbl
	vtbl.8		\out\()h, {\tmp}, \in\()h
	.else
	vtbl.8		\out\()h, {\tbl}, \in\()h
	.endif
	.endm

	/*
	 * __ldr - load the 16 bytes at \sym into Q register \out, as two
	 * 8-byte vldr loads (low half from \sym, high half from \sym + 8).
	 */
	.macro		__ldr, out, sym
	vldr		\out\()l, \sym
	vldr		\out\()h, \sym + 8
	.endm

	/*
	 * in_bs_ch - XOR-only mixing of \b0..\b7, in place.
	 *
	 * First stage of sbox, run ahead of inv_gf256.  The sequence is order
	 * dependent: later XORs consume results of earlier ones.
	 * (The name presumably abbreviates "input basis change" - confirm
	 * against the referenced paper.)
	 */
	.macro		in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
	veor		\b2, \b2, \b1
	veor		\b5, \b5, \b6
	veor		\b3, \b3, \b0
	veor		\b6, \b6, \b2
	veor		\b5, \b5, \b0
	veor		\b6, \b6, \b3
	veor		\b3, \b3, \b7
	veor		\b7, \b7, \b5
	veor		\b3, \b3, \b4
	veor		\b4, \b4, \b5
	veor		\b2, \b2, \b7
	veor		\b3, \b3, \b1
	veor		\b1, \b1, \b5
	.endm

	/*
	 * out_bs_ch - XOR-only mixing of \b0..\b7, in place.
	 *
	 * Last stage of sbox, run after inv_gf256.  Order dependent.
	 */
	.macro		out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
	veor		\b0, \b0, \b6
	veor		\b1, \b1, \b4
	veor		\b4, \b4, \b6
	veor		\b2, \b2, \b0
	veor		\b6, \b6, \b1
	veor		\b1, \b1, \b5
	veor		\b5, \b5, \b3
	veor		\b3, \b3, \b7
	veor		\b7, \b7, \b5
	veor		\b2, \b2, \b5
	veor		\b4, \b4, \b7
	.endm

	/*
	 * inv_in_bs_ch - XOR-only mixing, first stage of inv_sbox.
	 *
	 * Note that the formal parameters are declared in the order
	 * b6, b1, b2, b4, b7, b0, b3, b5, so the Nth argument is not \bN.
	 */
	.macro		inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5
	veor		\b1, \b1, \b7
	veor		\b4, \b4, \b7
	veor		\b7, \b7, \b5
	veor		\b1, \b1, \b3
	veor		\b2, \b2, \b5
	veor		\b3, \b3, \b7
	veor		\b6, \b6, \b1
	veor		\b2, \b2, \b0
	veor		\b5, \b5, \b3
	veor		\b4, \b4, \b6
	veor		\b0, \b0, \b6
	veor		\b1, \b1, \b4
	.endm

	/*
	 * inv_out_bs_ch - XOR-only mixing, last stage of inv_sbox.
	 *
	 * The formal parameters are declared in the order
	 * b6, b5, b0, b3, b7, b1, b4, b2.
	 */
	.macro		inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
	veor		\b1, \b1, \b5
	veor		\b2, \b2, \b7
	veor		\b3, \b3, \b1
	veor		\b4, \b4, \b5
	veor		\b7, \b7, \b5
	veor		\b3, \b3, \b4
	veor		\b5, \b5, \b0
	veor		\b3, \b3, \b7
	veor		\b6, \b6, \b2
	veor		\b2, \b2, \b1
	veor		\b6, \b6, \b3
	veor		\b3, \b3, \b0
	veor		\b5, \b5, \b6
	.endm

	/*
	 * mul_gf4 - going by its name, a GF(2^2) multiplication on bit
	 * planes: (\x0, \x1) is replaced by its product with (\y0, \y1).
	 *
	 *	x1' = (x1 & y0) ^ ((y0 ^ y1) & x0)
	 *	x0' = ((x0 ^ x1) & y1) ^ (x1 & y0)
	 *
	 * \y0 and \y1 are only read; \t0 and \t1 are clobbered.
	 */
	.macro		mul_gf4, x0, x1, y0, y1, t0, t1
	veor		\t0, \y0, \y1
	vand		\t0, \t0, \x0
	veor		\x0, \x0, \x1
	vand		\t1, \x1, \y0
	vand		\x0, \x0, \y1
	veor		\x1, \t1, \t0
	veor		\x0, \x0, \t1
	.endm

	/*
	 * mul_gf4_n_gf4 - two interleaved two-plane multiplications:
	 * (\x0, \x1) by (\y0, \y1) and (\x2, \x3) by (\y2, \y3).
	 *
	 * The second pair evaluates the same expressions as mul_gf4.  The
	 * first pair uses the same three AND terms but combines them
	 * differently:
	 *
	 *	x1' = (x1 & y0) ^ ((x0 ^ x1) & y1)
	 *	x0' = ((x0 ^ x1) & y1) ^ ((y0 ^ y1) & x0)
	 *
	 * The \y registers are only read; \t0 and \t1 are clobbered.
	 */
	.macro		mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1
	veor		\t0, \y0, \y1
	veor		\t1, \y2, \y3
	vand		\t0, \t0, \x0
	vand		\t1, \t1, \x2
	veor		\x0, \x0, \x1
	veor		\x2, \x2, \x3
	vand		\x1, \x1, \y0
	vand		\x3, \x3, \y2
	vand		\x0, \x0, \y1
	vand		\x2, \x2, \y3
	veor		\x1, \x1, \x0
	veor		\x2, \x2, \x3
	veor		\x0, \x0, \t0
	veor		\x3, \x3, \t1
	.endm

	/*
	 * mul_gf16_2 - combine (\x0..\x3) and, separately, (\x4..\x7) with
	 * the common operand (\y0..\y3), built from mul_gf4 and
	 * mul_gf4_n_gf4.  (By its name: two GF(2^4) multiplications.)
	 *
	 * \y0 and \y1 are XORed with \y2 / \y3 part-way through and XORed
	 * again later, so all four \y registers hold their original values
	 * on exit.  \t0..\t3 are clobbered.
	 */
	.macro		mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
				    y0, y1, y2, y3, t0, t1, t2, t3
	veor		\t0, \x0, \x2
	veor		\t1, \x1, \x3
	mul_gf4		\x0, \x1, \y0, \y1, \t2, \t3
	veor		\y0, \y0, \y2
	veor		\y1, \y1, \y3
	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
	veor		\x0, \x0, \t0
	veor		\x2, \x2, \t0
	veor		\x1, \x1, \t1
	veor		\x3, \x3, \t1
	veor		\t0, \x4, \x6
	veor		\t1, \x5, \x7
	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
	veor		\y0, \y0, \y2			// undo the XOR above
	veor		\y1, \y1, \y3
	mul_gf4		\x4, \x5, \y0, \y1, \t2, \t3
	veor		\x4, \x4, \t0
	veor		\x6, \x6, \t0
	veor		\x5, \x5, \t1
	veor		\x7, \x7, \t1
	.endm

	/*
	 * inv_gf256 - by its name, inversion in GF(2^8) of the bit-sliced
	 * state \x0..\x7, in place.
	 *
	 * A branch-free sequence of XOR/AND/OR/bit-select operations derives
	 * four intermediate planes in \s3, \s2, \s1, \t1, which are then fed
	 * to mul_gf16_2 together with \x0..\x7.
	 *
	 * \t0..\t3 and \s0..\s3 are scratch and are clobbered.
	 */
	.macro		inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
				   t0, t1, t2, t3, s0, s1, s2, s3
	veor		\t3, \x4, \x6
	veor		\t0, \x5, \x7
	veor		\t1, \x1, \x3
	veor		\s1, \x7, \x6
	veor		\s0, \x0, \x2
	veor		\s3, \t3, \t0
	vorr		\t2, \t0, \t1
	vand		\s2, \t3, \s0
	vorr		\t3, \t3, \s0
	veor		\s0, \s0, \t1
	vand		\t0, \t0, \t1
	veor		\t1, \x3, \x2
	vand		\s3, \s3, \s0
	vand		\s1, \s1, \t1
	veor		\t1, \x4, \x5
	veor		\s0, \x1, \x0
	veor		\t3, \t3, \s1
	veor		\t2, \t2, \s1
	vand		\s1, \t1, \s0
	vorr		\t1, \t1, \s0
	veor		\t3, \t3, \s3
	veor		\t0, \t0, \s1
	veor		\t2, \t2, \s2
	veor		\t1, \t1, \s3
	veor		\t0, \t0, \s2
	vand		\s0, \x7, \x3
	veor		\t1, \t1, \s2
	vand		\s1, \x6, \x2
	vand		\s2, \x5, \x1
	vorr		\s3, \x4, \x0
	veor		\t3, \t3, \s0
	veor		\t1, \t1, \s2
	veor		\s0, \t0, \s3
	veor		\t2, \t2, \s1
	vand		\s2, \t3, \t1
	veor		\s1, \t2, \s2
	veor		\s3, \s0, \s2
	vbsl		\s1, \t1, \s0
	vmvn		\t0, \s0
	vbsl		\s0, \s1, \s3
	vbsl		\t0, \s1, \s3
	vbsl		\s3, \t3, \t2
	veor		\t3, \t3, \t2
	vand		\s2, \s0, \s3
	veor		\t1, \t1, \t0
	veor		\s2, \s2, \t3
	mul_gf16_2	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
			\s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
	.endm

	/*
	 * sbox - forward S-box stage on the bit-sliced state \b0..\b7,
	 * in place: in_bs_ch, then inv_gf256, then out_bs_ch.
	 *
	 * Each stage is handed the registers in a different order, so the
	 * result is left in a permuted register order.  The encryption round
	 * loop in this file continues with the order
	 * b0, b1, b4, b6, b3, b7, b2, b5.
	 *
	 * \t0..\t3 and \s0..\s3 are scratch.
	 */
	.macro		sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
			      t0, t1, t2, t3, s0, s1, s2, s3
	in_bs_ch	\b0, \b1, \b2, \b3, \b4, \b5, \b6, \b7
	inv_gf256	\b6, \b5, \b0, \b3, \b7, \b1, \b4, \b2, \
			\t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3
	out_bs_ch	\b7, \b1, \b4, \b2, \b6, \b5, \b0, \b3
	.endm

	/*
	 * inv_sbox - inverse S-box stage on \b0..\b7, in place:
	 * inv_in_bs_ch, then inv_gf256, then inv_out_bs_ch.
	 *
	 * As with sbox the result is left in a permuted register order.  The
	 * decryption round loop in this file continues with the order
	 * b0, b1, b6, b4, b2, b7, b3, b5.
	 *
	 * \t0..\t3 and \s0..\s3 are scratch.
	 */
	.macro		inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
				  t0, t1, t2, t3, s0, s1, s2, s3
	inv_in_bs_ch	\b0, \b1, \b2, \b3, \b4, \b5, \b6, \b7
	inv_gf256	\b5, \b1, \b2, \b6, \b3, \b7, \b0, \b4, \
			\t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3
	inv_out_bs_ch	\b3, \b7, \b0, \b4, \b5, \b1, \b2, \b6
	.endm

	/*
	 * shift_rows - round key addition fused with a byte permutation.
	 *
	 * Loads eight 16-byte vectors from [bskey] (post-incrementing bskey
	 * by 128 in total), XORs vector i with \x<i> and writes the result,
	 * permuted through \mask by __tbl, back to \x<i>.
	 *
	 * The :256 qualifiers require bskey to be 32-byte aligned.
	 * \t0..\t3 are clobbered; loads are interleaved with the lookups.
	 */
	.macro		shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, \
				    t0, t1, t2, t3, mask
	vld1.8		{\t0-\t1}, [bskey, :256]!
	veor		\t0, \t0, \x0
	vld1.8		{\t2-\t3}, [bskey, :256]!
	veor		\t1, \t1, \x1
	__tbl		\x0, \t0, \mask
	veor		\t2, \t2, \x2
	__tbl		\x1, \t1, \mask
	vld1.8		{\t0-\t1}, [bskey, :256]!
	veor		\t3, \t3, \x3
	__tbl		\x2, \t2, \mask
	__tbl		\x3, \t3, \mask
	vld1.8		{\t2-\t3}, [bskey, :256]!
	veor		\t0, \t0, \x4
	veor		\t1, \t1, \x5
	__tbl		\x4, \t0, \mask
	veor		\t2, \t2, \x6
	__tbl		\x5, \t1, \mask
	veor		\t3, \t3, \x7
	__tbl		\x6, \t2, \mask
	__tbl		\x7, \t3, \mask
	.endm

	/*
	 * inv_shift_rows - permute each of \x0..\x7 in place through \mask.
	 *
	 * Unlike shift_rows this neither reads the key schedule nor touches
	 * bskey.  Because source and destination coincide, __tbl needs a
	 * temporary; \t0..\t3 are used in rotation and are clobbered.
	 */
	.macro		inv_shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, \
					t0, t1, t2, t3, mask
	__tbl		\x0, \x0, \mask, \t0
	__tbl		\x1, \x1, \mask, \t1
	__tbl		\x2, \x2, \mask, \t2
	__tbl		\x3, \x3, \mask, \t3
	__tbl		\x4, \x4, \mask, \t0
	__tbl		\x5, \x5, \mask, \t1
	__tbl		\x6, \x6, \mask, \t2
	__tbl		\x7, \x7, \mask, \t3
	.endm

	/*
	 * mix_cols - column mixing step on the bit-sliced state \x0..\x7.
	 *
	 * Built from byte rotations of each 16-byte vector (vext.8 by 12 and
	 * by 8) and XORs.  \t0..\t7 are clobbered.
	 *
	 * \inv selects the final recombination at assembly time: leave it
	 * blank for the forward transform; inv_mix_cols passes a non-blank
	 * value to get the variant it needs.
	 */
	.macro		mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
				  t0, t1, t2, t3, t4, t5, t6, t7, inv
	vext.8		\t0, \x0, \x0, #12
	vext.8		\t1, \x1, \x1, #12
	veor		\x0, \x0, \t0
	vext.8		\t2, \x2, \x2, #12
	veor		\x1, \x1, \t1
	vext.8		\t3, \x3, \x3, #12
	veor		\x2, \x2, \t2
	vext.8		\t4, \x4, \x4, #12
	veor		\x3, \x3, \t3
	vext.8		\t5, \x5, \x5, #12
	veor		\x4, \x4, \t4
	vext.8		\t6, \x6, \x6, #12
	veor		\x5, \x5, \t5
	vext.8		\t7, \x7, \x7, #12
	veor		\x6, \x6, \t6
	veor		\t1, \t1, \x0
	veor.8		\x7, \x7, \t7
	vext.8		\x0, \x0, \x0, #8
	veor		\t2, \t2, \x1
	veor		\t0, \t0, \x7
	veor		\t1, \t1, \x7
	vext.8		\x1, \x1, \x1, #8
	veor		\t5, \t5, \x4
	veor		\x0, \x0, \t0
	veor		\t6, \t6, \x5
	veor		\x1, \x1, \t1
	vext.8		\t0, \x4, \x4, #8
	veor		\t4, \t4, \x3
	vext.8		\t1, \x5, \x5, #8
	veor		\t7, \t7, \x6
	vext.8		\x4, \x3, \x3, #8
	veor		\t3, \t3, \x2
	vext.8		\x5, \x7, \x7, #8
	veor		\t4, \t4, \x7
	vext.8		\x3, \x6, \x6, #8
	veor		\t3, \t3, \x7
	vext.8		\x6, \x2, \x2, #8
	veor		\x7, \t1, \t5
	.ifb		\inv
	veor		\x2, \t0, \t4
	veor		\x4, \x4, \t3
	veor		\x5, \x5, \t7
	veor		\x3, \x3, \t6
	veor		\x6, \x6, \t2
	.else
	veor		\t3, \t3, \x4
	veor		\x5, \x5, \t7
	veor		\x2, \x3, \t6
	veor		\x3, \t0, \t4
	veor		\x4, \x6, \t2
	vmov		\x6, \t3
	.endif
	.endm

	/*
	 * inv_mix_cols - round key addition followed by the inverse column
	 * mixing step on \x0..\x7.
	 *
	 * Eight 16-byte vectors are loaded from [bskey] and XORed into
	 * \x0..\x7.  Three of the four loads post-increment bskey by 32 and
	 * the following 'sub' takes off 224, so bskey ends up 128 bytes
	 * below its entry value, i.e. the schedule is walked backwards.
	 *
	 * The state is then pre-processed with vext.8 #8 rotations and XORs
	 * and handed to mix_cols with \inv set.  \t0..\t7 are clobbered.
	 * The :256 qualifiers require bskey to be 32-byte aligned.
	 */
	.macro		inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
				      t0, t1, t2, t3, t4, t5, t6, t7
	vld1.8		{\t0-\t1}, [bskey, :256]!
	veor		\x0, \x0, \t0
	vld1.8		{\t2-\t3}, [bskey, :256]!
	veor		\x1, \x1, \t1
	vld1.8		{\t4-\t5}, [bskey, :256]!
	veor		\x2, \x2, \t2
	vld1.8		{\t6-\t7}, [bskey, :256]
	sub		bskey, bskey, #224
	veor		\x3, \x3, \t3
	veor		\x4, \x4, \t4
	veor		\x5, \x5, \t5
	veor		\x6, \x6, \t6
	veor		\x7, \x7, \t7
	vext.8		\t0, \x0, \x0, #8
	vext.8		\t6, \x6, \x6, #8
	vext.8		\t7, \x7, \x7, #8
	veor		\t0, \t0, \x0
	vext.8		\t1, \x1, \x1, #8
	veor		\t6, \t6, \x6
	vext.8		\t2, \x2, \x2, #8
	veor		\t7, \t7, \x7
	vext.8		\t3, \x3, \x3, #8
	veor		\t1, \t1, \x1
	vext.8		\t4, \x4, \x4, #8
	veor		\t2, \t2, \x2
	vext.8		\t5, \x5, \x5, #8
	veor		\t3, \t3, \x3
	veor		\t4, \t4, \x4
	veor		\t5, \t5, \x5
	veor		\x0, \x0, \t6
	veor		\x1, \x1, \t6
	veor		\x2, \x2, \t0
	veor		\x4, \x4, \t2
	veor		\x3, \x3, \t1
	veor		\x1, \x1, \t7
	veor		\x2, \x2, \t7
	veor		\x4, \x4, \t6
	veor		\x5, \x5, \t3
	veor		\x3, \x3, \t6
	veor		\x6, \x6, \t4
	veor		\x4, \x4, \t7
	veor		\x5, \x5, \t7
	veor		\x7, \x7, \t5
	mix_cols	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
			\t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1
	.endm
/*
 * Bit-slice transposition helpers, key conversion, the 8-block cipher
 * cores and the ECB / CBC-decrypt entry points.
 */

	/*
	 * swapmove_2x - masked bit exchange between two register pairs.
	 *
	 * For each pair (a, b), on 64-bit lanes:
	 *
	 *	t  = ((b >> n) ^ a) & mask
	 *	a ^= t
	 *	b ^= t << n
	 *
	 * which swaps the bits of a selected by \mask with the bits of b
	 * selected by (\mask << \n).  The pairs (\a0, \b0) and (\a1, \b1) are
	 * processed interleaved.  \t0 and \t1 are clobbered.
	 */
	.macro		swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
	vshr.u64	\t0, \b0, #\n
	vshr.u64	\t1, \b1, #\n
	veor		\t0, \t0, \a0
	veor		\t1, \t1, \a1
	vand		\t0, \t0, \mask
	vand		\t1, \t1, \mask
	veor		\a0, \a0, \t0
	vshl.s64	\t0, \t0, #\n
	veor		\a1, \a1, \t1
	vshl.s64	\t1, \t1, #\n
	veor		\b0, \b0, \t0
	veor		\b1, \b1, \t1
	.endm

	/*
	 * bitslice - transpose bits across eight registers, in place, using
	 * three swapmove passes with (shift, mask) = (1, 0x55), (2, 0x33)
	 * and (4, 0x0f).
	 *
	 * Note that the formal parameters are declared from x7 down to x0,
	 * so the first argument is \x7.  \t0..\t3 are clobbered.
	 */
	.macro		bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
	vmov.i8		\t0, #0x55
	vmov.i8		\t1, #0x33
	swapmove_2x	\x0, \x1, \x2, \x3, 1, \t0, \t2, \t3
	swapmove_2x	\x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
	vmov.i8		\t0, #0x0f
	swapmove_2x	\x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
	swapmove_2x	\x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
	swapmove_2x	\x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
	swapmove_2x	\x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
	.endm

	/* byte permutation applied to each round key by aesbs_convert_key */
	.align		4
M0:	.quad		0x02060a0e03070b0f, 0x0004080c0105090d

	/*
	 * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
	 *
	 * Convert the round keys at rk[] into the layout consumed by
	 * aesbs_encrypt8 / aesbs_decrypt8:
	 *
	 *   - the first round key is stored unchanged (16 bytes);
	 *   - each of the next (rounds - 1) keys is permuted through M0 and
	 *     expanded to eight 16-byte masks, one per bit position
	 *     (vtst.8 against 0x01 .. 0x80), with the masks for bits 0, 1, 5
	 *     and 6 inverted (128 bytes per key);
	 *   - the final round key is XORed with 0x63 in every byte and
	 *     stored as 16 bytes.
	 *
	 * In:	r0 = out, r1 = rk, r2 = rounds
	 * The store alignment qualifiers require r0 to be 16-byte aligned on
	 * entry and 32-byte aligned after the first 16-byte store.
	 * Writes q0-q15, r0-r2 and the flags.
	 */
ENTRY(aesbs_convert_key)
	vld1.32		{q7}, [r1]!		// load round 0 key
	vld1.32		{q15}, [r1]!		// load round 1 key

	vmov.i8		q8,  #0x01		// bit masks
	vmov.i8		q9,  #0x02
	vmov.i8		q10, #0x04
	vmov.i8		q11, #0x08
	vmov.i8		q12, #0x10
	vmov.i8		q13, #0x20
	__ldr		q14, M0

	sub		r2, r2, #1
	vst1.8		{q7}, [r0, :128]!	// save round 0 key

.Lkey_loop:
	__tbl		q7, q15, q14
	vmov.i8		q6, #0x40		// q6/q15 are reused below, so
	vmov.i8		q15, #0x80		// these masks are rebuilt each pass

	vtst.8		q0, q7, q8
	vtst.8		q1, q7, q9
	vtst.8		q2, q7, q10
	vtst.8		q3, q7, q11
	vtst.8		q4, q7, q12
	vtst.8		q5, q7, q13
	vtst.8		q6, q7, q6
	vtst.8		q7, q7, q15
	vld1.32		{q15}, [r1]!		// load next round key
	vmvn		q0, q0
	vmvn		q1, q1
	vmvn		q5, q5
	vmvn		q6, q6

	subs		r2, r2, #1
	vst1.8		{q0-q1}, [r0, :256]!
	vst1.8		{q2-q3}, [r0, :256]!
	vst1.8		{q4-q5}, [r0, :256]!
	vst1.8		{q6-q7}, [r0, :256]!
	bne		.Lkey_loop

	vmov.i8		q7, #0x63		// compose .L63
	veor		q15, q15, q7
	vst1.8		{q15}, [r0, :128]
	bx		lr
ENDPROC(aesbs_convert_key)

	/* input permutation used by aesbs_encrypt8 before bit slicing */
	.align		4
M0SR:	.quad		0x0a0e02060f03070b, 0x0004080c05090d01

	/*
	 * aesbs_encrypt8 - encrypt the eight blocks held in q0-q7.
	 *
	 * In:	q0-q7  = input blocks
	 *	bskey  = key schedule as written by aesbs_convert_key
	 *	rounds = number of rounds
	 * Out:	q0, q1, q4, q6, q3, q7, q2, q5 = output blocks, in that order
	 *
	 * Flow: XOR with the first round key and permute through M0SR,
	 * bitslice, then loop over { shift_rows (which also adds the next
	 * round key), sbox, mix_cols }.  The mask handed to shift_rows is SR,
	 * except on the last pass where it is SRM0; that pass also skips
	 * mix_cols.  Finally the state is bit-sliced back and XORed with the
	 * last round key.
	 *
	 * Writes q0-q15, bskey, rounds and the flags.
	 */
aesbs_encrypt8:
	vld1.8		{q9}, [bskey, :128]!	// round 0 key
	__ldr		q8, M0SR

	veor		q10, q0, q9		// xor with round0 key
	veor		q11, q1, q9
	__tbl		q0, q10, q8
	veor		q12, q2, q9
	__tbl		q1, q11, q8
	veor		q13, q3, q9
	__tbl		q2, q12, q8
	veor		q14, q4, q9
	__tbl		q3, q13, q8
	veor		q15, q5, q9
	__tbl		q4, q14, q8
	veor		q10, q6, q9
	__tbl		q5, q15, q8
	veor		q11, q7, q9
	__tbl		q6, q10, q8
	__tbl		q7, q11, q8

	bitslice	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11

	sub		rounds, rounds, #1
	b		.Lenc_sbox

	.align		5
SR:	.quad		0x0504070600030201, 0x0f0e0d0c0a09080b
SRM0:	.quad		0x0304090e00050a0f, 0x01060b0c0207080d

.Lenc_last:
	__ldr		q12, SRM0
.Lenc_loop:
	shift_rows	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12
.Lenc_sbox:
	sbox		q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, \
								q13, q14, q15
	subs		rounds, rounds, #1
	bcc		.Lenc_done		// counter was already 0: finished

	mix_cols	q0, q1, q4, q6, q3, q7, q2, q5, q8, q9, q10, q11, q12, \
								q13, q14, q15

	beq		.Lenc_last		// flags still from the subs above
	__ldr		q12, SR
	b		.Lenc_loop

.Lenc_done:
	vld1.8		{q12}, [bskey, :128]	// last round key

	bitslice	q0, q1, q4, q6, q3, q7, q2, q5, q8, q9, q10, q11

	veor		q0, q0, q12
	veor		q1, q1, q12
	veor		q4, q4, q12
	veor		q6, q6, q12
	veor		q3, q3, q12
	veor		q7, q7, q12
	veor		q2, q2, q12
	veor		q5, q5, q12
	bx		lr
ENDPROC(aesbs_encrypt8)

	/* input permutation used by aesbs_decrypt8 before bit slicing */
	.align		4
M0ISR:	.quad		0x0a0e0206070b0f03, 0x0004080c0d010509

	/*
	 * aesbs_decrypt8 - decrypt the eight blocks held in q0-q7.
	 *
	 * In:	q0-q7  = input blocks
	 *	bskey  = start of the key schedule written by aesbs_convert_key
	 *	rounds = number of rounds
	 * Out:	q0, q1, q6, q4, q2, q7, q3, q5 = output blocks, in that order
	 *
	 * The schedule is consumed back to front: bskey is first advanced to
	 * the final 16-byte key at offset (rounds * 128 - 112), then moved
	 * down 128 bytes to the preceding bit-sliced key; inv_mix_cols steps
	 * it down a further 128 bytes on every pass.
	 *
	 * Writes q0-q15, bskey, rounds and the flags.
	 */
aesbs_decrypt8:
	add		bskey, bskey, rounds, lsl #7
	sub		bskey, bskey, #112
	vld1.8		{q9}, [bskey, :128]	// round 0 key
	sub		bskey, bskey, #128
	__ldr		q8, M0ISR

	veor		q10, q0, q9		// xor with round0 key
	veor		q11, q1, q9
	__tbl		q0, q10, q8
	veor		q12, q2, q9
	__tbl		q1, q11, q8
	veor		q13, q3, q9
	__tbl		q2, q12, q8
	veor		q14, q4, q9
	__tbl		q3, q13, q8
	veor		q15, q5, q9
	__tbl		q4, q14, q8
	veor		q10, q6, q9
	__tbl		q5, q15, q8
	veor		q11, q7, q9
	__tbl		q6, q10, q8
	__tbl		q7, q11, q8

	bitslice	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11

	sub		rounds, rounds, #1
	b		.Ldec_sbox

	.align		5
ISR:	.quad		0x0504070602010003, 0x0f0e0d0c080b0a09
ISRM0:	.quad		0x01040b0e0205080f, 0x0306090c00070a0d

.Ldec_last:
	__ldr		q12, ISRM0
.Ldec_loop:
	inv_shift_rows	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12
.Ldec_sbox:
	inv_sbox	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, \
								q13, q14, q15
	subs		rounds, rounds, #1
	bcc		.Ldec_done		// counter was already 0: finished

	inv_mix_cols	q0, q1, q6, q4, q2, q7, q3, q5, q8, q9, q10, q11, q12, \
								q13, q14, q15

	beq		.Ldec_last		// flags still from the subs above
	__ldr		q12, ISR
	b		.Ldec_loop

.Ldec_done:
	add		bskey, bskey, #112
	vld1.8		{q12}, [bskey, :128]	// last round key

	bitslice	q0, q1, q6, q4, q2, q7, q3, q5, q8, q9, q10, q11

	veor		q0, q0, q12
	veor		q1, q1, q12
	veor		q6, q6, q12
	veor		q4, q4, q12
	veor		q2, q2, q12
	veor		q7, q7, q12
	veor		q3, q3, q12
	veor		q5, q5, q12
	bx		lr
ENDPROC(aesbs_decrypt8)

	/*
	 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		     int blocks)
	 * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		     int blocks)
	 *
	 * __ecb_crypt is the shared body: \do8 is the 8-block core to call
	 * and \o0..\o7 name the registers holding its output, in block
	 * order.
	 *
	 * In:	r0 = out, r1 = in, r2 = rk, r3 = rounds, [sp] = blocks
	 *
	 * Input is processed in batches of up to eight blocks.  For a batch
	 * of fewer than eight (r5 < 8) a computed branch skips the leading
	 * loads and stores: the target is (blocks & 7) four-byte
	 * instructions before the local label, so only the last
	 * (blocks & 7) registers of each sequence are transferred.
	 */
	.macro		__ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
	push		{r4-r6, lr}
	ldr		r5, [sp, #16]		// number of blocks

99:	adr		ip, 0f
	and		lr, r5, #7
	cmp		r5, #8
	sub		ip, ip, lr, lsl #2
	movlt		pc, ip			// computed goto if blocks < 8

	vld1.8		{q0}, [r1]!
	vld1.8		{q1}, [r1]!
	vld1.8		{q2}, [r1]!
	vld1.8		{q3}, [r1]!
	vld1.8		{q4}, [r1]!
	vld1.8		{q5}, [r1]!
	vld1.8		{q6}, [r1]!
	vld1.8		{q7}, [r1]!

0:	mov		bskey, r2
	mov		rounds, r3
	bl		\do8

	adr		ip, 1f
	and		lr, r5, #7
	cmp		r5, #8
	sub		ip, ip, lr, lsl #2
	movlt		pc, ip			// computed goto if blocks < 8

	vst1.8		{\o0}, [r0]!
	vst1.8		{\o1}, [r0]!
	vst1.8		{\o2}, [r0]!
	vst1.8		{\o3}, [r0]!
	vst1.8		{\o4}, [r0]!
	vst1.8		{\o5}, [r0]!
	vst1.8		{\o6}, [r0]!
	vst1.8		{\o7}, [r0]!

1:	subs		r5, r5, #8
	bgt		99b

	pop		{r4-r6, pc}
	.endm

	.align		4
ENTRY(aesbs_ecb_encrypt)
	__ecb_crypt	aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5
ENDPROC(aesbs_ecb_encrypt)

	.align		4
ENTRY(aesbs_ecb_decrypt)
	__ecb_crypt	aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5
ENDPROC(aesbs_ecb_decrypt)

	/*
	 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
	 *		     int rounds, int blocks, u8 iv[])
	 *
	 * In:	r0 = out, r1 = in, r2 = rk, r3 = rounds,
	 *	[sp] = blocks (kept in r5), [sp, #4] = iv (kept in r6)
	 *
	 * Each batch reads the input twice: once through lr to fill q0-q7
	 * for aesbs_decrypt8 (r1 is left alone), and once through r1 to
	 * fetch the blocks that are XORed into the decrypted output.  q8
	 * is loaded from iv[] and pre-copied to q9-q15, so that in a short
	 * batch the first output block still sees the IV.
	 */
	.align		4
ENTRY(aesbs_cbc_decrypt)
	mov		ip, sp
	push		{r4-r6, lr}
	ldm		ip, {r5-r6}		// load args 4-5

99:	adr		ip, 0f
	and		lr, r5, #7
	cmp		r5, #8
	sub		ip, ip, lr, lsl #2
	mov		lr, r1
	movlt		pc, ip			// computed goto if blocks < 8

	vld1.8		{q0}, [lr]!
	vld1.8		{q1}, [lr]!
	vld1.8		{q2}, [lr]!
	vld1.8		{q3}, [lr]!
	vld1.8		{q4}, [lr]!
	vld1.8		{q5}, [lr]!
	vld1.8		{q6}, [lr]!
	vld1.8		{q7}, [lr]

0:	mov		bskey, r2
	mov		rounds, r3
	bl		aesbs_decrypt8

	vld1.8		{q8}, [r6]
	vmov		q9, q8
	vmov		q10, q8
	vmov		q11, q8
	vmov		q12, q8
	vmov		q13, q8
	vmov		q14, q8
	vmov		q15, q8

	adr		ip, 1f
	and		lr, r5, #7
	cmp		r5, #8
	sub		ip, ip, lr, lsl #2
	movlt		pc, ip			// computed goto if blocks < 8

	vld1.8		{q9}, [r1]!
	vld1.8		{q10}, [r1]!
	vld1.8		{q11}, [r1]!
	vld1.8		{q12}, [r1]!
	vld1.8		{q13}, [r1]!
	vld1.8		{q14}, [r1]!
	vld1.8		{q15}, [r1]!
	W(nop)					// eighth slot: target when one block remains

	/* adr and sub leave the flags alone: movlt still tests the cmp above */
1:	adr		ip, 2f
	sub		ip, ip, lr, lsl #3	// 8 bytes per veor/vst1 pair
	movlt		pc, ip			// computed goto if blocks < 8

	veor		q0, q0, q8
	vst1.8		{q0}, [r0]!
	veor		q1, q1, q9
	vst1.8		{q1}, [r0]!
	veor		q6, q6, q10
	vst1.8		{q6}, [r0]!
	veor		q4, q4, q11
	vst1.8		{q4}, [r0]!
	veor		q2, q2, q12
	vst1.8		{q2}, [r0]!
	veor		q7, q7, q13
	vst1.8		{q7}, [r0]!
	veor		q3, q3, q14
	vst1.8		{q3}, [r0]!
	veor		q5, q5, q15
	vld1.8		{q8}, [r1]!		// load next round's iv
2:	vst1.8		{q5}, [r0]!
752cc477bf6SArd Biesheuvel 753cc477bf6SArd Biesheuvel subs r5, r5, #8 754cc477bf6SArd Biesheuvel vst1.8 {q8}, [r6] // store next round's iv 755cc477bf6SArd Biesheuvel bgt 99b 756cc477bf6SArd Biesheuvel 757cc477bf6SArd Biesheuvel pop {r4-r6, pc} 758cc477bf6SArd BiesheuvelENDPROC(aesbs_cbc_decrypt) 759cc477bf6SArd Biesheuvel 760cc477bf6SArd Biesheuvel .macro next_ctr, q 761*c8bf850eSArd Biesheuvel vmov \q\()h, r9, r10 762cc477bf6SArd Biesheuvel adds r10, r10, #1 763cc477bf6SArd Biesheuvel adcs r9, r9, #0 764*c8bf850eSArd Biesheuvel vmov \q\()l, r7, r8 765cc477bf6SArd Biesheuvel adcs r8, r8, #0 766cc477bf6SArd Biesheuvel adc r7, r7, #0 767cc477bf6SArd Biesheuvel vrev32.8 \q, \q 768cc477bf6SArd Biesheuvel .endm 769cc477bf6SArd Biesheuvel 770cc477bf6SArd Biesheuvel /* 771cc477bf6SArd Biesheuvel * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], 772*c8bf850eSArd Biesheuvel * int rounds, int bytes, u8 ctr[]) 773cc477bf6SArd Biesheuvel */ 774cc477bf6SArd BiesheuvelENTRY(aesbs_ctr_encrypt) 775cc477bf6SArd Biesheuvel mov ip, sp 776cc477bf6SArd Biesheuvel push {r4-r10, lr} 777cc477bf6SArd Biesheuvel 778*c8bf850eSArd Biesheuvel ldm ip, {r5, r6} // load args 4-5 779cc477bf6SArd Biesheuvel vld1.8 {q0}, [r6] // load counter 780cc477bf6SArd Biesheuvel vrev32.8 q1, q0 781cc477bf6SArd Biesheuvel vmov r9, r10, d3 782cc477bf6SArd Biesheuvel vmov r7, r8, d2 783cc477bf6SArd Biesheuvel 784cc477bf6SArd Biesheuvel adds r10, r10, #1 785cc477bf6SArd Biesheuvel adcs r9, r9, #0 786cc477bf6SArd Biesheuvel adcs r8, r8, #0 787cc477bf6SArd Biesheuvel adc r7, r7, #0 788cc477bf6SArd Biesheuvel 789cc477bf6SArd Biesheuvel99: vmov q1, q0 790cc477bf6SArd Biesheuvel sub lr, r5, #1 791*c8bf850eSArd Biesheuvel vmov q2, q0 792*c8bf850eSArd Biesheuvel adr ip, 0f 793*c8bf850eSArd Biesheuvel vmov q3, q0 794*c8bf850eSArd Biesheuvel and lr, lr, #112 795*c8bf850eSArd Biesheuvel vmov q4, q0 796*c8bf850eSArd Biesheuvel cmp r5, #112 797*c8bf850eSArd Biesheuvel vmov q5, q0 798*c8bf850eSArd Biesheuvel sub ip, 
ip, lr, lsl #1 799*c8bf850eSArd Biesheuvel vmov q6, q0 800*c8bf850eSArd Biesheuvel add ip, ip, lr, lsr #2 801*c8bf850eSArd Biesheuvel vmov q7, q0 802*c8bf850eSArd Biesheuvel movle pc, ip // computed goto if bytes < 112 803cc477bf6SArd Biesheuvel 804cc477bf6SArd Biesheuvel next_ctr q1 805cc477bf6SArd Biesheuvel next_ctr q2 806cc477bf6SArd Biesheuvel next_ctr q3 807cc477bf6SArd Biesheuvel next_ctr q4 808cc477bf6SArd Biesheuvel next_ctr q5 809cc477bf6SArd Biesheuvel next_ctr q6 810cc477bf6SArd Biesheuvel next_ctr q7 811cc477bf6SArd Biesheuvel 812cc477bf6SArd Biesheuvel0: mov bskey, r2 813cc477bf6SArd Biesheuvel mov rounds, r3 814cc477bf6SArd Biesheuvel bl aesbs_encrypt8 815cc477bf6SArd Biesheuvel 81645a4777eSArd Biesheuvel adr ip, 1f 817*c8bf850eSArd Biesheuvel sub lr, r5, #1 818*c8bf850eSArd Biesheuvel cmp r5, #128 819*c8bf850eSArd Biesheuvel bic lr, lr, #15 820*c8bf850eSArd Biesheuvel ands r4, r5, #15 // preserves C flag 821*c8bf850eSArd Biesheuvel teqcs r5, r5 // set Z flag if not last iteration 822*c8bf850eSArd Biesheuvel sub ip, ip, lr, lsr #2 823*c8bf850eSArd Biesheuvel rsb r4, r4, #16 824*c8bf850eSArd Biesheuvel movcc pc, ip // computed goto if bytes < 128 825cc477bf6SArd Biesheuvel 826cc477bf6SArd Biesheuvel vld1.8 {q8}, [r1]! 827cc477bf6SArd Biesheuvel vld1.8 {q9}, [r1]! 828cc477bf6SArd Biesheuvel vld1.8 {q10}, [r1]! 829cc477bf6SArd Biesheuvel vld1.8 {q11}, [r1]! 830cc477bf6SArd Biesheuvel vld1.8 {q12}, [r1]! 831cc477bf6SArd Biesheuvel vld1.8 {q13}, [r1]! 832cc477bf6SArd Biesheuvel vld1.8 {q14}, [r1]! 833*c8bf850eSArd Biesheuvel1: subne r1, r1, r4 834cc477bf6SArd Biesheuvel vld1.8 {q15}, [r1]! 
835cc477bf6SArd Biesheuvel 836*c8bf850eSArd Biesheuvel add ip, ip, #2f - 1b 837cc477bf6SArd Biesheuvel 838cc477bf6SArd Biesheuvel veor q0, q0, q8 839cc477bf6SArd Biesheuvel veor q1, q1, q9 840cc477bf6SArd Biesheuvel veor q4, q4, q10 841cc477bf6SArd Biesheuvel veor q6, q6, q11 842cc477bf6SArd Biesheuvel veor q3, q3, q12 843cc477bf6SArd Biesheuvel veor q7, q7, q13 844cc477bf6SArd Biesheuvel veor q2, q2, q14 845*c8bf850eSArd Biesheuvel bne 3f 846*c8bf850eSArd Biesheuvel veor q5, q5, q15 847*c8bf850eSArd Biesheuvel 848*c8bf850eSArd Biesheuvel movcc pc, ip // computed goto if bytes < 128 849*c8bf850eSArd Biesheuvel 850*c8bf850eSArd Biesheuvel vst1.8 {q0}, [r0]! 851*c8bf850eSArd Biesheuvel vst1.8 {q1}, [r0]! 852*c8bf850eSArd Biesheuvel vst1.8 {q4}, [r0]! 853*c8bf850eSArd Biesheuvel vst1.8 {q6}, [r0]! 854*c8bf850eSArd Biesheuvel vst1.8 {q3}, [r0]! 855*c8bf850eSArd Biesheuvel vst1.8 {q7}, [r0]! 856cc477bf6SArd Biesheuvel vst1.8 {q2}, [r0]! 857*c8bf850eSArd Biesheuvel2: subne r0, r0, r4 858cc477bf6SArd Biesheuvel vst1.8 {q5}, [r0]! 
859cc477bf6SArd Biesheuvel 860*c8bf850eSArd Biesheuvel next_ctr q0 861cc477bf6SArd Biesheuvel 862*c8bf850eSArd Biesheuvel subs r5, r5, #128 863cc477bf6SArd Biesheuvel bgt 99b 864cc477bf6SArd Biesheuvel 8651a20b966SArd Biesheuvel vst1.8 {q0}, [r6] 866cc477bf6SArd Biesheuvel pop {r4-r10, pc} 8671a20b966SArd Biesheuvel 868*c8bf850eSArd Biesheuvel3: adr lr, .Lpermute_table + 16 869*c8bf850eSArd Biesheuvel cmp r5, #16 // Z flag remains cleared 870*c8bf850eSArd Biesheuvel sub lr, lr, r4 871*c8bf850eSArd Biesheuvel vld1.8 {q8-q9}, [lr] 872*c8bf850eSArd Biesheuvel vtbl.8 d16, {q5}, d16 873*c8bf850eSArd Biesheuvel vtbl.8 d17, {q5}, d17 874*c8bf850eSArd Biesheuvel veor q5, q8, q15 875*c8bf850eSArd Biesheuvel bcc 4f // have to reload prev if R5 < 16 876*c8bf850eSArd Biesheuvel vtbx.8 d10, {q2}, d18 877*c8bf850eSArd Biesheuvel vtbx.8 d11, {q2}, d19 878*c8bf850eSArd Biesheuvel mov pc, ip // branch back to VST sequence 879*c8bf850eSArd Biesheuvel 880*c8bf850eSArd Biesheuvel4: sub r0, r0, r4 881*c8bf850eSArd Biesheuvel vshr.s8 q9, q9, #7 // create mask for VBIF 882*c8bf850eSArd Biesheuvel vld1.8 {q8}, [r0] // reload 883*c8bf850eSArd Biesheuvel vbif q5, q8, q9 884*c8bf850eSArd Biesheuvel vst1.8 {q5}, [r0] 885*c8bf850eSArd Biesheuvel pop {r4-r10, pc} 886cc477bf6SArd BiesheuvelENDPROC(aesbs_ctr_encrypt) 887cc477bf6SArd Biesheuvel 888*c8bf850eSArd Biesheuvel .align 6 889*c8bf850eSArd Biesheuvel.Lpermute_table: 890*c8bf850eSArd Biesheuvel .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 891*c8bf850eSArd Biesheuvel .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 892*c8bf850eSArd Biesheuvel .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 893*c8bf850eSArd Biesheuvel .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f 894*c8bf850eSArd Biesheuvel .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 895*c8bf850eSArd Biesheuvel .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 896*c8bf850eSArd Biesheuvel 897cc477bf6SArd Biesheuvel .macro next_tweak, out, in, const, tmp 
898cc477bf6SArd Biesheuvel vshr.s64 \tmp, \in, #63 899cc477bf6SArd Biesheuvel vand \tmp, \tmp, \const 900cc477bf6SArd Biesheuvel vadd.u64 \out, \in, \in 901cc477bf6SArd Biesheuvel vext.8 \tmp, \tmp, \tmp, #8 902cc477bf6SArd Biesheuvel veor \out, \out, \tmp 903cc477bf6SArd Biesheuvel .endm 904cc477bf6SArd Biesheuvel 905cc477bf6SArd Biesheuvel /* 906cc477bf6SArd Biesheuvel * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, 9072ed8b790SArd Biesheuvel * int blocks, u8 iv[], int reorder_last_tweak) 908cc477bf6SArd Biesheuvel * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, 9092ed8b790SArd Biesheuvel * int blocks, u8 iv[], int reorder_last_tweak) 910cc477bf6SArd Biesheuvel */ 911*c8bf850eSArd Biesheuvel .align 6 912cc477bf6SArd Biesheuvel__xts_prepare8: 913cc477bf6SArd Biesheuvel vld1.8 {q14}, [r7] // load iv 91438e73b3dSArd Biesheuvel vmov.i32 d30, #0x87 // compose tweak mask vector 91538e73b3dSArd Biesheuvel vmovl.u32 q15, d30 91638e73b3dSArd Biesheuvel vshr.u64 d30, d31, #7 917cc477bf6SArd Biesheuvel vmov q12, q14 918cc477bf6SArd Biesheuvel 91945a4777eSArd Biesheuvel adr ip, 0f 920cc477bf6SArd Biesheuvel and r4, r6, #7 921cc477bf6SArd Biesheuvel cmp r6, #8 922cc477bf6SArd Biesheuvel sub ip, ip, r4, lsl #5 923cc477bf6SArd Biesheuvel mov r4, sp 92445a4777eSArd Biesheuvel movlt pc, ip // computed goto if blocks < 8 925cc477bf6SArd Biesheuvel 926cc477bf6SArd Biesheuvel vld1.8 {q0}, [r1]! 927cc477bf6SArd Biesheuvel next_tweak q12, q14, q15, q13 928cc477bf6SArd Biesheuvel veor q0, q0, q14 929cc477bf6SArd Biesheuvel vst1.8 {q14}, [r4, :128]! 930cc477bf6SArd Biesheuvel 931cc477bf6SArd Biesheuvel vld1.8 {q1}, [r1]! 932cc477bf6SArd Biesheuvel next_tweak q14, q12, q15, q13 933cc477bf6SArd Biesheuvel veor q1, q1, q12 934cc477bf6SArd Biesheuvel vst1.8 {q12}, [r4, :128]! 935cc477bf6SArd Biesheuvel 936cc477bf6SArd Biesheuvel vld1.8 {q2}, [r1]! 
937cc477bf6SArd Biesheuvel next_tweak q12, q14, q15, q13 938cc477bf6SArd Biesheuvel veor q2, q2, q14 939cc477bf6SArd Biesheuvel vst1.8 {q14}, [r4, :128]! 940cc477bf6SArd Biesheuvel 941cc477bf6SArd Biesheuvel vld1.8 {q3}, [r1]! 942cc477bf6SArd Biesheuvel next_tweak q14, q12, q15, q13 943cc477bf6SArd Biesheuvel veor q3, q3, q12 944cc477bf6SArd Biesheuvel vst1.8 {q12}, [r4, :128]! 945cc477bf6SArd Biesheuvel 946cc477bf6SArd Biesheuvel vld1.8 {q4}, [r1]! 947cc477bf6SArd Biesheuvel next_tweak q12, q14, q15, q13 948cc477bf6SArd Biesheuvel veor q4, q4, q14 949cc477bf6SArd Biesheuvel vst1.8 {q14}, [r4, :128]! 950cc477bf6SArd Biesheuvel 951cc477bf6SArd Biesheuvel vld1.8 {q5}, [r1]! 952cc477bf6SArd Biesheuvel next_tweak q14, q12, q15, q13 953cc477bf6SArd Biesheuvel veor q5, q5, q12 954cc477bf6SArd Biesheuvel vst1.8 {q12}, [r4, :128]! 955cc477bf6SArd Biesheuvel 956cc477bf6SArd Biesheuvel vld1.8 {q6}, [r1]! 957cc477bf6SArd Biesheuvel next_tweak q12, q14, q15, q13 958cc477bf6SArd Biesheuvel veor q6, q6, q14 959cc477bf6SArd Biesheuvel vst1.8 {q14}, [r4, :128]! 960cc477bf6SArd Biesheuvel 961cc477bf6SArd Biesheuvel vld1.8 {q7}, [r1]! 
962cc477bf6SArd Biesheuvel next_tweak q14, q12, q15, q13 9632ed8b790SArd BiesheuvelTHUMB( itt le ) 9642ed8b790SArd Biesheuvel W(cmple) r8, #0 9652ed8b790SArd Biesheuvel ble 1f 9662ed8b790SArd Biesheuvel0: veor q7, q7, q12 967cc477bf6SArd Biesheuvel vst1.8 {q12}, [r4, :128] 968cc477bf6SArd Biesheuvel 9692ed8b790SArd Biesheuvel vst1.8 {q14}, [r7] // store next iv 970cc477bf6SArd Biesheuvel bx lr 9712ed8b790SArd Biesheuvel 9722ed8b790SArd Biesheuvel1: vswp q12, q14 9732ed8b790SArd Biesheuvel b 0b 974cc477bf6SArd BiesheuvelENDPROC(__xts_prepare8) 975cc477bf6SArd Biesheuvel 976cc477bf6SArd Biesheuvel .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 977cc477bf6SArd Biesheuvel push {r4-r8, lr} 978cc477bf6SArd Biesheuvel mov r5, sp // preserve sp 979cc477bf6SArd Biesheuvel ldrd r6, r7, [sp, #24] // get blocks and iv args 980be6d6993SArd Biesheuvel rsb r8, ip, #1 981cc477bf6SArd Biesheuvel sub ip, sp, #128 // make room for 8x tweak 982cc477bf6SArd Biesheuvel bic ip, ip, #0xf // align sp to 16 bytes 983cc477bf6SArd Biesheuvel mov sp, ip 984cc477bf6SArd Biesheuvel 985cc477bf6SArd Biesheuvel99: bl __xts_prepare8 986cc477bf6SArd Biesheuvel 987cc477bf6SArd Biesheuvel mov bskey, r2 988cc477bf6SArd Biesheuvel mov rounds, r3 989cc477bf6SArd Biesheuvel bl \do8 990cc477bf6SArd Biesheuvel 99145a4777eSArd Biesheuvel adr ip, 0f 992cc477bf6SArd Biesheuvel and lr, r6, #7 993cc477bf6SArd Biesheuvel cmp r6, #8 994cc477bf6SArd Biesheuvel sub ip, ip, lr, lsl #2 995cc477bf6SArd Biesheuvel mov r4, sp 99645a4777eSArd Biesheuvel movlt pc, ip // computed goto if blocks < 8 997cc477bf6SArd Biesheuvel 998cc477bf6SArd Biesheuvel vld1.8 {q8}, [r4, :128]! 999cc477bf6SArd Biesheuvel vld1.8 {q9}, [r4, :128]! 1000cc477bf6SArd Biesheuvel vld1.8 {q10}, [r4, :128]! 1001cc477bf6SArd Biesheuvel vld1.8 {q11}, [r4, :128]! 1002cc477bf6SArd Biesheuvel vld1.8 {q12}, [r4, :128]! 1003cc477bf6SArd Biesheuvel vld1.8 {q13}, [r4, :128]! 1004cc477bf6SArd Biesheuvel vld1.8 {q14}, [r4, :128]! 
1005cc477bf6SArd Biesheuvel vld1.8 {q15}, [r4, :128] 1006cc477bf6SArd Biesheuvel 100745a4777eSArd Biesheuvel0: adr ip, 1f 1008cc477bf6SArd Biesheuvel sub ip, ip, lr, lsl #3 100945a4777eSArd Biesheuvel movlt pc, ip // computed goto if blocks < 8 1010cc477bf6SArd Biesheuvel 1011cc477bf6SArd Biesheuvel veor \o0, \o0, q8 1012cc477bf6SArd Biesheuvel vst1.8 {\o0}, [r0]! 1013cc477bf6SArd Biesheuvel veor \o1, \o1, q9 1014cc477bf6SArd Biesheuvel vst1.8 {\o1}, [r0]! 1015cc477bf6SArd Biesheuvel veor \o2, \o2, q10 1016cc477bf6SArd Biesheuvel vst1.8 {\o2}, [r0]! 1017cc477bf6SArd Biesheuvel veor \o3, \o3, q11 1018cc477bf6SArd Biesheuvel vst1.8 {\o3}, [r0]! 1019cc477bf6SArd Biesheuvel veor \o4, \o4, q12 1020cc477bf6SArd Biesheuvel vst1.8 {\o4}, [r0]! 1021cc477bf6SArd Biesheuvel veor \o5, \o5, q13 1022cc477bf6SArd Biesheuvel vst1.8 {\o5}, [r0]! 1023cc477bf6SArd Biesheuvel veor \o6, \o6, q14 1024cc477bf6SArd Biesheuvel vst1.8 {\o6}, [r0]! 1025cc477bf6SArd Biesheuvel veor \o7, \o7, q15 1026cc477bf6SArd Biesheuvel vst1.8 {\o7}, [r0]! 1027cc477bf6SArd Biesheuvel 1028cc477bf6SArd Biesheuvel1: subs r6, r6, #8 1029cc477bf6SArd Biesheuvel bgt 99b 1030cc477bf6SArd Biesheuvel 1031cc477bf6SArd Biesheuvel mov sp, r5 1032cc477bf6SArd Biesheuvel pop {r4-r8, pc} 1033cc477bf6SArd Biesheuvel .endm 1034cc477bf6SArd Biesheuvel 1035cc477bf6SArd BiesheuvelENTRY(aesbs_xts_encrypt) 1036be6d6993SArd Biesheuvel mov ip, #0 // never reorder final tweak 1037cc477bf6SArd Biesheuvel __xts_crypt aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5 1038cc477bf6SArd BiesheuvelENDPROC(aesbs_xts_encrypt) 1039cc477bf6SArd Biesheuvel 1040cc477bf6SArd BiesheuvelENTRY(aesbs_xts_decrypt) 1041be6d6993SArd Biesheuvel ldr ip, [sp, #8] // reorder final tweak? 1042cc477bf6SArd Biesheuvel __xts_crypt aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5 1043cc477bf6SArd BiesheuvelENDPROC(aesbs_xts_decrypt) 1044