/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-core.S - AES in ECB/CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.fpu		crypto-neon-fp-armv8
	.align		3

	/*
	 * One full encryption round on 'state' using round key 'key':
	 * aese XORs the key into the state and applies SubBytes/ShiftRows,
	 * aesmc then applies MixColumns.
	 */
	.macro		enc_round, state, key
	aese.8		\state, \key
	aesmc.8		\state, \state
	.endm

	/*
	 * One full decryption round on 'state' using round key 'key':
	 * aesd followed by the inverse MixColumns step (aesimc).
	 */
	.macro		dec_round, state, key
	aesd.8		\state, \key
	aesimc.8	\state, \state
	.endm

	/*
	 * Two consecutive encryption rounds on the single block held in q0.
	 */
	.macro		enc_dround, key1, key2
	enc_round	q0, \key1
	enc_round	q0, \key2
	.endm

	/*
	 * Two consecutive decryption rounds on the single block held in q0.
	 */
	.macro		dec_dround, key1, key2
	dec_round	q0, \key1
	dec_round	q0, \key2
	.endm

	/*
	 * Final two encryption rounds on q0: a full round with key1, then a
	 * round without MixColumns using key2, then the XOR with the last
	 * round key key3.
	 */
	.macro		enc_fround, key1, key2, key3
	enc_round	q0, \key1
	aese.8		q0, \key2
	veor		q0, q0, \key3
	.endm

	/*
	 * Final two decryption rounds on q0: a full round with key1, then a
	 * round without inverse MixColumns using key2, then the XOR with the
	 * last round key key3.
	 */
	.macro		dec_fround, key1, key2, key3
	dec_round	q0, \key1
	aesd.8		q0, \key2
	veor		q0, q0, \key3
	.endm

/*
 * Three-way interleaved variants of the round macros above: the same
 * operation is applied to the blocks in q0, q1 and q2 with shared round keys.
 */
	.macro		enc_dround_3x, key1, key2
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	enc_round	q0, \key2
	enc_round	q1, \key2
	enc_round	q2, \key2
	.endm

	.macro		dec_dround_3x, key1, key2
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	dec_round	q0, \key2
	dec_round	q1, \key2
	dec_round	q2, \key2
	.endm

	.macro		enc_fround_3x, key1, key2, key3
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	aese.8		q0, \key2
	aese.8		q1, \key2
	aese.8		q2, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	.endm

	.macro		dec_fround_3x, key1, key2, key3
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	aesd.8		q0, \key2
	aesd.8		q1, \key2
	aesd.8		q2, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	.endm

	/*
	 * Body shared by all core transforms. 'dround' performs two rounds and
	 * 'fround' the final two rounds plus the last key addition.
	 *
	 * On entry q8/q9 hold the first two round keys, q14 the last one, ip
	 * points at the 3rd round key and r3 holds the number of rounds. The
	 * remaining round keys are streamed through q10-q13 while the
	 * previously loaded pair is being consumed.
	 *
	 * The flags set by the initial cmp are consumed by the blo/beq further
	 * down, so nothing in between may modify them:
	 *   rounds <  12 -> 10 rounds (AES-128)
	 *   rounds == 12 -> 12 rounds (AES-192)
	 *   otherwise    -> 14 rounds (AES-256)
	 *
	 * Returns to the caller with bx lr.
	 */
	.macro		do_block, dround, fround
	cmp		r3, #12			@ which key size?
	vld1.8		{q10-q11}, [ip]!
	\dround		q8, q9
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	blo		0f			@ AES-128: 10 rounds
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	beq		1f			@ AES-192: 12 rounds
	vld1.8		{q12-q13}, [ip]
	\dround		q10, q11
0:	\fround		q12, q13, q14
	bx		lr

1:	\fround		q10, q11, q14
	bx		lr
	.endm

	/*
	 * Internal, non-AAPCS compliant functions that implement the core AES
	 * transforms. These preserve all registers except q0 - q2, q10 - q13
	 * and ip; the condition flags are overwritten as well.
	 * Arguments:
	 *   q0        : first in/output block
	 *   q1        : second in/output block (_3x version only)
	 *   q2        : third in/output block (_3x version only)
	 *   q8        : first round key
	 *   q9        : second round key
	 *   q14       : final round key
	 *   r2        : address of round key array
	 *   r3        : number of rounds
	 */
	.align		6
aes_encrypt:
	add		ip, r2, #32		@ 3rd round key
.Laes_encrypt_tweak:				@ entered with ip preset by ce_aes_xts_init
	do_block	enc_dround, enc_fround
ENDPROC(aes_encrypt)

	.align		6
aes_decrypt:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround, dec_fround
ENDPROC(aes_decrypt)

	.align		6
aes_encrypt_3x:
	add		ip, r2, #32		@ 3rd round key
	do_block	enc_dround_3x, enc_fround_3x
ENDPROC(aes_encrypt_3x)

	.align		6
aes_decrypt_3x:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround_3x, dec_fround_3x
ENDPROC(aes_decrypt_3x)

	/*
	 * Load the round keys that stay resident for the whole operation:
	 * the first two into q8/q9 and the last one, located at
	 * rk + 16 * rounds, into q14. Clobbers ip.
	 */
	.macro		prepare_key, rk, rounds
	add		ip, \rk, \rounds, lsl #4
	vld1.8		{q8-q9}, [\rk]		@ load first 2 round keys
	vld1.8		{q14}, [ip]		@ load last round key
	.endm

	/*
	 * ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		      int blocks)
	 * ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		      int blocks)
	 *
	 * Blocks are processed three at a time while at least three remain,
	 * then one at a time.
	 */
ENTRY(ce_aes_ecb_encrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]		@ blocks (5th argument, on the stack)
	prepare_key	r2, r3
.Lecbencloop3x:
	subs		r4, r4, #3
	bmi		.Lecbenc1x		@ fewer than 3 blocks left
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2}, [r1]!
	bl		aes_encrypt_3x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	b		.Lecbencloop3x
.Lecbenc1x:
	adds		r4, r4, #3		@ undo the subtraction above
	beq		.Lecbencout
.Lecbencloop:
	vld1.8		{q0}, [r1]!
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecbencloop
.Lecbencout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_encrypt)

ENTRY(ce_aes_ecb_decrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]		@ blocks (5th argument, on the stack)
	prepare_key	r2, r3
.Lecbdecloop3x:
	subs		r4, r4, #3
	bmi		.Lecbdec1x		@ fewer than 3 blocks left
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2}, [r1]!
	bl		aes_decrypt_3x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	b		.Lecbdecloop3x
.Lecbdec1x:
	adds		r4, r4, #3		@ undo the subtraction above
	beq		.Lecbdecout
.Lecbdecloop:
	vld1.8		{q0}, [r1]!
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_decrypt)

	/*
	 * ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		      int blocks, u8 iv[])
	 * ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		      int blocks, u8 iv[])
	 *
	 * Both routines write the chaining value for a subsequent call back
	 * to iv[] before returning.
	 *
	 * NOTE: the encryption loop is entered without testing the block
	 * count, so ce_aes_cbc_encrypt must be called with blocks != 0.
	 */
ENTRY(ce_aes_cbc_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = iv
	vld1.8		{q0}, [r5]
	prepare_key	r2, r3
.Lcbcencloop:
	vld1.8		{q1}, [r1]!		@ get next pt block
	veor		q0, q0, q1		@ ..and xor with iv
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbcencloop
	vst1.8		{q0}, [r5]		@ last ct block is the next iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_encrypt)

ENTRY(ce_aes_cbc_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = iv
	vld1.8		{q6}, [r5]		@ keep iv in q6
	prepare_key	r2, r3
.Lcbcdecloop3x:
	subs		r4, r4, #3
	bmi		.Lcbcdec1x		@ fewer than 3 blocks left
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2}, [r1]!
	vmov		q3, q0			@ keep copies of the ct blocks;
	vmov		q4, q1			@ they are the chaining values
	vmov		q5, q2			@ for the blocks that follow
	bl		aes_decrypt_3x
	veor		q0, q0, q6
	veor		q1, q1, q3
	veor		q2, q2, q4
	vmov		q6, q5			@ last ct block becomes the iv
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	b		.Lcbcdecloop3x
.Lcbcdec1x:
	adds		r4, r4, #3		@ undo the subtraction above
	beq		.Lcbcdecout
	vmov		q15, q14		@ preserve last round key
.Lcbcdecloop:
	vld1.8		{q0}, [r1]!		@ get next ct block
	veor		q14, q15, q6		@ combine prev ct with last key
	vmov		q6, q0
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	vst1.8		{q6}, [r5]		@ write back the iv kept in q6
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)

	/*
	 * ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		      int blocks, u8 ctr[])
	 *
	 * The counter block lives in q6, with a byte-swapped copy of its last
	 * 32-bit word in r6. The 3x loop only increments that word, so it is
	 * skipped in favour of the single block loop (which propagates
	 * carries into the upper words) whenever adding 'blocks' to r6 would
	 * wrap around.
	 *
	 * A negative block count makes the routine store one block of raw key
	 * stream at out[] instead of reading in[] (see .Lctrtailblock).
	 * The next counter value is written back to ctr[].
	 */
ENTRY(ce_aes_ctr_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = ctr
	vld1.8		{q6}, [r5]		@ load ctr
	prepare_key	r2, r3
	vmov		r6, s27			@ keep swabbed ctr in r6
	rev		r6, r6
	cmn		r6, r4			@ 32 bit overflow?
	bcs		.Lctrloop
.Lctrloop3x:
	subs		r4, r4, #3
	bmi		.Lctr1x
	add		r6, r6, #1
	vmov		q0, q6			@ q0 = ctr
	vmov		q1, q6
	rev		ip, r6
	add		r6, r6, #1
	vmov		q2, q6
	vmov		s7, ip			@ q1 = ctr + 1
	rev		ip, r6
	add		r6, r6, #1
	vmov		s11, ip			@ q2 = ctr + 2
	vld1.8		{q3-q4}, [r1]!
	vld1.8		{q5}, [r1]!
	bl		aes_encrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	rev		ip, r6
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	vmov		s27, ip			@ q6 = ctr + 3
	b		.Lctrloop3x
.Lctr1x:
	adds		r4, r4, #3		@ undo the subtraction above
	beq		.Lctrout
.Lctrloop:
	vmov		q0, q6
	bl		aes_encrypt

	adds		r6, r6, #1		@ increment BE ctr
	rev		ip, r6			@ rev/vmov leave the carry intact
	vmov		s27, ip
	bcs		.Lctrcarry

.Lctrcarrydone:
	subs		r4, r4, #1
	bmi		.Lctrtailblock		@ blocks < 0 means tail block
	vld1.8		{q3}, [r1]!
	veor		q3, q0, q3
	vst1.8		{q3}, [r0]!
	bne		.Lctrloop		@ flags still from the subs above

.Lctrout:
	vst1.8		{q6}, [r5]		@ return next CTR value
	pop		{r4-r6, pc}

.Lctrtailblock:
	vst1.8		{q0}, [r0, :64]		@ return the key stream
	b		.Lctrout

.Lctrcarry:
	@ The low word wrapped: ripple the carry through the remaining
	@ big-endian words and stop at the first one that does not wrap.
	.irp		sreg, s26, s25, s24
	vmov		ip, \sreg		@ load next word of ctr
	rev		ip, ip			@ ... to handle the carry
	adds		ip, ip, #1
	rev		ip, ip
	vmov		\sreg, ip
	bcc		.Lctrcarrydone
	.endr
	b		.Lctrcarrydone
ENDPROC(ce_aes_ctr_encrypt)

	/*
	 * ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		      int blocks, u8 iv[], u8 const rk2[], int first)
	 * ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		      int blocks, u8 iv[], u8 const rk2[], int first)
	 *
	 * On return iv[] holds the tweak that was used for the last block, so
	 * a continuation call (first == 0) advances it once before use.
	 */

	/*
	 * Compute the next tweak: each 64-bit half of 'in' is doubled, and the
	 * bit shifted out of each half is folded into the other half through
	 * 'const' (the .Lxts_mul_x pair), i.e. a plain carry into the upper
	 * half and a 0x87 reduction into the lower half. 'out' may alias
	 * 'in'; 'tmp' is clobbered.
	 */
	.macro		next_tweak, out, in, const, tmp
	vshr.s64	\tmp, \in, #63
	vand		\tmp, \tmp, \const
	vadd.u64	\out, \in, \in
	vext.8		\tmp, \tmp, \tmp, #8
	veor		\out, \out, \tmp
	.endm

	.align		3
.Lxts_mul_x:
	.quad		1, 0x87

	/*
	 * Shared, non-AAPCS prologue of the XTS routines. It must be called
	 * right after the caller executed push {r4-r6, lr}, because the stack
	 * arguments are addressed relative to that frame.
	 *
	 * On return:
	 *   q7 : tweak constant pair from .Lxts_mul_x
	 *   r4 : blocks
	 *   r5 : iv
	 *   q0 : iv, encrypted with rk2 if first == 1, otherwise as loaded
	 *   r6 : address of rk2 if first == 1, otherwise the value of first
	 */
ce_aes_xts_init:
	vldr		d14, .Lxts_mul_x
	vldr		d15, .Lxts_mul_x + 8

	ldrd		r4, r5, [sp, #16]	@ load args
	ldr		r6, [sp, #28]
	vld1.8		{q0}, [r5]		@ load iv
	teq		r6, #1			@ start of a block?
	bxne		lr

	@ Encrypt the IV in q0 with the second AES key. This should only
	@ be done at the start of a block.
	ldr		r6, [sp, #24]		@ load AES key 2
	prepare_key	r6, r3
	add		ip, r6, #32		@ 3rd round key of key 2
	b		.Laes_encrypt_tweak	@ tail call
ENDPROC(ce_aes_xts_init)

ENTRY(ce_aes_xts_encrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q3, q0			@ q3 = current tweak

	teq		r6, #0			@ start of a block?
	bne		.Lxtsenc3x		@ yes: use the tweak as is

.Lxtsencloop3x:
	next_tweak	q3, q3, q7, q6
.Lxtsenc3x:
	subs		r4, r4, #3
	bmi		.Lxtsenc1x
	vld1.8		{q0-q1}, [r1]!		@ get 3 pt blocks
	vld1.8		{q2}, [r1]!
	next_tweak	q4, q3, q7, q6
	veor		q0, q0, q3
	next_tweak	q5, q4, q7, q6
	veor		q1, q1, q4
	veor		q2, q2, q5
	bl		aes_encrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0]!		@ write 3 ct blocks
	vst1.8		{q2}, [r0]!
	vmov		q3, q5			@ last tweak used so far
	teq		r4, #0
	beq		.Lxtsencout
	b		.Lxtsencloop3x
.Lxtsenc1x:
	adds		r4, r4, #3		@ undo the subtraction above
	beq		.Lxtsencout
.Lxtsencloop:
	vld1.8		{q0}, [r1]!
	veor		q0, q0, q3
	bl		aes_encrypt
	veor		q0, q0, q3
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	beq		.Lxtsencout
	next_tweak	q3, q3, q7, q6
	b		.Lxtsencloop
.Lxtsencout:
	vst1.8		{q3}, [r5]		@ hand the last tweak back in iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_xts_encrypt)


ENTRY(ce_aes_xts_decrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q3, q0			@ q3 = current tweak

	teq		r6, #0			@ start of a block?
	bne		.Lxtsdec3x		@ yes: use the tweak as is

.Lxtsdecloop3x:
	next_tweak	q3, q3, q7, q6
.Lxtsdec3x:
	subs		r4, r4, #3
	bmi		.Lxtsdec1x
	vld1.8		{q0-q1}, [r1]!		@ get 3 ct blocks
	vld1.8		{q2}, [r1]!
	next_tweak	q4, q3, q7, q6
	veor		q0, q0, q3
	next_tweak	q5, q4, q7, q6
	veor		q1, q1, q4
	veor		q2, q2, q5
	bl		aes_decrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0]!		@ write 3 pt blocks
	vst1.8		{q2}, [r0]!
	vmov		q3, q5			@ last tweak used so far
	teq		r4, #0
	beq		.Lxtsdecout
	b		.Lxtsdecloop3x
.Lxtsdec1x:
	adds		r4, r4, #3		@ undo the subtraction above
	beq		.Lxtsdecout
.Lxtsdecloop:
	vld1.8		{q0}, [r1]!
	veor		q0, q0, q3
	bl		aes_decrypt		@ sets up ip itself
	veor		q0, q0, q3
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	beq		.Lxtsdecout
	next_tweak	q3, q3, q7, q6
	b		.Lxtsdecloop
.Lxtsdecout:
	vst1.8		{q3}, [r5]		@ hand the last tweak back in iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_xts_decrypt)

	/*
	 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
	 *                             AES sbox substitution on each byte in
	 *                             'input'
	 *
	 * The input word is replicated into all four lanes of q1 and used as
	 * the key operand against an all-zero state, and the result is read
	 * back from the lowest lane.
	 */
ENTRY(ce_aes_sub)
	vdup.32		q1, r0
	veor		q0, q0, q0
	aese.8		q0, q1
	vmov		r0, s0
	bx		lr
ENDPROC(ce_aes_sub)

	/*
	 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
	 *                                        operation on round key *src
	 */
ENTRY(ce_aes_invert)
	vld1.8		{q0}, [r1]
	aesimc.8	q0, q0
	vst1.8		{q0}, [r0]
	bx		lr
ENDPROC(ce_aes_invert)