/*
 * aes-ce-core.S - AES in ECB/CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.fpu		crypto-neon-fp-armv8
	.align		3

	/*
	 * One full AES encryption round on \state using round key \key:
	 * aese performs AddRoundKey + SubBytes + ShiftRows, aesmc performs
	 * MixColumns.
	 */
	.macro		enc_round, state, key
	aese.8		\state, \key
	aesmc.8		\state, \state
	.endm

	/*
	 * One full AES decryption round on \state using round key \key:
	 * aesd performs AddRoundKey + InvSubBytes + InvShiftRows, aesimc
	 * performs InvMixColumns.
	 */
	.macro		dec_round, state, key
	aesd.8		\state, \key
	aesimc.8	\state, \state
	.endm

	/*
	 * Double rounds: two consecutive full rounds on q0 (or on q0, q1 and
	 * q2 for the _3x variants, interleaved per round key) using \key1
	 * followed by \key2.
	 */
	.macro		enc_dround, key1, key2
	enc_round	q0, \key1
	enc_round	q0, \key2
	.endm

	.macro		dec_dround, key1, key2
	dec_round	q0, \key1
	dec_round	q0, \key2
	.endm

	/*
	 * Final rounds: one full round with \key1, then the last round
	 * (which has no MixColumns step) with \key2, and finally the
	 * closing AddRoundKey with \key3 done as a plain veor.
	 */
	.macro		enc_fround, key1, key2, key3
	enc_round	q0, \key1
	aese.8		q0, \key2
	veor		q0, q0, \key3
	.endm

	.macro		dec_fround, key1, key2, key3
	dec_round	q0, \key1
	aesd.8		q0, \key2
	veor		q0, q0, \key3
	.endm

	.macro		enc_dround_3x, key1, key2
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	enc_round	q0, \key2
	enc_round	q1, \key2
	enc_round	q2, \key2
	.endm

	.macro		dec_dround_3x, key1, key2
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	dec_round	q0, \key2
	dec_round	q1, \key2
	dec_round	q2, \key2
	.endm

	.macro		enc_fround_3x, key1, key2, key3
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	aese.8		q0, \key2
	aese.8		q1, \key2
	aese.8		q2, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	.endm

	.macro		dec_fround_3x, key1, key2, key3
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	aesd.8		q0, \key2
	aesd.8		q1, \key2
	aesd.8		q2, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	.endm

	/*
	 * Body of the core transforms. Expects q8/q9 to hold the first two
	 * round keys, q14 the final round key, ip to point at the third
	 * round key and r3 to hold the number of rounds.
	 *
	 * Round keys are streamed in pairs through ip, alternating between
	 * q10-q11 and q12-q13 so that the next pair is loaded while the
	 * current pair is being used. The single comparison of r3 against
	 * 12 at the top selects the exit point: below 12 is AES-128
	 * (10 rounds), equal is AES-192 (12 rounds), above is AES-256
	 * (14 rounds). None of the instructions in between alter the flags.
	 *
	 * The macro ends in 'bx lr', i.e. it forms the tail of a function.
	 */
	.macro		do_block, dround, fround
	cmp		r3, #12			@ which key size?
	vld1.8		{q10-q11}, [ip]!
	\dround		q8, q9
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	blo		0f			@ AES-128: 10 rounds
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	beq		1f			@ AES-192: 12 rounds
	vld1.8		{q12-q13}, [ip]
	\dround		q10, q11
0:	\fround		q12, q13, q14
	bx		lr

1:	\fround		q10, q11, q14
	bx		lr
	.endm

	/*
	 * Internal, non-AAPCS compliant functions that implement the core AES
	 * transforms. These overwrite q0 - q2 (data), q10 - q13 (scratch
	 * round keys), ip and the condition flags; all other registers are
	 * preserved.
	 * Arguments:
	 *   q0        : first in/output block
	 *   q1        : second in/output block (_3x version only)
	 *   q2        : third in/output block (_3x version only)
	 *   q8        : first round key
	 *   q9        : second round key
	 *   q14       : final round key
	 *   r2        : address of round key array
	 *   r3        : number of rounds
	 */
	.align		6
aes_encrypt:
	add		ip, r2, #32		@ 3rd round key
.Laes_encrypt_tweak:				@ entry with ip set up by caller
	do_block	enc_dround, enc_fround
ENDPROC(aes_encrypt)

	.align		6
aes_decrypt:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround, dec_fround
ENDPROC(aes_decrypt)

	.align		6
aes_encrypt_3x:
	add		ip, r2, #32		@ 3rd round key
	do_block	enc_dround_3x, enc_fround_3x
ENDPROC(aes_encrypt_3x)

	.align		6
aes_decrypt_3x:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround_3x, dec_fround_3x
ENDPROC(aes_decrypt_3x)

	/*
	 * Load the round keys that the core transforms expect to find in
	 * registers: the first two into q8/q9 and the last one, located at
	 * \rk + 16 * \rounds, into q14. Clobbers ip.
	 */
	.macro		prepare_key, rk, rounds
	add		ip, \rk, \rounds, lsl #4
	vld1.8		{q8-q9}, [\rk]		@ load first 2 round keys
	vld1.8		{q14}, [ip]		@ load last round key
	.endm

	/*
	 * ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		      int blocks)
	 * ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		      int blocks)
	 *
	 * r0 = out, r1 = in, r2 = rk, r3 = rounds, [sp] = blocks.
	 * Blocks are processed three at a time while at least three remain,
	 * then one at a time. The loads and stores carry a :64 alignment
	 * hint, so in[] and out[] must be 8-byte aligned.
	 */
ENTRY(ce_aes_ecb_encrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]		@ blocks (first stack argument)
	prepare_key	r2, r3
.Lecbencloop3x:
	subs		r4, r4, #3
	bmi		.Lecbenc1x		@ fewer than 3 blocks left
	vld1.8		{q0-q1}, [r1, :64]!
	vld1.8		{q2}, [r1, :64]!
	bl		aes_encrypt_3x
	vst1.8		{q0-q1}, [r0, :64]!
	vst1.8		{q2}, [r0, :64]!
	b		.Lecbencloop3x
.Lecbenc1x:
	adds		r4, r4, #3		@ undo the speculative subtract
	beq		.Lecbencout
.Lecbencloop:
	vld1.8		{q0}, [r1, :64]!
	bl		aes_encrypt
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	bne		.Lecbencloop
.Lecbencout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_encrypt)

ENTRY(ce_aes_ecb_decrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]		@ blocks (first stack argument)
	prepare_key	r2, r3
.Lecbdecloop3x:
	subs		r4, r4, #3
	bmi		.Lecbdec1x		@ fewer than 3 blocks left
	vld1.8		{q0-q1}, [r1, :64]!
	vld1.8		{q2}, [r1, :64]!
	bl		aes_decrypt_3x
	vst1.8		{q0-q1}, [r0, :64]!
	vst1.8		{q2}, [r0, :64]!
	b		.Lecbdecloop3x
.Lecbdec1x:
	adds		r4, r4, #3		@ undo the speculative subtract
	beq		.Lecbdecout
.Lecbdecloop:
	vld1.8		{q0}, [r1, :64]!
	bl		aes_decrypt
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_decrypt)

	/*
	 * ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		      int blocks, u8 iv[])
	 * ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		      int blocks, u8 iv[])
	 *
	 * r0 = out, r1 = in, r2 = rk, r3 = rounds, [sp] = blocks, [sp, #4] = iv.
	 * On return iv[] holds the chaining value for a subsequent call (the
	 * last ciphertext block).
	 *
	 * ce_aes_cbc_encrypt processes a block before it tests the counter,
	 * so it must be called with blocks != 0. ce_aes_cbc_decrypt accepts
	 * blocks == 0.
	 */
ENTRY(ce_aes_cbc_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = iv
	vld1.8		{q0}, [r5]
	prepare_key	r2, r3
.Lcbcencloop:
	vld1.8		{q1}, [r1, :64]!	@ get next pt block
	veor		q0, q0, q1		@ ..and xor with iv
	bl		aes_encrypt
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	bne		.Lcbcencloop
	vst1.8		{q0}, [r5]		@ return last ct block as next iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_encrypt)

ENTRY(ce_aes_cbc_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = iv
	vld1.8		{q6}, [r5]		@ keep iv in q6
	prepare_key	r2, r3
.Lcbcdecloop3x:
	subs		r4, r4, #3
	bmi		.Lcbcdec1x		@ fewer than 3 blocks left
	vld1.8		{q0-q1}, [r1, :64]!
	vld1.8		{q2}, [r1, :64]!
	vmov		q3, q0			@ save the ct blocks: each one is
	vmov		q4, q1			@ the chaining value for the
	vmov		q5, q2			@ block that follows it
	bl		aes_decrypt_3x
	veor		q0, q0, q6
	veor		q1, q1, q3
	veor		q2, q2, q4
	vmov		q6, q5			@ last ct block becomes the iv
	vst1.8		{q0-q1}, [r0, :64]!
	vst1.8		{q2}, [r0, :64]!
	b		.Lcbcdecloop3x
.Lcbcdec1x:
	adds		r4, r4, #3		@ undo the speculative subtract
	beq		.Lcbcdecout
	vmov		q15, q14		@ preserve last round key
.Lcbcdecloop:
	vld1.8		{q0}, [r1, :64]!	@ get next ct block
	veor		q14, q15, q6		@ combine prev ct with last key
	vmov		q6, q0
	bl		aes_decrypt		@ final veor also applies the iv
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	vst1.8		{q6}, [r5]		@ return last ct block as next iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)

	/*
	 * ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		      int blocks, u8 ctr[])
	 *
	 * r0 = out, r1 = in, r2 = rk, r3 = rounds, [sp] = blocks, [sp, #4] = ctr.
	 * ctr[] is a 128-bit big-endian counter. It is kept in q6, with a
	 * byte-swapped copy of its least significant 32-bit word in r6.
	 *
	 * If adding 'blocks' to that low word would carry out of 32 bits, the
	 * one-block loop is used for the whole request, because only that loop
	 * propagates a carry into the upper words (.Lctrcarry).
	 *
	 * A negative block count in the one-block loop selects the half block
	 * path: 8 bytes are read, xored with the key stream and written, and
	 * the function returns without updating ctr[].
	 */
ENTRY(ce_aes_ctr_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = ctr
	vld1.8		{q6}, [r5]		@ load ctr
	prepare_key	r2, r3
	vmov		r6, s27			@ keep swabbed ctr in r6
	rev		r6, r6
	cmn		r6, r4			@ 32 bit overflow?
	bcs		.Lctrloop
.Lctrloop3x:
	subs		r4, r4, #3
	bmi		.Lctr1x			@ fewer than 3 blocks left
	add		r6, r6, #1
	vmov		q0, q6			@ q0 = ctr
	vmov		q1, q6
	rev		ip, r6
	add		r6, r6, #1
	vmov		q2, q6
	vmov		s7, ip			@ q1 = ctr + 1
	rev		ip, r6
	add		r6, r6, #1
	vmov		s11, ip			@ q2 = ctr + 2
	vld1.8		{q3-q4}, [r1, :64]!
	vld1.8		{q5}, [r1, :64]!
	bl		aes_encrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	rev		ip, r6
	vst1.8		{q0-q1}, [r0, :64]!
	vst1.8		{q2}, [r0, :64]!
	vmov		s27, ip			@ ctr += 3
	b		.Lctrloop3x
.Lctr1x:
	adds		r4, r4, #3		@ undo the speculative subtract
	beq		.Lctrout
.Lctrloop:
	vmov		q0, q6
	bl		aes_encrypt
	subs		r4, r4, #1
	bmi		.Lctrhalfblock		@ blocks < 0 means 1/2 block
	vld1.8		{q3}, [r1, :64]!
	veor		q3, q0, q3
	vst1.8		{q3}, [r0, :64]!

	adds		r6, r6, #1		@ increment BE ctr
	rev		ip, r6			@ rev/vmov leave the flags alone
	vmov		s27, ip
	bcs		.Lctrcarry		@ carry out of the low word
	teq		r4, #0
	bne		.Lctrloop
.Lctrout:
	vst1.8		{q6}, [r5]		@ return updated ctr
	pop		{r4-r6, pc}

.Lctrhalfblock:
	vld1.8		{d1}, [r1, :64]
	veor		d0, d0, d1
	vst1.8		{d0}, [r0, :64]
	pop		{r4-r6, pc}

.Lctrcarry:
	.irp		sreg, s26, s25, s24
	vmov		ip, \sreg		@ load next word of ctr
	rev		ip, ip			@ ... to handle the carry
	adds		ip, ip, #1
	rev		ip, ip
	vmov		\sreg, ip
	bcc		0f			@ stop once the carry is absorbed
	.endr
0:	teq		r4, #0
	beq		.Lctrout
	b		.Lctrloop
ENDPROC(ce_aes_ctr_encrypt)

	/*
	 * ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		      int blocks, u8 iv[], u8 const rk2[], int first)
	 * ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		      int blocks, u8 iv[], u8 const rk2[], int first)
	 *
	 * r0 = out, r1 = in, r2 = rk1, r3 = rounds, [sp] = blocks,
	 * [sp, #4] = iv, [sp, #8] = rk2, [sp, #12] = first.
	 *
	 * With first == 1, iv[] is encrypted with rk2 to produce the initial
	 * tweak. With first == 0, iv[] is taken to be the tweak of the last
	 * block processed so far and is advanced once before use. On return
	 * iv[] holds the tweak of the last block processed by this call.
	 */

	/*
	 * \out = \in multiplied by x in GF(2^128), with \in treated as two
	 * 64-bit lanes. \const must hold { 1, 0x87 } (see .Lxts_mul_x) and
	 * \tmp is scratch.
	 *
	 * The arithmetic shift turns the top bit of each lane into an
	 * all-ones or all-zeroes mask, which selects the matching lane of
	 * \const. Swapping the two lanes with vext then routes the carry of
	 * the low lane (1) into the high lane, and the reduction constant
	 * (0x87) for a carry out of the high lane into the low lane.
	 */
	.macro		next_tweak, out, in, const, tmp
	vshr.s64	\tmp, \in, #63
	vand		\tmp, \tmp, \const
	vadd.u64	\out, \in, \in
	vext.8		\tmp, \tmp, \tmp, #8
	veor		\out, \out, \tmp
	.endm

	.align		3
.Lxts_mul_x:
	.quad		1, 0x87

	/*
	 * Shared XTS prologue, called right after 'push {r4-r6, lr}' in the
	 * entry points, which is why the stack arguments sit at sp + 16 and
	 * up. Non-AAPCS: it returns its results in registers.
	 *
	 * Out:
	 *   q7        : tweak multiplication constant { 1, 0x87 }
	 *   r4        : blocks
	 *   r5        : iv
	 *   q0        : iv[] as loaded when first != 1, or iv[] encrypted
	 *               with rk2 when first == 1
	 *   r6        : 'first' when first != 1, or the address of rk2
	 *               when first == 1
	 *
	 * When first == 1 this also loads q8, q9 and q14 from rk2 and
	 * overwrites q10 - q13 and ip, so the caller has to run prepare_key
	 * for rk1 afterwards.
	 */
ce_aes_xts_init:
	vldr		d14, .Lxts_mul_x
	vldr		d15, .Lxts_mul_x + 8

	ldrd		r4, r5, [sp, #16]	@ load args
	ldr		r6, [sp, #28]
	vld1.8		{q0}, [r5]		@ load iv
	teq		r6, #1			@ start of a block?
	bxne		lr

	@ Encrypt the IV in q0 with the second AES key. This should only
	@ be done at the start of a block.
	ldr		r6, [sp, #24]		@ load AES key 2
	prepare_key	r6, r3
	add		ip, r6, #32		@ 3rd round key of key 2
	b		.Laes_encrypt_tweak	@ tail call
ENDPROC(ce_aes_xts_init)

ENTRY(ce_aes_xts_encrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q3, q0			@ q3 = current tweak

	teq		r6, #0			@ start of a block?
	bne		.Lxtsenc3x

.Lxtsencloop3x:
	next_tweak	q3, q3, q7, q6
.Lxtsenc3x:
	subs		r4, r4, #3
	bmi		.Lxtsenc1x		@ fewer than 3 blocks left
	vld1.8		{q0-q1}, [r1, :64]!	@ get 3 pt blocks
	vld1.8		{q2}, [r1, :64]!
	next_tweak	q4, q3, q7, q6
	veor		q0, q0, q3
	next_tweak	q5, q4, q7, q6
	veor		q1, q1, q4
	veor		q2, q2, q5
	bl		aes_encrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0, :64]!	@ write 3 ct blocks
	vst1.8		{q2}, [r0, :64]!
	vmov		q3, q5			@ tweak of the last block done
	teq		r4, #0
	beq		.Lxtsencout
	b		.Lxtsencloop3x
.Lxtsenc1x:
	adds		r4, r4, #3		@ undo the speculative subtract
	beq		.Lxtsencout
.Lxtsencloop:
	vld1.8		{q0}, [r1, :64]!
	veor		q0, q0, q3
	bl		aes_encrypt
	veor		q0, q0, q3
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	beq		.Lxtsencout
	next_tweak	q3, q3, q7, q6
	b		.Lxtsencloop
.Lxtsencout:
	vst1.8		{q3}, [r5]		@ return last tweak used
	pop		{r4-r6, pc}
ENDPROC(ce_aes_xts_encrypt)


ENTRY(ce_aes_xts_decrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q3, q0			@ q3 = current tweak

	teq		r6, #0			@ start of a block?
	bne		.Lxtsdec3x

.Lxtsdecloop3x:
	next_tweak	q3, q3, q7, q6
.Lxtsdec3x:
	subs		r4, r4, #3
	bmi		.Lxtsdec1x		@ fewer than 3 blocks left
	vld1.8		{q0-q1}, [r1, :64]!	@ get 3 ct blocks
	vld1.8		{q2}, [r1, :64]!
	next_tweak	q4, q3, q7, q6
	veor		q0, q0, q3
	next_tweak	q5, q4, q7, q6
	veor		q1, q1, q4
	veor		q2, q2, q5
	bl		aes_decrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0, :64]!	@ write 3 pt blocks
	vst1.8		{q2}, [r0, :64]!
	vmov		q3, q5			@ tweak of the last block done
	teq		r4, #0
	beq		.Lxtsdecout
	b		.Lxtsdecloop3x
.Lxtsdec1x:
	adds		r4, r4, #3		@ undo the speculative subtract
	beq		.Lxtsdecout
.Lxtsdecloop:
	vld1.8		{q0}, [r1, :64]!
	veor		q0, q0, q3
	bl		aes_decrypt		@ sets up ip from r2 itself
	veor		q0, q0, q3
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	beq		.Lxtsdecout
	next_tweak	q3, q3, q7, q6
	b		.Lxtsdecloop
.Lxtsdecout:
	vst1.8		{q3}, [r5]		@ return last tweak used
	pop		{r4-r6, pc}
ENDPROC(ce_aes_xts_decrypt)

	/*
	 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
	 *                             AES sbox substitution on each byte in
	 *                             'input'
	 *
	 * 'input' is replicated into all four columns of q1 and the state in
	 * q0 is zeroed, so the AddRoundKey step of aese yields q1 itself.
	 * Because every column is identical, the ShiftRows step (which only
	 * moves bytes between columns) has no visible effect, and column 0
	 * of the result is the byte-wise substitution of 'input'.
	 */
ENTRY(ce_aes_sub)
	vdup.32		q1, r0
	veor		q0, q0, q0
	aese.8		q0, q1
	vmov		r0, s0
	bx		lr
ENDPROC(ce_aes_sub)

	/*
	 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
	 *                                        operation on round key *src
	 *
	 * Reads 16 bytes from src and writes 16 bytes to dst.
	 */
ENTRY(ce_aes_invert)
	vld1.8		{q0}, [r1]
	aesimc.8	q0, q0
	vst1.8		{q0}, [r0]
	bx		lr
ENDPROC(ce_aes_invert)