/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a
	.fpu		crypto-neon-fp-armv8
	.align		3

	/*
	 * One full encryption round on 'state' with round key 'key':
	 * aese.8 followed by aesmc.8 on the same register.
	 */
	.macro		enc_round, state, key
	aese.8		\state, \key
	aesmc.8		\state, \state
	.endm

	/*
	 * One full decryption round on 'state' with round key 'key':
	 * aesd.8 followed by aesimc.8 on the same register.
	 */
	.macro		dec_round, state, key
	aesd.8		\state, \key
	aesimc.8	\state, \state
	.endm

	/* Two consecutive encryption rounds on q0, using key1 and then key2. */
	.macro		enc_dround, key1, key2
	enc_round	q0, \key1
	enc_round	q0, \key2
	.endm

	/* Two consecutive decryption rounds on q0, using key1 and then key2. */
	.macro		dec_dround, key1, key2
	dec_round	q0, \key1
	dec_round	q0, \key2
	.endm

	/*
	 * Final two encryption rounds on q0: a full round with key1, then
	 * aese.8 with key2 without the aesmc.8 step, and finally an
	 * exclusive-or with key3.
	 */
	.macro		enc_fround, key1, key2, key3
	enc_round	q0, \key1
	aese.8		q0, \key2
	veor		q0, q0, \key3
	.endm

	/*
	 * Final two decryption rounds on q0: a full round with key1, then
	 * aesd.8 with key2 without the aesimc.8 step, and finally an
	 * exclusive-or with key3.
	 */
	.macro		dec_fround, key1, key2, key3
	dec_round	q0, \key1
	aesd.8		q0, \key2
	veor		q0, q0, \key3
	.endm

/*
 * 4-way interleaved variants of the round macros: every step is issued for
 * q0, q1, q2 and q3 in turn before the next step is started.
 */
	.macro		enc_dround_4x, key1, key2
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	enc_round	q3, \key1
	enc_round	q0, \key2
	enc_round	q1, \key2
	enc_round	q2, \key2
	enc_round	q3, \key2
	.endm

	.macro		dec_dround_4x, key1, key2
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	dec_round	q3, \key1
	dec_round	q0, \key2
	dec_round	q1, \key2
	dec_round	q2, \key2
	dec_round	q3, \key2
	.endm

	.macro		enc_fround_4x, key1, key2, key3
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	enc_round	q3, \key1
	aese.8		q0, \key2
	aese.8		q1, \key2
	aese.8		q2, \key2
	aese.8		q3, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	veor		q3, q3, \key3
	.endm

	.macro		dec_fround_4x, key1, key2, key3
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	dec_round	q3, \key1
	aesd.8		q0, \key2
	aesd.8		q1, \key2
	aesd.8		q2, \key2
	aesd.8		q3, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	veor		q3, q3, \key3
	.endm

	/*
	 * Body shared by the four core transforms below. 'dround' is expanded
	 * with pairs of round keys, 'fround' with the last two round keys plus
	 * the final key in q14. On entry q8/q9 hold the first two round keys
	 * and ip points at the third one; the remaining round keys are loaded
	 * through q10-q13, interleaved with the rounds. r3 selects the number
	 * of rounds: below 12 -> 10, equal to 12 -> 12, otherwise 14.
	 * Both exits end in 'bx lr', so an expansion also forms the function
	 * return.
	 */
	.macro		do_block, dround, fround
	cmp		r3, #12			@ which key size?
	vld1.32		{q10-q11}, [ip]!
	\dround		q8, q9
	vld1.32		{q12-q13}, [ip]!
	\dround		q10, q11
	vld1.32		{q10-q11}, [ip]!
	\dround		q12, q13
	vld1.32		{q12-q13}, [ip]!
	\dround		q10, q11
	blo		0f			@ AES-128: 10 rounds
	vld1.32		{q10-q11}, [ip]!
	\dround		q12, q13
	beq		1f			@ AES-192: 12 rounds
	vld1.32		{q12-q13}, [ip]
	\dround		q10, q11
0:	\fround		q12, q13, q14
	bx		lr

1:	\fround		q10, q11, q14
	bx		lr
	.endm

	/*
	 * Internal, non-AAPCS compliant functions that implement the core AES
	 * transforms. These preserve all registers except the data blocks
	 * (q0, plus q1 - q3 for the _4x versions), q10 - q13, ip and the
	 * condition flags.
	 * Arguments:
	 *   q0        : first in/output block
	 *   q1        : second in/output block (_4x version only)
	 *   q2        : third in/output block (_4x version only)
	 *   q3        : fourth in/output block (_4x version only)
	 *   q8        : first round key
	 *   q9        : second round key
	 *   q14       : final round key
	 *   r2        : address of round key array
	 *   r3        : number of rounds
	 */
	.align		6
aes_encrypt:
	add		ip, r2, #32		@ 3rd round key
.Laes_encrypt_tweak:				@ entry with ip set up by the caller
	do_block	enc_dround, enc_fround
ENDPROC(aes_encrypt)

	.align		6
aes_decrypt:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround, dec_fround
ENDPROC(aes_decrypt)

	.align		6
aes_encrypt_4x:
	add		ip, r2, #32		@ 3rd round key
	do_block	enc_dround_4x, enc_fround_4x
ENDPROC(aes_encrypt_4x)

	.align		6
aes_decrypt_4x:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround_4x, dec_fround_4x
ENDPROC(aes_decrypt_4x)

	/*
	 * Load the round keys that the core transforms expect in registers:
	 * q8/q9 from the start of the key schedule at 'rk', and q14 from
	 * rk + 16 * rounds. Clobbers ip.
	 */
	.macro		prepare_key, rk, rounds
	add		ip, \rk, \rounds, lsl #4
	vld1.32		{q8-q9}, [\rk]		@ load first 2 round keys
	vld1.32		{q14}, [ip]		@ load last round key
	.endm

	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
	 *		   int blocks)
	 *
	 * r0 = out, r1 = in, r2 = rk, r3 = rounds, r4 = blocks (from the stack).
	 * Blocks are processed four at a time while at least four remain, then
	 * one at a time.
	 */
ENTRY(ce_aes_ecb_encrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]
	prepare_key	r2, r3
.Lecbencloop4x:
	subs		r4, r4, #4
	bmi		.Lecbenc1x
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2-q3}, [r1]!
	bl		aes_encrypt_4x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	b		.Lecbencloop4x
.Lecbenc1x:
	adds		r4, r4, #4		@ undo the bias: 0-3 blocks left
	beq		.Lecbencout
.Lecbencloop:
	vld1.8		{q0}, [r1]!
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecbencloop
.Lecbencout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_encrypt)

ENTRY(ce_aes_ecb_decrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]
	prepare_key	r2, r3
.Lecbdecloop4x:
	subs		r4, r4, #4
	bmi		.Lecbdec1x
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2-q3}, [r1]!
	bl		aes_decrypt_4x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	b		.Lecbdecloop4x
.Lecbdec1x:
	adds		r4, r4, #4		@ undo the bias: 0-3 blocks left
	beq		.Lecbdecout
.Lecbdecloop:
	vld1.8		{q0}, [r1]!
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_decrypt)

	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 *
	 * r0 = out, r1 = in, r2 = rk, r3 = rounds, r4 = blocks, r5 = iv.
	 * The chaining value for a following call is written back to iv[].
	 *
	 * NOTE(review): the encrypt loop runs its body before testing the
	 * count, so it presumably relies on the caller passing blocks > 0 -
	 * confirm against the C glue code.
	 */
ENTRY(ce_aes_cbc_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q0}, [r5]
	prepare_key	r2, r3
.Lcbcencloop:
	vld1.8		{q1}, [r1]!		@ get next pt block
	veor		q0, q0, q1		@ ..and xor with iv
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbcencloop
	vst1.8		{q0}, [r5]		@ last ct block is the next iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_encrypt)

ENTRY(ce_aes_cbc_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q15}, [r5]		@ keep iv in q15
	prepare_key	r2, r3
.Lcbcdecloop4x:
	subs		r4, r4, #4
	bmi		.Lcbcdec1x
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2-q3}, [r1]!
	vmov		q4, q0			@ keep the ct blocks for chaining
	vmov		q5, q1
	vmov		q6, q2
	vmov		q7, q3
	bl		aes_decrypt_4x
	veor		q0, q0, q15
	veor		q1, q1, q4
	veor		q2, q2, q5
	veor		q3, q3, q6
	vmov		q15, q7
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	b		.Lcbcdecloop4x
.Lcbcdec1x:
	adds		r4, r4, #4
	beq		.Lcbcdecout
	vmov		q6, q14			@ preserve last round key
.Lcbcdecloop:
	vld1.8		{q0}, [r1]!		@ get next ct block
	veor		q14, q15, q6		@ combine prev ct with last key
	vmov		q15, q0
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	vst1.8		{q15}, [r5]		@ return next iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)


	/*
	 * ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
	 *			  int rounds, int bytes, u8 const iv[])
	 * ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
	 *			  int rounds, int bytes, u8 const iv[])
	 *
	 * r0 = out, r1 = in, r2 = rk, r3 = rounds, r4 = bytes, r5 = iv.
	 * Two 16-byte vectors are loaded at in and in + bytes - 16 and two are
	 * stored at out and out + bytes - 16, so these ranges overlap whenever
	 * bytes < 32. q5/q6 receive permute vectors taken from
	 * .Lcts_permute_table at offsets bytes - 16 and 48 - bytes.
	 *
	 * NOTE(review): presumably 16 < bytes <= 32 is required so that the
	 * table and buffer accesses stay in range - confirm against the caller.
	 */

ENTRY(ce_aes_cbc_cts_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]

	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table
	sub		r4, r4, #16
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	vld1.8		{q5}, [ip]
	vld1.8		{q6}, [lr]

	add		ip, r1, r4
	vld1.8		{q0}, [r1]			@ overlapping loads
	vld1.8		{q3}, [ip]

	vld1.8		{q1}, [r5]			@ get iv
	prepare_key	r2, r3

	veor		q0, q0, q1			@ xor with iv
	bl		aes_encrypt

	vtbl.8		d4, {d0-d1}, d10
	vtbl.8		d5, {d0-d1}, d11
	vtbl.8		d2, {d6-d7}, d12
	vtbl.8		d3, {d6-d7}, d13

	veor		q0, q0, q1
	bl		aes_encrypt

	add		r4, r0, r4
	vst1.8		{q2}, [r4]			@ overlapping stores
	vst1.8		{q0}, [r0]

	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_cts_encrypt)

ENTRY(ce_aes_cbc_cts_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]

	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table
	sub		r4, r4, #16
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	vld1.8		{q5}, [ip]
	vld1.8		{q6}, [lr]

	add		ip, r1, r4
	vld1.8		{q0}, [r1]			@ overlapping loads
	vld1.8		{q1}, [ip]

	vld1.8		{q3}, [r5]			@ get iv
	prepare_key	r2, r3

	bl		aes_decrypt

	vtbl.8		d4, {d0-d1}, d10
	vtbl.8		d5, {d0-d1}, d11
	vtbx.8		d0, {d2-d3}, d12
	vtbx.8		d1, {d2-d3}, d13

	veor		q1, q1, q2
	bl		aes_decrypt
	veor		q0, q0, q3			@ xor with iv

	add		r4, r0, r4
	vst1.8		{q1}, [r4]			@ overlapping stores
	vst1.8		{q0}, [r0]

	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_cts_decrypt)


	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
	 *		   int blocks, u8 ctr[])
	 *
	 * r0 = out, r1 = in, r2 = rk, r3 = rounds, r4 = blocks, r5 = ctr.
	 * q7 holds the counter block; r6 holds its last 32-bit word
	 * byte-swapped with rev, so that it can be incremented with plain
	 * integer adds. If adding 'blocks' to r6 would carry out of 32 bits,
	 * only the one-block loop is used, because that is the only path that
	 * propagates the carry into the upper counter words (.Lctrcarry).
	 * The updated counter is written back to ctr[] on return.
	 */
ENTRY(ce_aes_ctr_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q7}, [r5]		@ load ctr
	prepare_key	r2, r3
	vmov		r6, s31			@ keep swabbed ctr in r6
	rev		r6, r6
	cmn		r6, r4			@ 32 bit overflow?
	bcs		.Lctrloop
.Lctrloop4x:
	subs		r4, r4, #4
	bmi		.Lctr1x

	/*
	 * NOTE: the sequence below has been carefully tweaked to avoid
	 * a silicon erratum that exists in Cortex-A57 (#1742098) and
	 * Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs
	 * may produce an incorrect result if they take their input from a
	 * register of which a single 32-bit lane has been updated the last
	 * time it was modified. To work around this, the lanes of registers
	 * q0-q3 below are not manipulated individually, and the different
	 * counter values are prepared by successive manipulations of q7.
	 */
	add		ip, r6, #1
	vmov		q0, q7
	rev		ip, ip
	add		lr, r6, #2
	vmov		s31, ip			@ set lane 3 of q1 via q7
	add		ip, r6, #3
	rev		lr, lr
	vmov		q1, q7
	vmov		s31, lr			@ set lane 3 of q2 via q7
	rev		ip, ip
	vmov		q2, q7
	vmov		s31, ip			@ set lane 3 of q3 via q7
	add		r6, r6, #4
	vmov		q3, q7

	vld1.8		{q4-q5}, [r1]!
	vld1.8		{q6}, [r1]!
	vld1.8		{q15}, [r1]!
	bl		aes_encrypt_4x
	veor		q0, q0, q4
	veor		q1, q1, q5
	veor		q2, q2, q6
	veor		q3, q3, q15
	rev		ip, r6
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	vmov		s31, ip
	b		.Lctrloop4x
.Lctr1x:
	adds		r4, r4, #4
	beq		.Lctrout
.Lctrloop:
	vmov		q0, q7
	bl		aes_encrypt

	adds		r6, r6, #1		@ increment BE ctr
	rev		ip, r6
	vmov		s31, ip
	bcs		.Lctrcarry

.Lctrcarrydone:
	subs		r4, r4, #1
	bmi		.Lctrtailblock		@ blocks < 0 means tail block
	vld1.8		{q3}, [r1]!
	veor		q3, q0, q3
	vst1.8		{q3}, [r0]!
	bne		.Lctrloop

.Lctrout:
	vst1.8		{q7}, [r5]		@ return next CTR value
	pop		{r4-r6, pc}

.Lctrtailblock:
	vst1.8		{q0}, [r0, :64]		@ return the key stream
	b		.Lctrout

.Lctrcarry:
	.irp		sreg, s30, s29, s28
	vmov		ip, \sreg		@ load next word of ctr
	rev		ip, ip			@ ... to handle the carry
	adds		ip, ip, #1
	rev		ip, ip
	vmov		\sreg, ip
	bcc		.Lctrcarrydone
	.endr
	b		.Lctrcarrydone
ENDPROC(ce_aes_ctr_encrypt)

	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
	 *		   int bytes, u8 iv[], u32 const rk2[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
	 *		   int bytes, u8 iv[], u32 const rk2[], int first)
	 */

	/*
	 * Derive the next tweak 'out' from 'in': each 64-bit half is doubled
	 * (in + in), and the top bit of each half, selected through 'const',
	 * is folded into the other half. 'tmp' is scratch. With the mask that
	 * ce_aes_xts_init builds in q15 (low half 1, high half 0x87), the low
	 * half carries into bit 0 of the high half and the high half wraps
	 * around as 0x87 into the low half.
	 */
	.macro		next_tweak, out, in, const, tmp
	vshr.s64	\tmp, \in, #63
	vand		\tmp, \tmp, \const
	vadd.u64	\out, \in, \in
	vext.8		\tmp, \tmp, \tmp, #8
	veor		\out, \out, \tmp
	.endm

	/*
	 * Shared XTS prologue, called right after 'push {r4-r6, lr}' so that
	 * the stack offsets below match the caller's frame.
	 * On return: q15 = tweak mask, r4 = bytes, r5 = iv, q0 = tweak.
	 * If first == 1 the tweak has been encrypted with key 2 and r6 holds
	 * the rk2 pointer; otherwise q0 is the iv as loaded and r6 = first.
	 */
ce_aes_xts_init:
	vmov.i32	d30, #0x87		@ compose tweak mask vector
	vmovl.u32	q15, d30
	vshr.u64	d30, d31, #7

	ldrd		r4, r5, [sp, #16]	@ load args
	ldr		r6, [sp, #28]
	vld1.8		{q0}, [r5]		@ load iv
	teq		r6, #1			@ start of a block?
	bxne		lr

	@ Encrypt the IV in q0 with the second AES key. This should only
	@ be done at the start of a block.
	ldr		r6, [sp, #24]		@ load AES key 2
	prepare_key	r6, r3
	add		ip, r6, #32		@ 3rd round key of key 2
	b		.Laes_encrypt_tweak	@ tail call
ENDPROC(ce_aes_xts_init)

ENTRY(ce_aes_xts_encrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q4, q0

	teq		r6, #0			@ start of a block?
	bne		.Lxtsenc4x

.Lxtsencloop4x:
	next_tweak	q4, q4, q15, q10
.Lxtsenc4x:
	subs		r4, r4, #64
	bmi		.Lxtsenc1x
	vld1.8		{q0-q1}, [r1]!		@ get 4 pt blocks
	vld1.8		{q2-q3}, [r1]!
	next_tweak	q5, q4, q15, q10
	veor		q0, q0, q4
	next_tweak	q6, q5, q15, q10
	veor		q1, q1, q5
	next_tweak	q7, q6, q15, q10
	veor		q2, q2, q6
	veor		q3, q3, q7
	bl		aes_encrypt_4x
	veor		q0, q0, q4
	veor		q1, q1, q5
	veor		q2, q2, q6
	veor		q3, q3, q7
	vst1.8		{q0-q1}, [r0]!		@ write 4 ct blocks
	vst1.8		{q2-q3}, [r0]!
	vmov		q4, q7
	teq		r4, #0
	beq		.Lxtsencret
	b		.Lxtsencloop4x
.Lxtsenc1x:
	adds		r4, r4, #64
	beq		.Lxtsencout
	subs		r4, r4, #16
	bmi		.LxtsencctsNx
.Lxtsencloop:
	vld1.8		{q0}, [r1]!
.Lxtsencctsout:
	veor		q0, q0, q4
	bl		aes_encrypt
	veor		q0, q0, q4
	teq		r4, #0
	beq		.Lxtsencout
	subs		r4, r4, #16
	next_tweak	q4, q4, q15, q6
	bmi		.Lxtsenccts
	vst1.8		{q0}, [r0]!
	b		.Lxtsencloop
.Lxtsencout:
	vst1.8		{q0}, [r0]
.Lxtsencret:
	vst1.8		{q4}, [r5]
	pop		{r4-r6, pc}

.LxtsencctsNx:
	vmov		q0, q3
	sub		r0, r0, #16
.Lxtsenccts:
	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table

	add		r1, r1, r4		@ rewind input pointer
	add		r4, r4, #16		@ # bytes in final block
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	add		r4, r0, r4		@ output address of final block

	vld1.8		{q1}, [r1]		@ load final partial block
	vld1.8		{q2}, [ip]
	vld1.8		{q3}, [lr]

	vtbl.8		d4, {d0-d1}, d4
	vtbl.8		d5, {d0-d1}, d5
	vtbx.8		d0, {d2-d3}, d6
	vtbx.8		d1, {d2-d3}, d7

	vst1.8		{q2}, [r4]		@ overlapping stores
	mov		r4, #0
	b		.Lxtsencctsout
ENDPROC(ce_aes_xts_encrypt)


ENTRY(ce_aes_xts_decrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q4, q0

	/* subtract 16 bytes if we are doing CTS */
	tst		r4, #0xf
	subne		r4, r4, #0x10

	teq		r6, #0			@ start of a block?
	bne		.Lxtsdec4x

.Lxtsdecloop4x:
	next_tweak	q4, q4, q15, q10
.Lxtsdec4x:
	subs		r4, r4, #64
	bmi		.Lxtsdec1x
	vld1.8		{q0-q1}, [r1]!		@ get 4 ct blocks
	vld1.8		{q2-q3}, [r1]!
	next_tweak	q5, q4, q15, q10
	veor		q0, q0, q4
	next_tweak	q6, q5, q15, q10
	veor		q1, q1, q5
	next_tweak	q7, q6, q15, q10
	veor		q2, q2, q6
	veor		q3, q3, q7
	bl		aes_decrypt_4x
	veor		q0, q0, q4
	veor		q1, q1, q5
	veor		q2, q2, q6
	veor		q3, q3, q7
	vst1.8		{q0-q1}, [r0]!		@ write 4 pt blocks
	vst1.8		{q2-q3}, [r0]!
	vmov		q4, q7
	teq		r4, #0
	beq		.Lxtsdecout
	b		.Lxtsdecloop4x
.Lxtsdec1x:
	adds		r4, r4, #64
	beq		.Lxtsdecout
	subs		r4, r4, #16
.Lxtsdecloop:
	vld1.8		{q0}, [r1]!
	bmi		.Lxtsdeccts
.Lxtsdecctsout:
	veor		q0, q0, q4
	bl		aes_decrypt
	veor		q0, q0, q4
	vst1.8		{q0}, [r0]!
	teq		r4, #0
	beq		.Lxtsdecout
	subs		r4, r4, #16
	next_tweak	q4, q4, q15, q6
	b		.Lxtsdecloop
.Lxtsdecout:
	vst1.8		{q4}, [r5]
	pop		{r4-r6, pc}

.Lxtsdeccts:
	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table

	add		r1, r1, r4		@ rewind input pointer
	add		r4, r4, #16		@ # bytes in final block
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	add		r4, r0, r4		@ output address of final block

	next_tweak	q5, q4, q15, q6

	/* ip and lr must be consumed here: the call below overwrites both */
	vld1.8		{q1}, [r1]		@ load final partial block
	vld1.8		{q2}, [ip]
	vld1.8		{q3}, [lr]

	veor		q0, q0, q5
	bl		aes_decrypt
	veor		q0, q0, q5

	vtbl.8		d4, {d0-d1}, d4
	vtbl.8		d5, {d0-d1}, d5
	vtbx.8		d0, {d2-d3}, d6
	vtbx.8		d1, {d2-d3}, d7

	vst1.8		{q2}, [r4]		@ overlapping stores
	mov		r4, #0
	b		.Lxtsdecctsout
ENDPROC(ce_aes_xts_decrypt)

	/*
	 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
	 *                             AES sbox substitution on each byte in
	 *                             'input'
	 *
	 * 'input' is replicated into all four lanes of q1 and combined with
	 * an all-zero q0; the result is taken from lane 0.
	 */
ENTRY(ce_aes_sub)
	vdup.32		q1, r0
	veor		q0, q0, q0
	aese.8		q0, q1
	vmov		r0, s0
	bx		lr
ENDPROC(ce_aes_sub)

	/*
	 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
	 *                                        operation on round key *src
	 */
ENTRY(ce_aes_invert)
	vld1.32		{q0}, [r1]
	aesimc.8	q0, q0
	vst1.32		{q0}, [r0]
	bx		lr
ENDPROC(ce_aes_invert)

	/*
	 * Permute vectors for the ciphertext stealing code: the identity
	 * indices 0x0-0xf with 16 bytes of 0xff on either side. The users
	 * above load 16 bytes at a byte offset derived from the length of the
	 * final block.
	 */
	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff