#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://www.openssl.org/~appro/cryptogams/

# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#       * Redistributions of source code must retain copyright notices,
#         this list of conditions and the following disclaimer.
#
#       * Redistributions in binary form must reproduce the above
#         copyright notice, this list of conditions and the following
#         disclaimer in the documentation and/or other materials
#         provided with the distribution.
#
#       * Neither the name of the CRYPTOGAMS nor the names of its
#         copyright holder and contributors may be used to endorse or
#         promote products derived from this software without specific
#         prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by POWER8 processor.
# The module is endian-agnostic in sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies MSR.VSX flag being
# set. It should also be noted that ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in pure AltiVec/VMX way [when data
# is aligned programmatically, which in turn guarantees exception-
# free execution], but it turned to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are in average lower
# than additional overhead in pure AltiVec approach.
#
# May 2016
#
# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
# systems were measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0

# Usage: aesp8-ppc.pl <flavour> <output-file>
# where <flavour> is e.g. "linux64le", "linux64", "linux32" and selects
# pointer size, load/store mnemonics and endianness of the emitted asm.
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

# Non-zero iff the target is little-endian (flavour ends in "le").
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Locate the ppc-xlate.pl post-processor next to this script or in the
# perlasm directory of an OpenSSL-style tree.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Pipe all of our output through the xlate script into the output file.
# NOTE(fix): this used to read `open STDOUT,"| ... ".shift || die ...`,
# where `||` binds to shift's result (a non-empty filename string), so a
# failure of open() itself was silently ignored.  Capture the output
# argument explicitly and use low-precedence `or` so open() is checked.
my $output = shift;
defined($output) && length($output)
	or die "can't call $xlate: $!";
open STDOUT,"| $^X $xlate $flavour $output"
	or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p10";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{	# Key setup procedures						#
# GPR/VR allocation for ${prefix}_set_{en,de}crypt_key.
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

# The heredoc below is assembly source; '?'-prefixed mnemonics and the
# le?/be? prefixes are rewritten by the endian fix-up loop at the bottom
# of this file before being fed to ppc-xlate.pl.
$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# roate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	 vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	 vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	 addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in1,$in1,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	 addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{	# Single block en- and decrypt procedures			#
# Emit ${prefix}_encrypt or ${prefix}_decrypt depending on $dir ("en"
# or "de"); the only difference is the vcipher vs vncipher mnemonic,
# selected through the $n infix.
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
	.long		0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}

# Post-process the accumulated $code: evaluate `...` expressions, turn
# the '?'-tagged constants table into endian-correct .byte data, and
# rewrite '?'/le?/be? prefixed mnemonics for the target endianness.
my $consts=1;
foreach(split("\n",$code)) {
        s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
	      foreach (split(/,\s*/,$2)) {
		my $l = /^0/?oct:int;
		push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
	      }
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv)  {
		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
		}
	    }

	    #emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

# STDOUT is a pipe to the xlate script; buffered write errors only
# surface at close, so the close must be checked (was unchecked).
close STDOUT or die "error closing STDOUT: $!";