1*81d358b1SMichael Ellerman#! /usr/bin/env perl
2*81d358b1SMichael Ellerman# SPDX-License-Identifier: GPL-2.0
3*81d358b1SMichael Ellerman
4*81d358b1SMichael Ellerman# This code is taken from CRYPTOGAMs[1] and is included here using the option
5*81d358b1SMichael Ellerman# in the license to distribute the code under the GPL. Therefore this program
6*81d358b1SMichael Ellerman# is free software; you can redistribute it and/or modify it under the terms of
7*81d358b1SMichael Ellerman# the GNU General Public License version 2 as published by the Free Software
8*81d358b1SMichael Ellerman# Foundation.
9*81d358b1SMichael Ellerman#
10*81d358b1SMichael Ellerman# [1] https://www.openssl.org/~appro/cryptogams/
11*81d358b1SMichael Ellerman
12*81d358b1SMichael Ellerman# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
13*81d358b1SMichael Ellerman# All rights reserved.
14*81d358b1SMichael Ellerman#
15*81d358b1SMichael Ellerman# Redistribution and use in source and binary forms, with or without
16*81d358b1SMichael Ellerman# modification, are permitted provided that the following conditions
17*81d358b1SMichael Ellerman# are met:
18*81d358b1SMichael Ellerman#
19*81d358b1SMichael Ellerman#       * Redistributions of source code must retain copyright notices,
20*81d358b1SMichael Ellerman#         this list of conditions and the following disclaimer.
21*81d358b1SMichael Ellerman#
22*81d358b1SMichael Ellerman#       * Redistributions in binary form must reproduce the above
23*81d358b1SMichael Ellerman#         copyright notice, this list of conditions and the following
24*81d358b1SMichael Ellerman#         disclaimer in the documentation and/or other materials
25*81d358b1SMichael Ellerman#         provided with the distribution.
26*81d358b1SMichael Ellerman#
27*81d358b1SMichael Ellerman#       * Neither the name of the CRYPTOGAMS nor the names of its
28*81d358b1SMichael Ellerman#         copyright holder and contributors may be used to endorse or
29*81d358b1SMichael Ellerman#         promote products derived from this software without specific
30*81d358b1SMichael Ellerman#         prior written permission.
31*81d358b1SMichael Ellerman#
32*81d358b1SMichael Ellerman# ALTERNATIVELY, provided that this notice is retained in full, this
33*81d358b1SMichael Ellerman# product may be distributed under the terms of the GNU General Public
34*81d358b1SMichael Ellerman# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
35*81d358b1SMichael Ellerman# those given above.
36*81d358b1SMichael Ellerman#
37*81d358b1SMichael Ellerman# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
38*81d358b1SMichael Ellerman# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39*81d358b1SMichael Ellerman# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40*81d358b1SMichael Ellerman# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41*81d358b1SMichael Ellerman# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42*81d358b1SMichael Ellerman# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43*81d358b1SMichael Ellerman# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44*81d358b1SMichael Ellerman# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45*81d358b1SMichael Ellerman# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46*81d358b1SMichael Ellerman# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47*81d358b1SMichael Ellerman# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48*81d358b1SMichael Ellerman
49*81d358b1SMichael Ellerman# ====================================================================
50*81d358b1SMichael Ellerman# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
51*81d358b1SMichael Ellerman# project. The module is, however, dual licensed under OpenSSL and
52*81d358b1SMichael Ellerman# CRYPTOGAMS licenses depending on where you obtain it. For further
53*81d358b1SMichael Ellerman# details see https://www.openssl.org/~appro/cryptogams/.
54*81d358b1SMichael Ellerman# ====================================================================
55*81d358b1SMichael Ellerman#
56*81d358b1SMichael Ellerman# This module implements support for AES instructions as per PowerISA
57*81d358b1SMichael Ellerman# specification version 2.07, first implemented by POWER8 processor.
58*81d358b1SMichael Ellerman# The module is endian-agnostic in sense that it supports both big-
59*81d358b1SMichael Ellerman# and little-endian cases. Data alignment in parallelizable modes is
60*81d358b1SMichael Ellerman# handled with VSX loads and stores, which implies MSR.VSX flag being
61*81d358b1SMichael Ellerman# set. It should also be noted that ISA specification doesn't prohibit
62*81d358b1SMichael Ellerman# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in pure AltiVec/VMX way [when data
# is aligned programmatically, which in turn guarantees exception-
# free execution], but it turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are on average lower
# than additional overhead in pure AltiVec approach.
69*81d358b1SMichael Ellerman#
70*81d358b1SMichael Ellerman# May 2016
71*81d358b1SMichael Ellerman#
72*81d358b1SMichael Ellerman# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
73*81d358b1SMichael Ellerman# systems were measured.
74*81d358b1SMichael Ellerman#
75*81d358b1SMichael Ellerman######################################################################
76*81d358b1SMichael Ellerman# Current large-block performance in cycles per byte processed with
77*81d358b1SMichael Ellerman# 128-bit key (less is better).
78*81d358b1SMichael Ellerman#
79*81d358b1SMichael Ellerman#		CBC en-/decrypt	CTR	XTS
80*81d358b1SMichael Ellerman# POWER8[le]	3.96/0.72	0.74	1.1
81*81d358b1SMichael Ellerman# POWER8[be]	3.75/0.65	0.66	1.0
82*81d358b1SMichael Ellerman
# Command line: <flavour> [<output>]
#
# <flavour> must contain "64" or "32"; that choice fixes the word size
# and the load/store/compare/shift mnemonics substituted into the
# assembly templates below. A trailing "le" marks a little-endian
# target. Both arguments are forwarded to ppc-xlate.pl.
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Look for ppc-xlate.pl next to this script first, then in
# ../../perlasm/. Test the match itself rather than trusting $1, so
# $dir is a well-defined empty string when $0 has no directory part.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# From here on everything printed goes through the translator.
# The low-precedence "or" is essential: with "||" the die would attach
# to the (always true) command string and a failed open would pass
# unnoticed.
my $output = shift;
$output = "" unless defined $output;
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p10";

$sp="r1";
$vrsave="r12";
117*81d358b1SMichael Ellerman
118*81d358b1SMichael Ellerman#########################################################################
{{{	# Key setup procedures						#
# Appends to $code the constant table, the Lconsts helper and the
# ${prefix}_set_encrypt_key / ${prefix}_set_decrypt_key routines.
#
# Register aliases substituted into the template:
#   r3..r8  : $inp, $bits, $out, $ptr, $cnt, $rounds
#   v0..v6  : $zero, $in0, $in1, $key, $rcon, $mask, $tmp
#   v7..v11 : $stage, $outperm, $outmask, $outhead, $outtail
#
# Lconsts returns the address of the rcon table in $ptr. The -0x48
# adjustment covers the four 16-byte table rows (0x40) plus the two
# instructions between the table and the address captured by bcl.
#
# set_encrypt_key hands its status back in r3 (copied from $ptr):
#   -1  when $inp or $out is zero,
#   -2  when $bits is below 128, above 256 or not a multiple of 64,
#    0  on success, after "stw $rounds,0($out)" has stored the round
#       count (10, 12 or 14 depending on the path taken).
# SPR 256 is saved in $vrsave on entry and restored at Ldone.
#
# set_decrypt_key calls Lset_encrypt_key and, if that returned zero,
# walks the schedule from both ends ($out-240 upwards and
# $out-240+16*rounds downwards), swapping 16-byte round keys
# rounds/2 times; it then returns 0. A non-zero status from
# Lset_encrypt_key is passed through unchanged.
#
# Lines carrying "?rev"/"?asis", "le?" or a leading "?" are markers
# resolved by the post-processing loop at the end of this file.
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# roate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	 vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	 vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not typo
	 addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in1,$in1,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not typo
	 addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
454*81d358b1SMichael Ellerman#########################################################################
{{{	# Single block en- and decrypt procedures			#
# gen_block($dir)
#
# Appends the single-block routine ".${prefix}_${dir}crypt" to $code.
#
#   $dir  "en" or "de". "de" switches every cipher opcode to its
#         "vn..." form (vncipher/vncipherlast); any other value emits
#         vcipher/vcipherlast.
#
# Register aliases used in the template are r3..r7 for $inp, $out,
# $key, $rounds and $idx. The generated code reads the round count
# from offset 240 of $key, runs the loop rounds/2-1 times with two
# cipher rounds per iteration, then finishes with one more round and
# the "last" round.
#
# The sub takes one argument, so it is declared without a prototype;
# an empty "()" prototype would reject a normal gen_block("en") call
# at compile time.
sub gen_block {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
	.long		0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
gen_block("en");
gen_block("de");
}}}
532*81d358b1SMichael Ellerman
# Post-process the accumulated $code line by line and print the result
# to STDOUT (the pipe into ppc-xlate.pl):
#   - text between backticks is replaced by the result of eval'ing it,
#   - .long/.byte rows of the constants table (everything up to the
#     "Lconsts:" label) that carry a trailing "?xxx" tag are re-emitted
#     as .byte rows; for little-endian flavours "?rev" reverses the
#     byte order and "?inv" xors every byte with 0xf,
#   - "le?"/"be?" prefixes and "?"-prefixed instructions are resolved
#     for the selected endianness.
my $consts=1;
my $little_endian = ($flavour =~ /le$/);	# same answer for every line
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/ge;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/) {
	    # copy the captures before any further match can clobber them
	    my ($type,$list,$conv)=($1,$2,$3);
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($type eq "long") {
		foreach my $word (split(/,\s*/,$list)) {
		    # a leading 0 means hex/octal notation, handled by oct()
		    my $val = ($word =~ /^0/) ? oct($word) : int($word);
		    push @bytes,($val>>24)&0xff,($val>>16)&0xff,
				($val>>8)&0xff,$val&0xff;
		}
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$list));
	    }

	    # little-endian conversion
	    if ($little_endian) {
		if    ($conv =~ /\?inv/) { @bytes=map($_^0xf,@bytes); }
		elsif ($conv =~ /\?rev/) { @bytes=reverse(@bytes); }
	    }

	    #emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($little_endian) {		# little-endian
	    s/le\?//		or
	    s/be\?/#be#/	or
	    s/\?lvsr/lvsl/	or
	    s/\?lvsl/lvsr/	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/ or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/ or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/;
	} else {			# big-endian
	    s/le\?/#le#/	or
	    s/be\?//		or
	    s/\?([a-z]+)/$1/;
	}

	print $_,"\n";
}

# STDOUT is a pipe to the translator: buffered write errors and a
# non-zero exit of the child only become visible when it is closed.
close STDOUT or die "error closing STDOUT: $! (status $?)";
586