1#! /usr/bin/env perl 2# SPDX-License-Identifier: GPL-2.0 3 4# This code is taken from CRYPTOGAMs[1] and is included here using the option 5# in the license to distribute the code under the GPL. Therefore this program 6# is free software; you can redistribute it and/or modify it under the terms of 7# the GNU General Public License version 2 as published by the Free Software 8# Foundation. 9# 10# [1] https://www.openssl.org/~appro/cryptogams/ 11 12# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org> 13# All rights reserved. 14# 15# Redistribution and use in source and binary forms, with or without 16# modification, are permitted provided that the following conditions 17# are met: 18# 19# * Redistributions of source code must retain copyright notices, 20# this list of conditions and the following disclaimer. 21# 22# * Redistributions in binary form must reproduce the above 23# copyright notice, this list of conditions and the following 24# disclaimer in the documentation and/or other materials 25# provided with the distribution. 26# 27# * Neither the name of the CRYPTOGAMS nor the names of its 28# copyright holder and contributors may be used to endorse or 29# promote products derived from this software without specific 30# prior written permission. 31# 32# ALTERNATIVELY, provided that this notice is retained in full, this 33# product may be distributed under the terms of the GNU General Public 34# License (GPL), in which case the provisions of the GPL apply INSTEAD OF 35# those given above. 36# 37# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS 38# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 39# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 40# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 41# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 42# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 43# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 45# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 46# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 47# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 49# ==================================================================== 50# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 51# project. The module is, however, dual licensed under OpenSSL and 52# CRYPTOGAMS licenses depending on where you obtain it. For further 53# details see http://www.openssl.org/~appro/cryptogams/. 54# ==================================================================== 55# 56# This module implements support for AES instructions as per PowerISA 57# specification version 2.07, first implemented by POWER8 processor. 58# The module is endian-agnostic in sense that it supports both big- 59# and little-endian cases. Data alignment in parallelizable modes is 60# handled with VSX loads and stores, which implies MSR.VSX flag being 61# set. It should also be noted that ISA specification doesn't prohibit 62# alignment exceptions for these instructions on page boundaries. 63# Initially alignment was handled in pure AltiVec/VMX way [when data 64# is aligned programmatically, which in turn guarantees exception- 65# free execution], but it turned to hamper performance when vcipher 66# instructions are interleaved. It's reckoned that eventual 67# misalignment penalties at page boundaries are in average lower 68# than additional overhead in pure AltiVec approach. 69# 70# May 2016 71# 72# Add XTS subroutine, 9x on little- and 12x improvement on big-endian 73# systems were measured. 74# 75###################################################################### 76# Current large-block performance in cycles per byte processed with 77# 128-bit key (less is better). 78# 79# CBC en-/decrypt CTR XTS 80# POWER8[le] 3.96/0.72 0.74 1.1 81# POWER8[be] 3.75/0.65 0.66 1.0 82 83$flavour = shift; 84 85if ($flavour =~ /64/) { 86 $SIZE_T =8; 87 $LRSAVE =2*$SIZE_T; 88 $STU ="stdu"; 89 $POP ="ld"; 90 $PUSH ="std"; 91 $UCMP ="cmpld"; 92 $SHL ="sldi"; 93} elsif ($flavour =~ /32/) { 94 $SIZE_T =4; 95 $LRSAVE =$SIZE_T; 96 $STU ="stwu"; 97 $POP ="lwz"; 98 $PUSH ="stw"; 99 $UCMP ="cmplw"; 100 $SHL ="slwi"; 101} else { die "nonsense $flavour"; } 102 103$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; 104 105$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; 106( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or 107( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or 108die "can't locate ppc-xlate.pl"; 109 110open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; 111 112$FRAME=8*$SIZE_T; 113$prefix="aes_p8"; 114 115$sp="r1"; 116$vrsave="r12"; 117 118######################################################################### 119{{{ # Key setup procedures # 120my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); 121my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); 122my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); 123 124$code.=<<___; 125.machine "any" 126 127.text 128 129.align 7 130rcon: 131.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev 132.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev 133.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev 134.long 0,0,0,0 ?asis 135Lconsts: 136 mflr r0 137 bcl 20,31,\$+4 138 mflr $ptr #vvvvv "distance between . and rcon 139 addi $ptr,$ptr,-0x48 140 mtlr r0 141 blr 142 .long 0 143 .byte 0,12,0x14,0,0,0,0,0 144.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" 145 146.globl .${prefix}_set_encrypt_key 147Lset_encrypt_key: 148 mflr r11 149 $PUSH r11,$LRSAVE($sp) 150 151 li $ptr,-1 152 ${UCMP}i $inp,0 153 beq- Lenc_key_abort # if ($inp==0) return -1; 154 ${UCMP}i $out,0 155 beq- Lenc_key_abort # if ($out==0) return -1; 156 li $ptr,-2 157 cmpwi $bits,128 158 blt- Lenc_key_abort 159 cmpwi $bits,256 160 bgt- Lenc_key_abort 161 andi. r0,$bits,0x3f 162 bne- Lenc_key_abort 163 164 lis r0,0xfff0 165 mfspr $vrsave,256 166 mtspr 256,r0 167 168 bl Lconsts 169 mtlr r11 170 171 neg r9,$inp 172 lvx $in0,0,$inp 173 addi $inp,$inp,15 # 15 is not typo 174 lvsr $key,0,r9 # borrow $key 175 li r8,0x20 176 cmpwi $bits,192 177 lvx $in1,0,$inp 178 le?vspltisb $mask,0x0f # borrow $mask 179 lvx $rcon,0,$ptr 180 le?vxor $key,$key,$mask # adjust for byte swap 181 lvx $mask,r8,$ptr 182 addi $ptr,$ptr,0x10 183 vperm $in0,$in0,$in1,$key # align [and byte swap in LE] 184 li $cnt,8 185 vxor $zero,$zero,$zero 186 mtctr $cnt 187 188 ?lvsr $outperm,0,$out 189 vspltisb $outmask,-1 190 lvx $outhead,0,$out 191 ?vperm $outmask,$zero,$outmask,$outperm 192 193 blt Loop128 194 addi $inp,$inp,8 195 beq L192 196 addi $inp,$inp,8 197 b L256 198 199.align 4 200Loop128: 201 vperm $key,$in0,$in0,$mask # rotate-n-splat 202 vsldoi $tmp,$zero,$in0,12 # >>32 203 vperm $outtail,$in0,$in0,$outperm # rotate 204 vsel $stage,$outhead,$outtail,$outmask 205 vmr $outhead,$outtail 206 vcipherlast $key,$key,$rcon 207 stvx $stage,0,$out 208 addi $out,$out,16 209 210 vxor $in0,$in0,$tmp 211 vsldoi $tmp,$zero,$tmp,12 # >>32 212 vxor $in0,$in0,$tmp 213 vsldoi $tmp,$zero,$tmp,12 # >>32 214 vxor $in0,$in0,$tmp 215 vadduwm $rcon,$rcon,$rcon 216 vxor $in0,$in0,$key 217 bdnz Loop128 218 219 lvx $rcon,0,$ptr # last two round keys 220 221 vperm $key,$in0,$in0,$mask # rotate-n-splat 222 vsldoi $tmp,$zero,$in0,12 # >>32 223 vperm $outtail,$in0,$in0,$outperm # rotate 224 vsel $stage,$outhead,$outtail,$outmask 225 vmr $outhead,$outtail 226 vcipherlast $key,$key,$rcon 227 stvx $stage,0,$out 228 addi $out,$out,16 229 230 vxor $in0,$in0,$tmp 231 vsldoi $tmp,$zero,$tmp,12 # >>32 232 vxor $in0,$in0,$tmp 233 vsldoi $tmp,$zero,$tmp,12 # >>32 234 vxor $in0,$in0,$tmp 235 vadduwm $rcon,$rcon,$rcon 236 vxor $in0,$in0,$key 237 238 vperm $key,$in0,$in0,$mask # rotate-n-splat 239 vsldoi $tmp,$zero,$in0,12 # >>32 240 vperm $outtail,$in0,$in0,$outperm # rotate 241 vsel $stage,$outhead,$outtail,$outmask 242 vmr $outhead,$outtail 243 vcipherlast $key,$key,$rcon 244 stvx $stage,0,$out 245 addi $out,$out,16 246 247 vxor $in0,$in0,$tmp 248 vsldoi $tmp,$zero,$tmp,12 # >>32 249 vxor $in0,$in0,$tmp 250 vsldoi $tmp,$zero,$tmp,12 # >>32 251 vxor $in0,$in0,$tmp 252 vxor $in0,$in0,$key 253 vperm $outtail,$in0,$in0,$outperm # rotate 254 vsel $stage,$outhead,$outtail,$outmask 255 vmr $outhead,$outtail 256 stvx $stage,0,$out 257 258 addi $inp,$out,15 # 15 is not typo 259 addi $out,$out,0x50 260 261 li $rounds,10 262 b Ldone 263 264.align 4 265L192: 266 lvx $tmp,0,$inp 267 li $cnt,4 268 vperm $outtail,$in0,$in0,$outperm # rotate 269 vsel $stage,$outhead,$outtail,$outmask 270 vmr $outhead,$outtail 271 stvx $stage,0,$out 272 addi $out,$out,16 273 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] 274 vspltisb $key,8 # borrow $key 275 mtctr $cnt 276 vsububm $mask,$mask,$key # adjust the mask 277 278Loop192: 279 vperm $key,$in1,$in1,$mask # roate-n-splat 280 vsldoi $tmp,$zero,$in0,12 # >>32 281 vcipherlast $key,$key,$rcon 282 283 vxor $in0,$in0,$tmp 284 vsldoi $tmp,$zero,$tmp,12 # >>32 285 vxor $in0,$in0,$tmp 286 vsldoi $tmp,$zero,$tmp,12 # >>32 287 vxor $in0,$in0,$tmp 288 289 vsldoi $stage,$zero,$in1,8 290 vspltw $tmp,$in0,3 291 vxor $tmp,$tmp,$in1 292 vsldoi $in1,$zero,$in1,12 # >>32 293 vadduwm $rcon,$rcon,$rcon 294 vxor $in1,$in1,$tmp 295 vxor $in0,$in0,$key 296 vxor $in1,$in1,$key 297 vsldoi $stage,$stage,$in0,8 298 299 vperm $key,$in1,$in1,$mask # rotate-n-splat 300 vsldoi $tmp,$zero,$in0,12 # >>32 301 vperm $outtail,$stage,$stage,$outperm # rotate 302 vsel $stage,$outhead,$outtail,$outmask 303 vmr $outhead,$outtail 304 vcipherlast $key,$key,$rcon 305 stvx $stage,0,$out 306 addi $out,$out,16 307 308 vsldoi $stage,$in0,$in1,8 309 vxor $in0,$in0,$tmp 310 vsldoi $tmp,$zero,$tmp,12 # >>32 311 vperm $outtail,$stage,$stage,$outperm # rotate 312 vsel $stage,$outhead,$outtail,$outmask 313 vmr $outhead,$outtail 314 vxor $in0,$in0,$tmp 315 vsldoi $tmp,$zero,$tmp,12 # >>32 316 vxor $in0,$in0,$tmp 317 stvx $stage,0,$out 318 addi $out,$out,16 319 320 vspltw $tmp,$in0,3 321 vxor $tmp,$tmp,$in1 322 vsldoi $in1,$zero,$in1,12 # >>32 323 vadduwm $rcon,$rcon,$rcon 324 vxor $in1,$in1,$tmp 325 vxor $in0,$in0,$key 326 vxor $in1,$in1,$key 327 vperm $outtail,$in0,$in0,$outperm # rotate 328 vsel $stage,$outhead,$outtail,$outmask 329 vmr $outhead,$outtail 330 stvx $stage,0,$out 331 addi $inp,$out,15 # 15 is not typo 332 addi $out,$out,16 333 bdnz Loop192 334 335 li $rounds,12 336 addi $out,$out,0x20 337 b Ldone 338 339.align 4 340L256: 341 lvx $tmp,0,$inp 342 li $cnt,7 343 li $rounds,14 344 vperm $outtail,$in0,$in0,$outperm # rotate 345 vsel $stage,$outhead,$outtail,$outmask 346 vmr $outhead,$outtail 347 stvx $stage,0,$out 348 addi $out,$out,16 349 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] 350 mtctr $cnt 351 352Loop256: 353 vperm $key,$in1,$in1,$mask # rotate-n-splat 354 vsldoi $tmp,$zero,$in0,12 # >>32 355 vperm $outtail,$in1,$in1,$outperm # rotate 356 vsel $stage,$outhead,$outtail,$outmask 357 vmr $outhead,$outtail 358 vcipherlast $key,$key,$rcon 359 stvx $stage,0,$out 360 addi $out,$out,16 361 362 vxor $in0,$in0,$tmp 363 vsldoi $tmp,$zero,$tmp,12 # >>32 364 vxor $in0,$in0,$tmp 365 vsldoi $tmp,$zero,$tmp,12 # >>32 366 vxor $in0,$in0,$tmp 367 vadduwm $rcon,$rcon,$rcon 368 vxor $in0,$in0,$key 369 vperm $outtail,$in0,$in0,$outperm # rotate 370 vsel $stage,$outhead,$outtail,$outmask 371 vmr $outhead,$outtail 372 stvx $stage,0,$out 373 addi $inp,$out,15 # 15 is not typo 374 addi $out,$out,16 375 bdz Ldone 376 377 vspltw $key,$in0,3 # just splat 378 vsldoi $tmp,$zero,$in1,12 # >>32 379 vsbox $key,$key 380 381 vxor $in1,$in1,$tmp 382 vsldoi $tmp,$zero,$tmp,12 # >>32 383 vxor $in1,$in1,$tmp 384 vsldoi $tmp,$zero,$tmp,12 # >>32 385 vxor $in1,$in1,$tmp 386 387 vxor $in1,$in1,$key 388 b Loop256 389 390.align 4 391Ldone: 392 lvx $in1,0,$inp # redundant in aligned case 393 vsel $in1,$outhead,$in1,$outmask 394 stvx $in1,0,$inp 395 li $ptr,0 396 mtspr 256,$vrsave 397 stw $rounds,0($out) 398 399Lenc_key_abort: 400 mr r3,$ptr 401 blr 402 .long 0 403 .byte 0,12,0x14,1,0,0,3,0 404 .long 0 405.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key 406 407.globl .${prefix}_set_decrypt_key 408 $STU $sp,-$FRAME($sp) 409 mflr r10 410 $PUSH r10,$FRAME+$LRSAVE($sp) 411 bl Lset_encrypt_key 412 mtlr r10 413 414 cmpwi r3,0 415 bne- Ldec_key_abort 416 417 slwi $cnt,$rounds,4 418 subi $inp,$out,240 # first round key 419 srwi $rounds,$rounds,1 420 add $out,$inp,$cnt # last round key 421 mtctr $rounds 422 423Ldeckey: 424 lwz r0, 0($inp) 425 lwz r6, 4($inp) 426 lwz r7, 8($inp) 427 lwz r8, 12($inp) 428 addi $inp,$inp,16 429 lwz r9, 0($out) 430 lwz r10,4($out) 431 lwz r11,8($out) 432 lwz r12,12($out) 433 stw r0, 0($out) 434 stw r6, 4($out) 435 stw r7, 8($out) 436 stw r8, 12($out) 437 subi $out,$out,16 438 stw r9, -16($inp) 439 stw r10,-12($inp) 440 stw r11,-8($inp) 441 stw r12,-4($inp) 442 bdnz Ldeckey 443 444 xor r3,r3,r3 # return value 445Ldec_key_abort: 446 addi $sp,$sp,$FRAME 447 blr 448 .long 0 449 .byte 0,12,4,1,0x80,0,3,0 450 .long 0 451.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key 452___ 453}}} 454######################################################################### 455{{{ # Single block en- and decrypt procedures # 456sub gen_block () { 457my $dir = shift; 458my $n = $dir eq "de" ? "n" : ""; 459my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); 460 461$code.=<<___; 462.globl .${prefix}_${dir}crypt 463 lwz $rounds,240($key) 464 lis r0,0xfc00 465 mfspr $vrsave,256 466 li $idx,15 # 15 is not typo 467 mtspr 256,r0 468 469 lvx v0,0,$inp 470 neg r11,$out 471 lvx v1,$idx,$inp 472 lvsl v2,0,$inp # inpperm 473 le?vspltisb v4,0x0f 474 ?lvsl v3,0,r11 # outperm 475 le?vxor v2,v2,v4 476 li $idx,16 477 vperm v0,v0,v1,v2 # align [and byte swap in LE] 478 lvx v1,0,$key 479 ?lvsl v5,0,$key # keyperm 480 srwi $rounds,$rounds,1 481 lvx v2,$idx,$key 482 addi $idx,$idx,16 483 subi $rounds,$rounds,1 484 ?vperm v1,v1,v2,v5 # align round key 485 486 vxor v0,v0,v1 487 lvx v1,$idx,$key 488 addi $idx,$idx,16 489 mtctr $rounds 490 491Loop_${dir}c: 492 ?vperm v2,v2,v1,v5 493 v${n}cipher v0,v0,v2 494 lvx v2,$idx,$key 495 addi $idx,$idx,16 496 ?vperm v1,v1,v2,v5 497 v${n}cipher v0,v0,v1 498 lvx v1,$idx,$key 499 addi $idx,$idx,16 500 bdnz Loop_${dir}c 501 502 ?vperm v2,v2,v1,v5 503 v${n}cipher v0,v0,v2 504 lvx v2,$idx,$key 505 ?vperm v1,v1,v2,v5 506 v${n}cipherlast v0,v0,v1 507 508 vspltisb v2,-1 509 vxor v1,v1,v1 510 li $idx,15 # 15 is not typo 511 ?vperm v2,v1,v2,v3 # outmask 512 le?vxor v3,v3,v4 513 lvx v1,0,$out # outhead 514 vperm v0,v0,v0,v3 # rotate [and byte swap in LE] 515 vsel v1,v1,v0,v2 516 lvx v4,$idx,$out 517 stvx v1,0,$out 518 vsel v0,v0,v4,v2 519 stvx v0,$idx,$out 520 521 mtspr 256,$vrsave 522 blr 523 .long 0 524 .byte 0,12,0x14,0,0,0,3,0 525 .long 0 526.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt 527___ 528} 529&gen_block("en"); 530&gen_block("de"); 531}}} 532######################################################################### 533{{{ # CBC en- and decrypt procedures # 534my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); 535my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); 536my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= 537 map("v$_",(4..10)); 538$code.=<<___; 539.globl .${prefix}_cbc_encrypt 540 ${UCMP}i $len,16 541 bltlr- 542 543 cmpwi $enc,0 # test direction 544 lis r0,0xffe0 545 mfspr $vrsave,256 546 mtspr 256,r0 547 548 li $idx,15 549 vxor $rndkey0,$rndkey0,$rndkey0 550 le?vspltisb $tmp,0x0f 551 552 lvx $ivec,0,$ivp # load [unaligned] iv 553 lvsl $inpperm,0,$ivp 554 lvx $inptail,$idx,$ivp 555 le?vxor $inpperm,$inpperm,$tmp 556 vperm $ivec,$ivec,$inptail,$inpperm 557 558 neg r11,$inp 559 ?lvsl $keyperm,0,$key # prepare for unaligned key 560 lwz $rounds,240($key) 561 562 lvsr $inpperm,0,r11 # prepare for unaligned load 563 lvx $inptail,0,$inp 564 addi $inp,$inp,15 # 15 is not typo 565 le?vxor $inpperm,$inpperm,$tmp 566 567 ?lvsr $outperm,0,$out # prepare for unaligned store 568 vspltisb $outmask,-1 569 lvx $outhead,0,$out 570 ?vperm $outmask,$rndkey0,$outmask,$outperm 571 le?vxor $outperm,$outperm,$tmp 572 573 srwi $rounds,$rounds,1 574 li $idx,16 575 subi $rounds,$rounds,1 576 beq Lcbc_dec 577 578Lcbc_enc: 579 vmr $inout,$inptail 580 lvx $inptail,0,$inp 581 addi $inp,$inp,16 582 mtctr $rounds 583 subi $len,$len,16 # len-=16 584 585 lvx $rndkey0,0,$key 586 vperm $inout,$inout,$inptail,$inpperm 587 lvx $rndkey1,$idx,$key 588 addi $idx,$idx,16 589 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 590 vxor $inout,$inout,$rndkey0 591 lvx $rndkey0,$idx,$key 592 addi $idx,$idx,16 593 vxor $inout,$inout,$ivec 594 595Loop_cbc_enc: 596 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 597 vcipher $inout,$inout,$rndkey1 598 lvx $rndkey1,$idx,$key 599 addi $idx,$idx,16 600 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 601 vcipher $inout,$inout,$rndkey0 602 lvx $rndkey0,$idx,$key 603 addi $idx,$idx,16 604 bdnz Loop_cbc_enc 605 606 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 607 vcipher $inout,$inout,$rndkey1 608 lvx $rndkey1,$idx,$key 609 li $idx,16 610 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 611 vcipherlast $ivec,$inout,$rndkey0 612 ${UCMP}i $len,16 613 614 vperm $tmp,$ivec,$ivec,$outperm 615 vsel $inout,$outhead,$tmp,$outmask 616 vmr $outhead,$tmp 617 stvx $inout,0,$out 618 addi $out,$out,16 619 bge Lcbc_enc 620 621 b Lcbc_done 622 623.align 4 624Lcbc_dec: 625 ${UCMP}i $len,128 626 bge _aesp8_cbc_decrypt8x 627 vmr $tmp,$inptail 628 lvx $inptail,0,$inp 629 addi $inp,$inp,16 630 mtctr $rounds 631 subi $len,$len,16 # len-=16 632 633 lvx $rndkey0,0,$key 634 vperm $tmp,$tmp,$inptail,$inpperm 635 lvx $rndkey1,$idx,$key 636 addi $idx,$idx,16 637 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 638 vxor $inout,$tmp,$rndkey0 639 lvx $rndkey0,$idx,$key 640 addi $idx,$idx,16 641 642Loop_cbc_dec: 643 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 644 vncipher $inout,$inout,$rndkey1 645 lvx $rndkey1,$idx,$key 646 addi $idx,$idx,16 647 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 648 vncipher $inout,$inout,$rndkey0 649 lvx $rndkey0,$idx,$key 650 addi $idx,$idx,16 651 bdnz Loop_cbc_dec 652 653 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 654 vncipher $inout,$inout,$rndkey1 655 lvx $rndkey1,$idx,$key 656 li $idx,16 657 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 658 vncipherlast $inout,$inout,$rndkey0 659 ${UCMP}i $len,16 660 661 vxor $inout,$inout,$ivec 662 vmr $ivec,$tmp 663 vperm $tmp,$inout,$inout,$outperm 664 vsel $inout,$outhead,$tmp,$outmask 665 vmr $outhead,$tmp 666 stvx $inout,0,$out 667 addi $out,$out,16 668 bge Lcbc_dec 669 670Lcbc_done: 671 addi $out,$out,-1 672 lvx $inout,0,$out # redundant in aligned case 673 vsel $inout,$outhead,$inout,$outmask 674 stvx $inout,0,$out 675 676 neg $enc,$ivp # write [unaligned] iv 677 li $idx,15 # 15 is not typo 678 vxor $rndkey0,$rndkey0,$rndkey0 679 vspltisb $outmask,-1 680 le?vspltisb $tmp,0x0f 681 ?lvsl $outperm,0,$enc 682 ?vperm $outmask,$rndkey0,$outmask,$outperm 683 le?vxor $outperm,$outperm,$tmp 684 lvx $outhead,0,$ivp 685 vperm $ivec,$ivec,$ivec,$outperm 686 vsel $inout,$outhead,$ivec,$outmask 687 lvx $inptail,$idx,$ivp 688 stvx $inout,0,$ivp 689 vsel $inout,$ivec,$inptail,$outmask 690 stvx $inout,$idx,$ivp 691 692 mtspr 256,$vrsave 693 blr 694 .long 0 695 .byte 0,12,0x14,0,0,0,6,0 696 .long 0 697___ 698######################################################################### 699{{ # Optimized CBC decrypt procedure # 700my $key_="r11"; 701my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); 702my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13)); 703my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21)); 704my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 705 # v26-v31 last 6 round keys 706my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment 707 708$code.=<<___; 709.align 5 710_aesp8_cbc_decrypt8x: 711 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 712 li r10,`$FRAME+8*16+15` 713 li r11,`$FRAME+8*16+31` 714 stvx v20,r10,$sp # ABI says so 715 addi r10,r10,32 716 stvx v21,r11,$sp 717 addi r11,r11,32 718 stvx v22,r10,$sp 719 addi r10,r10,32 720 stvx v23,r11,$sp 721 addi r11,r11,32 722 stvx v24,r10,$sp 723 addi r10,r10,32 724 stvx v25,r11,$sp 725 addi r11,r11,32 726 stvx v26,r10,$sp 727 addi r10,r10,32 728 stvx v27,r11,$sp 729 addi r11,r11,32 730 stvx v28,r10,$sp 731 addi r10,r10,32 732 stvx v29,r11,$sp 733 addi r11,r11,32 734 stvx v30,r10,$sp 735 stvx v31,r11,$sp 736 li r0,-1 737 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 738 li $x10,0x10 739 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 740 li $x20,0x20 741 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 742 li $x30,0x30 743 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 744 li $x40,0x40 745 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 746 li $x50,0x50 747 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 748 li $x60,0x60 749 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 750 li $x70,0x70 751 mtspr 256,r0 752 753 subi $rounds,$rounds,3 # -4 in total 754 subi $len,$len,128 # bias 755 756 lvx $rndkey0,$x00,$key # load key schedule 757 lvx v30,$x10,$key 758 addi $key,$key,0x20 759 lvx v31,$x00,$key 760 ?vperm $rndkey0,$rndkey0,v30,$keyperm 761 addi $key_,$sp,$FRAME+15 762 mtctr $rounds 763 764Load_cbc_dec_key: 765 ?vperm v24,v30,v31,$keyperm 766 lvx v30,$x10,$key 767 addi $key,$key,0x20 768 stvx v24,$x00,$key_ # off-load round[1] 769 ?vperm v25,v31,v30,$keyperm 770 lvx v31,$x00,$key 771 stvx v25,$x10,$key_ # off-load round[2] 772 addi $key_,$key_,0x20 773 bdnz Load_cbc_dec_key 774 775 lvx v26,$x10,$key 776 ?vperm v24,v30,v31,$keyperm 777 lvx v27,$x20,$key 778 stvx v24,$x00,$key_ # off-load round[3] 779 ?vperm v25,v31,v26,$keyperm 780 lvx v28,$x30,$key 781 stvx v25,$x10,$key_ # off-load round[4] 782 addi $key_,$sp,$FRAME+15 # rewind $key_ 783 ?vperm v26,v26,v27,$keyperm 784 lvx v29,$x40,$key 785 ?vperm v27,v27,v28,$keyperm 786 lvx v30,$x50,$key 787 ?vperm v28,v28,v29,$keyperm 788 lvx v31,$x60,$key 789 ?vperm v29,v29,v30,$keyperm 790 lvx $out0,$x70,$key # borrow $out0 791 ?vperm v30,v30,v31,$keyperm 792 lvx v24,$x00,$key_ # pre-load round[1] 793 ?vperm v31,v31,$out0,$keyperm 794 lvx v25,$x10,$key_ # pre-load round[2] 795 796 #lvx $inptail,0,$inp # "caller" already did this 797 #addi $inp,$inp,15 # 15 is not typo 798 subi $inp,$inp,15 # undo "caller" 799 800 le?li $idx,8 801 lvx_u $in0,$x00,$inp # load first 8 "words" 802 le?lvsl $inpperm,0,$idx 803 le?vspltisb $tmp,0x0f 804 lvx_u $in1,$x10,$inp 805 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u 806 lvx_u $in2,$x20,$inp 807 le?vperm $in0,$in0,$in0,$inpperm 808 lvx_u $in3,$x30,$inp 809 le?vperm $in1,$in1,$in1,$inpperm 810 lvx_u $in4,$x40,$inp 811 le?vperm $in2,$in2,$in2,$inpperm 812 vxor $out0,$in0,$rndkey0 813 lvx_u $in5,$x50,$inp 814 le?vperm $in3,$in3,$in3,$inpperm 815 vxor $out1,$in1,$rndkey0 816 lvx_u $in6,$x60,$inp 817 le?vperm $in4,$in4,$in4,$inpperm 818 vxor $out2,$in2,$rndkey0 819 lvx_u $in7,$x70,$inp 820 addi $inp,$inp,0x80 821 le?vperm $in5,$in5,$in5,$inpperm 822 vxor $out3,$in3,$rndkey0 823 le?vperm $in6,$in6,$in6,$inpperm 824 vxor $out4,$in4,$rndkey0 825 le?vperm $in7,$in7,$in7,$inpperm 826 vxor $out5,$in5,$rndkey0 827 vxor $out6,$in6,$rndkey0 828 vxor $out7,$in7,$rndkey0 829 830 mtctr $rounds 831 b Loop_cbc_dec8x 832.align 5 833Loop_cbc_dec8x: 834 vncipher $out0,$out0,v24 835 vncipher $out1,$out1,v24 836 vncipher $out2,$out2,v24 837 vncipher $out3,$out3,v24 838 vncipher $out4,$out4,v24 839 vncipher $out5,$out5,v24 840 vncipher $out6,$out6,v24 841 vncipher $out7,$out7,v24 842 lvx v24,$x20,$key_ # round[3] 843 addi $key_,$key_,0x20 844 845 vncipher $out0,$out0,v25 846 vncipher $out1,$out1,v25 847 vncipher $out2,$out2,v25 848 vncipher $out3,$out3,v25 849 vncipher $out4,$out4,v25 850 vncipher $out5,$out5,v25 851 vncipher $out6,$out6,v25 852 vncipher $out7,$out7,v25 853 lvx v25,$x10,$key_ # round[4] 854 bdnz Loop_cbc_dec8x 855 856 subic $len,$len,128 # $len-=128 857 vncipher $out0,$out0,v24 858 vncipher $out1,$out1,v24 859 vncipher $out2,$out2,v24 860 vncipher $out3,$out3,v24 861 vncipher $out4,$out4,v24 862 vncipher $out5,$out5,v24 863 vncipher $out6,$out6,v24 864 vncipher $out7,$out7,v24 865 866 subfe. r0,r0,r0 # borrow?-1:0 867 vncipher $out0,$out0,v25 868 vncipher $out1,$out1,v25 869 vncipher $out2,$out2,v25 870 vncipher $out3,$out3,v25 871 vncipher $out4,$out4,v25 872 vncipher $out5,$out5,v25 873 vncipher $out6,$out6,v25 874 vncipher $out7,$out7,v25 875 876 and r0,r0,$len 877 vncipher $out0,$out0,v26 878 vncipher $out1,$out1,v26 879 vncipher $out2,$out2,v26 880 vncipher $out3,$out3,v26 881 vncipher $out4,$out4,v26 882 vncipher $out5,$out5,v26 883 vncipher $out6,$out6,v26 884 vncipher $out7,$out7,v26 885 886 add $inp,$inp,r0 # $inp is adjusted in such 887 # way that at exit from the 888 # loop inX-in7 are loaded 889 # with last "words" 890 vncipher $out0,$out0,v27 891 vncipher $out1,$out1,v27 892 vncipher $out2,$out2,v27 893 vncipher $out3,$out3,v27 894 vncipher $out4,$out4,v27 895 vncipher $out5,$out5,v27 896 vncipher $out6,$out6,v27 897 vncipher $out7,$out7,v27 898 899 addi $key_,$sp,$FRAME+15 # rewind $key_ 900 vncipher $out0,$out0,v28 901 vncipher $out1,$out1,v28 902 vncipher $out2,$out2,v28 903 vncipher $out3,$out3,v28 904 vncipher $out4,$out4,v28 905 vncipher $out5,$out5,v28 906 vncipher $out6,$out6,v28 907 vncipher $out7,$out7,v28 908 lvx v24,$x00,$key_ # re-pre-load round[1] 909 910 vncipher $out0,$out0,v29 911 vncipher $out1,$out1,v29 912 vncipher $out2,$out2,v29 913 vncipher $out3,$out3,v29 914 vncipher $out4,$out4,v29 915 vncipher $out5,$out5,v29 916 vncipher $out6,$out6,v29 917 vncipher $out7,$out7,v29 918 lvx v25,$x10,$key_ # re-pre-load round[2] 919 920 vncipher $out0,$out0,v30 921 vxor $ivec,$ivec,v31 # xor with last round key 922 vncipher $out1,$out1,v30 923 vxor $in0,$in0,v31 924 vncipher $out2,$out2,v30 925 vxor $in1,$in1,v31 926 vncipher $out3,$out3,v30 927 vxor $in2,$in2,v31 928 vncipher $out4,$out4,v30 929 vxor $in3,$in3,v31 930 vncipher $out5,$out5,v30 931 vxor $in4,$in4,v31 932 vncipher $out6,$out6,v30 933 vxor $in5,$in5,v31 934 vncipher $out7,$out7,v30 935 vxor $in6,$in6,v31 936 937 vncipherlast $out0,$out0,$ivec 938 vncipherlast $out1,$out1,$in0 939 lvx_u $in0,$x00,$inp # load next input block 940 vncipherlast $out2,$out2,$in1 941 lvx_u $in1,$x10,$inp 942 vncipherlast $out3,$out3,$in2 943 le?vperm $in0,$in0,$in0,$inpperm 944 lvx_u $in2,$x20,$inp 945 vncipherlast $out4,$out4,$in3 946 le?vperm $in1,$in1,$in1,$inpperm 947 lvx_u $in3,$x30,$inp 948 vncipherlast $out5,$out5,$in4 949 le?vperm $in2,$in2,$in2,$inpperm 950 lvx_u $in4,$x40,$inp 951 vncipherlast $out6,$out6,$in5 952 le?vperm $in3,$in3,$in3,$inpperm 953 lvx_u $in5,$x50,$inp 954 vncipherlast $out7,$out7,$in6 955 le?vperm $in4,$in4,$in4,$inpperm 956 lvx_u $in6,$x60,$inp 957 vmr $ivec,$in7 958 le?vperm $in5,$in5,$in5,$inpperm 959 lvx_u $in7,$x70,$inp 960 addi $inp,$inp,0x80 961 962 le?vperm $out0,$out0,$out0,$inpperm 963 le?vperm $out1,$out1,$out1,$inpperm 964 stvx_u $out0,$x00,$out 965 le?vperm $in6,$in6,$in6,$inpperm 966 vxor $out0,$in0,$rndkey0 967 le?vperm $out2,$out2,$out2,$inpperm 968 stvx_u $out1,$x10,$out 969 le?vperm $in7,$in7,$in7,$inpperm 970 vxor $out1,$in1,$rndkey0 971 le?vperm $out3,$out3,$out3,$inpperm 972 stvx_u $out2,$x20,$out 973 vxor $out2,$in2,$rndkey0 974 le?vperm $out4,$out4,$out4,$inpperm 975 stvx_u $out3,$x30,$out 976 vxor $out3,$in3,$rndkey0 977 le?vperm $out5,$out5,$out5,$inpperm 978 stvx_u $out4,$x40,$out 979 vxor $out4,$in4,$rndkey0 980 le?vperm $out6,$out6,$out6,$inpperm 981 stvx_u $out5,$x50,$out 982 vxor $out5,$in5,$rndkey0 983 le?vperm $out7,$out7,$out7,$inpperm 984 stvx_u $out6,$x60,$out 985 vxor $out6,$in6,$rndkey0 986 stvx_u $out7,$x70,$out 987 addi $out,$out,0x80 988 vxor $out7,$in7,$rndkey0 989 990 mtctr $rounds 991 beq Loop_cbc_dec8x # did $len-=128 borrow? 992 993 addic. $len,$len,128 994 beq Lcbc_dec8x_done 995 nop 996 nop 997 998Loop_cbc_dec8x_tail: # up to 7 "words" tail... 999 vncipher $out1,$out1,v24 1000 vncipher $out2,$out2,v24 1001 vncipher $out3,$out3,v24 1002 vncipher $out4,$out4,v24 1003 vncipher $out5,$out5,v24 1004 vncipher $out6,$out6,v24 1005 vncipher $out7,$out7,v24 1006 lvx v24,$x20,$key_ # round[3] 1007 addi $key_,$key_,0x20 1008 1009 vncipher $out1,$out1,v25 1010 vncipher $out2,$out2,v25 1011 vncipher $out3,$out3,v25 1012 vncipher $out4,$out4,v25 1013 vncipher $out5,$out5,v25 1014 vncipher $out6,$out6,v25 1015 vncipher $out7,$out7,v25 1016 lvx v25,$x10,$key_ # round[4] 1017 bdnz Loop_cbc_dec8x_tail 1018 1019 vncipher $out1,$out1,v24 1020 vncipher $out2,$out2,v24 1021 vncipher $out3,$out3,v24 1022 vncipher $out4,$out4,v24 1023 vncipher $out5,$out5,v24 1024 vncipher $out6,$out6,v24 1025 vncipher $out7,$out7,v24 1026 1027 vncipher $out1,$out1,v25 1028 vncipher $out2,$out2,v25 1029 vncipher $out3,$out3,v25 1030 vncipher $out4,$out4,v25 1031 vncipher $out5,$out5,v25 1032 vncipher $out6,$out6,v25 1033 vncipher $out7,$out7,v25 1034 1035 vncipher $out1,$out1,v26 1036 vncipher $out2,$out2,v26 1037 vncipher $out3,$out3,v26 1038 vncipher $out4,$out4,v26 1039 vncipher $out5,$out5,v26 1040 vncipher $out6,$out6,v26 1041 vncipher $out7,$out7,v26 1042 1043 vncipher $out1,$out1,v27 1044 vncipher $out2,$out2,v27 1045 vncipher $out3,$out3,v27 1046 vncipher $out4,$out4,v27 1047 vncipher $out5,$out5,v27 1048 vncipher $out6,$out6,v27 1049 vncipher $out7,$out7,v27 1050 1051 vncipher $out1,$out1,v28 1052 vncipher $out2,$out2,v28 1053 vncipher $out3,$out3,v28 1054 vncipher $out4,$out4,v28 1055 vncipher $out5,$out5,v28 1056 vncipher $out6,$out6,v28 1057 vncipher $out7,$out7,v28 1058 1059 vncipher $out1,$out1,v29 1060 vncipher $out2,$out2,v29 1061 vncipher $out3,$out3,v29 1062 vncipher $out4,$out4,v29 1063 vncipher $out5,$out5,v29 1064 vncipher $out6,$out6,v29 1065 vncipher $out7,$out7,v29 1066 1067 vncipher $out1,$out1,v30 1068 vxor $ivec,$ivec,v31 # last round key 1069 vncipher $out2,$out2,v30 1070 vxor $in1,$in1,v31 1071 vncipher $out3,$out3,v30 1072 vxor $in2,$in2,v31 1073 vncipher $out4,$out4,v30 1074 vxor $in3,$in3,v31 1075 vncipher $out5,$out5,v30 1076 vxor $in4,$in4,v31 1077 vncipher $out6,$out6,v30 1078 vxor $in5,$in5,v31 1079 vncipher $out7,$out7,v30 1080 vxor $in6,$in6,v31 1081 1082 cmplwi $len,32 # switch($len) 1083 blt Lcbc_dec8x_one 1084 nop 1085 beq Lcbc_dec8x_two 1086 cmplwi $len,64 1087 blt Lcbc_dec8x_three 1088 nop 1089 beq Lcbc_dec8x_four 1090 cmplwi $len,96 1091 blt Lcbc_dec8x_five 1092 nop 1093 beq Lcbc_dec8x_six 1094 1095Lcbc_dec8x_seven: 1096 vncipherlast $out1,$out1,$ivec 1097 vncipherlast $out2,$out2,$in1 1098 vncipherlast $out3,$out3,$in2 1099 vncipherlast $out4,$out4,$in3 1100 vncipherlast $out5,$out5,$in4 1101 vncipherlast $out6,$out6,$in5 1102 vncipherlast $out7,$out7,$in6 1103 vmr $ivec,$in7 1104 1105 le?vperm $out1,$out1,$out1,$inpperm 1106 le?vperm $out2,$out2,$out2,$inpperm 1107 stvx_u $out1,$x00,$out 1108 le?vperm $out3,$out3,$out3,$inpperm 1109 stvx_u $out2,$x10,$out 1110 le?vperm $out4,$out4,$out4,$inpperm 1111 stvx_u $out3,$x20,$out 1112 le?vperm $out5,$out5,$out5,$inpperm 1113 stvx_u $out4,$x30,$out 1114 le?vperm $out6,$out6,$out6,$inpperm 1115 stvx_u $out5,$x40,$out 1116 le?vperm $out7,$out7,$out7,$inpperm 1117 stvx_u $out6,$x50,$out 1118 stvx_u $out7,$x60,$out 1119 addi $out,$out,0x70 1120 b Lcbc_dec8x_done 1121 1122.align 5 1123Lcbc_dec8x_six: 1124 vncipherlast $out2,$out2,$ivec 1125 vncipherlast $out3,$out3,$in2 1126 vncipherlast $out4,$out4,$in3 1127 vncipherlast $out5,$out5,$in4 1128 vncipherlast $out6,$out6,$in5 1129 vncipherlast $out7,$out7,$in6 1130 vmr $ivec,$in7 1131 1132 le?vperm $out2,$out2,$out2,$inpperm 1133 le?vperm $out3,$out3,$out3,$inpperm 1134 stvx_u $out2,$x00,$out 1135 le?vperm $out4,$out4,$out4,$inpperm 1136 stvx_u $out3,$x10,$out 1137 le?vperm $out5,$out5,$out5,$inpperm 1138 stvx_u $out4,$x20,$out 1139 le?vperm $out6,$out6,$out6,$inpperm 1140 stvx_u $out5,$x30,$out 1141 le?vperm $out7,$out7,$out7,$inpperm 1142 stvx_u $out6,$x40,$out 1143 stvx_u $out7,$x50,$out 1144 addi $out,$out,0x60 1145 b Lcbc_dec8x_done 1146 1147.align 5 1148Lcbc_dec8x_five: 1149 vncipherlast $out3,$out3,$ivec 1150 vncipherlast $out4,$out4,$in3 1151 vncipherlast $out5,$out5,$in4 1152 vncipherlast $out6,$out6,$in5 1153 vncipherlast $out7,$out7,$in6 1154 vmr $ivec,$in7 1155 1156 le?vperm $out3,$out3,$out3,$inpperm 1157 le?vperm $out4,$out4,$out4,$inpperm 1158 stvx_u $out3,$x00,$out 1159 le?vperm $out5,$out5,$out5,$inpperm 1160 stvx_u $out4,$x10,$out 1161 le?vperm $out6,$out6,$out6,$inpperm 1162 stvx_u $out5,$x20,$out 1163 le?vperm $out7,$out7,$out7,$inpperm 1164 stvx_u $out6,$x30,$out 1165 stvx_u $out7,$x40,$out 1166 addi $out,$out,0x50 1167 b Lcbc_dec8x_done 1168 1169.align 5 1170Lcbc_dec8x_four: 1171 vncipherlast $out4,$out4,$ivec 1172 vncipherlast $out5,$out5,$in4 1173 vncipherlast $out6,$out6,$in5 1174 vncipherlast $out7,$out7,$in6 1175 vmr $ivec,$in7 1176 1177 le?vperm $out4,$out4,$out4,$inpperm 1178 le?vperm $out5,$out5,$out5,$inpperm 1179 stvx_u $out4,$x00,$out 1180 le?vperm $out6,$out6,$out6,$inpperm 1181 stvx_u $out5,$x10,$out 1182 le?vperm $out7,$out7,$out7,$inpperm 1183 stvx_u $out6,$x20,$out 1184 stvx_u $out7,$x30,$out 1185 addi $out,$out,0x40 1186 b Lcbc_dec8x_done 1187 1188.align 5 1189Lcbc_dec8x_three: 1190 vncipherlast $out5,$out5,$ivec 1191 vncipherlast $out6,$out6,$in5 1192 vncipherlast $out7,$out7,$in6 1193 vmr $ivec,$in7 1194 1195 le?vperm $out5,$out5,$out5,$inpperm 1196 le?vperm $out6,$out6,$out6,$inpperm 1197 stvx_u $out5,$x00,$out 1198 le?vperm $out7,$out7,$out7,$inpperm 1199 stvx_u $out6,$x10,$out 1200 stvx_u $out7,$x20,$out 1201 addi $out,$out,0x30 1202 b Lcbc_dec8x_done 1203 1204.align 5 1205Lcbc_dec8x_two: 1206 vncipherlast $out6,$out6,$ivec 1207 vncipherlast $out7,$out7,$in6 1208 vmr $ivec,$in7 1209 1210 le?vperm $out6,$out6,$out6,$inpperm 1211 le?vperm $out7,$out7,$out7,$inpperm 1212 stvx_u $out6,$x00,$out 1213 stvx_u $out7,$x10,$out 1214 addi $out,$out,0x20 1215 b Lcbc_dec8x_done 1216 1217.align 5 1218Lcbc_dec8x_one: 1219 vncipherlast $out7,$out7,$ivec 1220 vmr $ivec,$in7 1221 1222 le?vperm $out7,$out7,$out7,$inpperm 1223 stvx_u $out7,0,$out 1224 addi $out,$out,0x10 1225 1226Lcbc_dec8x_done: 1227 le?vperm $ivec,$ivec,$ivec,$inpperm 1228 stvx_u $ivec,0,$ivp # write [unaligned] iv 1229 1230 li r10,`$FRAME+15` 1231 li r11,`$FRAME+31` 1232 stvx $inpperm,r10,$sp # wipe copies of round keys 1233 addi r10,r10,32 1234 stvx $inpperm,r11,$sp 1235 addi r11,r11,32 1236 stvx $inpperm,r10,$sp 1237 addi r10,r10,32 1238 stvx $inpperm,r11,$sp 1239 addi r11,r11,32 1240 stvx $inpperm,r10,$sp 1241 addi r10,r10,32 1242 stvx $inpperm,r11,$sp 1243 addi r11,r11,32 1244 stvx $inpperm,r10,$sp 1245 addi r10,r10,32 1246 stvx $inpperm,r11,$sp 1247 addi r11,r11,32 1248 1249 mtspr 256,$vrsave 1250 lvx v20,r10,$sp # ABI says so 1251 addi r10,r10,32 1252 lvx v21,r11,$sp 1253 addi r11,r11,32 1254 lvx v22,r10,$sp 1255 addi r10,r10,32 1256 lvx v23,r11,$sp 1257 addi r11,r11,32 1258 lvx v24,r10,$sp 1259 addi r10,r10,32 1260 lvx v25,r11,$sp 1261 addi r11,r11,32 1262 lvx v26,r10,$sp 1263 addi r10,r10,32 1264 lvx v27,r11,$sp 1265 addi r11,r11,32 1266 lvx v28,r10,$sp 1267 addi r10,r10,32 1268 lvx v29,r11,$sp 1269 addi r11,r11,32 1270 lvx v30,r10,$sp 1271 lvx v31,r11,$sp 1272 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1273 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1274 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1275 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1276 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1277 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1278 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 1279 blr 1280 .long 0 1281 .byte 0,12,0x14,0,0x80,6,6,0 1282 .long 0 1283.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt 1284___ 1285}} }}} 1286 1287######################################################################### 1288{{{ # CTR procedure[s] # 1289my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); 1290my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); 1291my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)= 1292 map("v$_",(4..11)); 1293my $dat=$tmp; 1294 1295$code.=<<___; 1296.globl .${prefix}_ctr32_encrypt_blocks 1297 ${UCMP}i $len,1 1298 bltlr- 1299 1300 lis r0,0xfff0 1301 mfspr $vrsave,256 1302 mtspr 256,r0 1303 1304 li $idx,15 1305 vxor $rndkey0,$rndkey0,$rndkey0 1306 le?vspltisb $tmp,0x0f 1307 1308 lvx $ivec,0,$ivp # load [unaligned] iv 1309 lvsl $inpperm,0,$ivp 1310 lvx $inptail,$idx,$ivp 1311 vspltisb $one,1 1312 le?vxor $inpperm,$inpperm,$tmp 1313 vperm $ivec,$ivec,$inptail,$inpperm 1314 vsldoi $one,$rndkey0,$one,1 1315 1316 neg r11,$inp 1317 ?lvsl $keyperm,0,$key # prepare for unaligned key 1318 lwz $rounds,240($key) 1319 1320 lvsr $inpperm,0,r11 # prepare for unaligned load 1321 lvx $inptail,0,$inp 1322 addi $inp,$inp,15 # 15 is not typo 1323 le?vxor $inpperm,$inpperm,$tmp 1324 1325 srwi $rounds,$rounds,1 1326 li $idx,16 1327 subi $rounds,$rounds,1 1328 1329 ${UCMP}i $len,8 1330 bge _aesp8_ctr32_encrypt8x 1331 1332 ?lvsr $outperm,0,$out # prepare for unaligned store 1333 vspltisb $outmask,-1 1334 lvx $outhead,0,$out 1335 ?vperm $outmask,$rndkey0,$outmask,$outperm 1336 le?vxor $outperm,$outperm,$tmp 1337 1338 lvx $rndkey0,0,$key 1339 mtctr $rounds 1340 lvx $rndkey1,$idx,$key 1341 addi $idx,$idx,16 1342 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1343 vxor $inout,$ivec,$rndkey0 1344 lvx $rndkey0,$idx,$key 1345 addi $idx,$idx,16 1346 b Loop_ctr32_enc 1347 1348.align 5 1349Loop_ctr32_enc: 1350 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1351 vcipher $inout,$inout,$rndkey1 1352 lvx $rndkey1,$idx,$key 1353 addi $idx,$idx,16 1354 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1355 vcipher $inout,$inout,$rndkey0 1356 lvx $rndkey0,$idx,$key 1357 addi $idx,$idx,16 1358 bdnz Loop_ctr32_enc 1359 1360 vadduwm $ivec,$ivec,$one 1361 vmr $dat,$inptail 1362 lvx $inptail,0,$inp 1363 addi $inp,$inp,16 1364 subic. $len,$len,1 # blocks-- 1365 1366 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1367 vcipher $inout,$inout,$rndkey1 1368 lvx $rndkey1,$idx,$key 1369 vperm $dat,$dat,$inptail,$inpperm 1370 li $idx,16 1371 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm 1372 lvx $rndkey0,0,$key 1373 vxor $dat,$dat,$rndkey1 # last round key 1374 vcipherlast $inout,$inout,$dat 1375 1376 lvx $rndkey1,$idx,$key 1377 addi $idx,$idx,16 1378 vperm $inout,$inout,$inout,$outperm 1379 vsel $dat,$outhead,$inout,$outmask 1380 mtctr $rounds 1381 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1382 vmr $outhead,$inout 1383 vxor $inout,$ivec,$rndkey0 1384 lvx $rndkey0,$idx,$key 1385 addi $idx,$idx,16 1386 stvx $dat,0,$out 1387 addi $out,$out,16 1388 bne Loop_ctr32_enc 1389 1390 addi $out,$out,-1 1391 lvx $inout,0,$out # redundant in aligned case 1392 vsel $inout,$outhead,$inout,$outmask 1393 stvx $inout,0,$out 1394 1395 mtspr 256,$vrsave 1396 blr 1397 .long 0 1398 .byte 0,12,0x14,0,0,0,6,0 1399 .long 0 1400___ 1401######################################################################### 1402{{ # Optimized CTR procedure # 1403my $key_="r11"; 1404my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); 1405my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); 1406my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); 1407my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 1408 # v26-v31 last 6 round keys 1409my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment 1410my ($two,$three,$four)=($outhead,$outperm,$outmask); 1411 1412$code.=<<___; 1413.align 5 1414_aesp8_ctr32_encrypt8x: 1415 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 1416 li r10,`$FRAME+8*16+15` 1417 li r11,`$FRAME+8*16+31` 1418 stvx v20,r10,$sp # ABI says so 1419 addi r10,r10,32 1420 stvx v21,r11,$sp 1421 addi r11,r11,32 1422 stvx v22,r10,$sp 1423 addi r10,r10,32 1424 stvx v23,r11,$sp 1425 addi r11,r11,32 1426 stvx v24,r10,$sp 1427 addi r10,r10,32 1428 stvx v25,r11,$sp 1429 addi r11,r11,32 1430 stvx v26,r10,$sp 1431 addi r10,r10,32 1432 stvx v27,r11,$sp 1433 addi r11,r11,32 1434 stvx v28,r10,$sp 1435 addi r10,r10,32 1436 stvx v29,r11,$sp 1437 addi r11,r11,32 1438 stvx v30,r10,$sp 1439 stvx v31,r11,$sp 1440 li r0,-1 1441 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 1442 li $x10,0x10 1443 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1444 li $x20,0x20 1445 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1446 li $x30,0x30 1447 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1448 li $x40,0x40 1449 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1450 li $x50,0x50 1451 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1452 li $x60,0x60 1453 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1454 li $x70,0x70 1455 mtspr 256,r0 1456 1457 subi $rounds,$rounds,3 # -4 in total 1458 1459 lvx $rndkey0,$x00,$key # load key schedule 1460 lvx v30,$x10,$key 1461 addi $key,$key,0x20 1462 lvx v31,$x00,$key 1463 ?vperm $rndkey0,$rndkey0,v30,$keyperm 1464 addi $key_,$sp,$FRAME+15 1465 mtctr $rounds 1466 1467Load_ctr32_enc_key: 1468 ?vperm v24,v30,v31,$keyperm 1469 lvx v30,$x10,$key 1470 addi $key,$key,0x20 1471 stvx v24,$x00,$key_ # off-load round[1] 1472 ?vperm v25,v31,v30,$keyperm 1473 lvx v31,$x00,$key 1474 stvx v25,$x10,$key_ # off-load round[2] 1475 addi $key_,$key_,0x20 1476 bdnz Load_ctr32_enc_key 1477 1478 lvx v26,$x10,$key 1479 ?vperm v24,v30,v31,$keyperm 1480 lvx v27,$x20,$key 1481 stvx v24,$x00,$key_ # off-load round[3] 1482 ?vperm v25,v31,v26,$keyperm 1483 lvx v28,$x30,$key 1484 stvx v25,$x10,$key_ # off-load round[4] 1485 addi $key_,$sp,$FRAME+15 # rewind $key_ 1486 ?vperm v26,v26,v27,$keyperm 1487 lvx v29,$x40,$key 1488 ?vperm v27,v27,v28,$keyperm 1489 lvx v30,$x50,$key 1490 ?vperm v28,v28,v29,$keyperm 1491 lvx v31,$x60,$key 1492 ?vperm v29,v29,v30,$keyperm 1493 lvx $out0,$x70,$key # borrow $out0 1494 ?vperm v30,v30,v31,$keyperm 1495 lvx v24,$x00,$key_ # pre-load round[1] 1496 ?vperm v31,v31,$out0,$keyperm 1497 lvx v25,$x10,$key_ # pre-load round[2] 1498 1499 vadduqm $two,$one,$one 1500 subi $inp,$inp,15 # undo "caller" 1501 $SHL $len,$len,4 1502 1503 vadduqm $out1,$ivec,$one # counter values ... 1504 vadduqm $out2,$ivec,$two 1505 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] 1506 le?li $idx,8 1507 vadduqm $out3,$out1,$two 1508 vxor $out1,$out1,$rndkey0 1509 le?lvsl $inpperm,0,$idx 1510 vadduqm $out4,$out2,$two 1511 vxor $out2,$out2,$rndkey0 1512 le?vspltisb $tmp,0x0f 1513 vadduqm $out5,$out3,$two 1514 vxor $out3,$out3,$rndkey0 1515 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u 1516 vadduqm $out6,$out4,$two 1517 vxor $out4,$out4,$rndkey0 1518 vadduqm $out7,$out5,$two 1519 vxor $out5,$out5,$rndkey0 1520 vadduqm $ivec,$out6,$two # next counter value 1521 vxor $out6,$out6,$rndkey0 1522 vxor $out7,$out7,$rndkey0 1523 1524 mtctr $rounds 1525 b Loop_ctr32_enc8x 1526.align 5 1527Loop_ctr32_enc8x: 1528 vcipher $out0,$out0,v24 1529 vcipher $out1,$out1,v24 1530 vcipher $out2,$out2,v24 1531 vcipher $out3,$out3,v24 1532 vcipher $out4,$out4,v24 1533 vcipher $out5,$out5,v24 1534 vcipher $out6,$out6,v24 1535 vcipher $out7,$out7,v24 1536Loop_ctr32_enc8x_middle: 1537 lvx v24,$x20,$key_ # round[3] 1538 addi $key_,$key_,0x20 1539 1540 vcipher $out0,$out0,v25 1541 vcipher $out1,$out1,v25 1542 vcipher $out2,$out2,v25 1543 vcipher $out3,$out3,v25 1544 vcipher $out4,$out4,v25 1545 vcipher $out5,$out5,v25 1546 vcipher $out6,$out6,v25 1547 vcipher $out7,$out7,v25 1548 lvx v25,$x10,$key_ # round[4] 1549 bdnz Loop_ctr32_enc8x 1550 1551 subic r11,$len,256 # $len-256, borrow $key_ 1552 vcipher $out0,$out0,v24 1553 vcipher $out1,$out1,v24 1554 vcipher $out2,$out2,v24 1555 vcipher $out3,$out3,v24 1556 vcipher $out4,$out4,v24 1557 vcipher $out5,$out5,v24 1558 vcipher $out6,$out6,v24 1559 vcipher $out7,$out7,v24 1560 1561 subfe r0,r0,r0 # borrow?-1:0 1562 vcipher $out0,$out0,v25 1563 vcipher $out1,$out1,v25 1564 vcipher $out2,$out2,v25 1565 vcipher $out3,$out3,v25 1566 vcipher $out4,$out4,v25 1567 vcipher $out5,$out5,v25 1568 vcipher $out6,$out6,v25 1569 vcipher $out7,$out7,v25 1570 1571 and r0,r0,r11 1572 addi $key_,$sp,$FRAME+15 # rewind $key_ 1573 vcipher $out0,$out0,v26 1574 vcipher $out1,$out1,v26 1575 vcipher $out2,$out2,v26 1576 vcipher $out3,$out3,v26 1577 vcipher $out4,$out4,v26 1578 vcipher $out5,$out5,v26 1579 vcipher $out6,$out6,v26 1580 vcipher $out7,$out7,v26 1581 lvx v24,$x00,$key_ # re-pre-load round[1] 1582 1583 subic $len,$len,129 # $len-=129 1584 vcipher $out0,$out0,v27 1585 addi $len,$len,1 # $len-=128 really 1586 vcipher $out1,$out1,v27 1587 vcipher $out2,$out2,v27 1588 vcipher $out3,$out3,v27 1589 vcipher $out4,$out4,v27 1590 vcipher $out5,$out5,v27 1591 vcipher $out6,$out6,v27 1592 vcipher $out7,$out7,v27 1593 lvx v25,$x10,$key_ # re-pre-load round[2] 1594 1595 vcipher $out0,$out0,v28 1596 lvx_u $in0,$x00,$inp # load input 1597 vcipher $out1,$out1,v28 1598 lvx_u $in1,$x10,$inp 1599 vcipher $out2,$out2,v28 1600 lvx_u $in2,$x20,$inp 1601 vcipher $out3,$out3,v28 1602 lvx_u $in3,$x30,$inp 1603 vcipher $out4,$out4,v28 1604 lvx_u $in4,$x40,$inp 1605 vcipher $out5,$out5,v28 1606 lvx_u $in5,$x50,$inp 1607 vcipher $out6,$out6,v28 1608 lvx_u $in6,$x60,$inp 1609 vcipher $out7,$out7,v28 1610 lvx_u $in7,$x70,$inp 1611 addi $inp,$inp,0x80 1612 1613 vcipher $out0,$out0,v29 1614 le?vperm $in0,$in0,$in0,$inpperm 1615 vcipher $out1,$out1,v29 1616 le?vperm $in1,$in1,$in1,$inpperm 1617 vcipher $out2,$out2,v29 1618 le?vperm $in2,$in2,$in2,$inpperm 1619 vcipher $out3,$out3,v29 1620 le?vperm $in3,$in3,$in3,$inpperm 1621 vcipher $out4,$out4,v29 1622 le?vperm $in4,$in4,$in4,$inpperm 1623 vcipher $out5,$out5,v29 1624 le?vperm $in5,$in5,$in5,$inpperm 1625 vcipher $out6,$out6,v29 1626 le?vperm $in6,$in6,$in6,$inpperm 1627 vcipher $out7,$out7,v29 1628 le?vperm $in7,$in7,$in7,$inpperm 1629 1630 add $inp,$inp,r0 # $inp is adjusted in such 1631 # way that at exit from the 1632 # loop inX-in7 are loaded 1633 # with last "words" 1634 subfe. r0,r0,r0 # borrow?-1:0 1635 vcipher $out0,$out0,v30 1636 vxor $in0,$in0,v31 # xor with last round key 1637 vcipher $out1,$out1,v30 1638 vxor $in1,$in1,v31 1639 vcipher $out2,$out2,v30 1640 vxor $in2,$in2,v31 1641 vcipher $out3,$out3,v30 1642 vxor $in3,$in3,v31 1643 vcipher $out4,$out4,v30 1644 vxor $in4,$in4,v31 1645 vcipher $out5,$out5,v30 1646 vxor $in5,$in5,v31 1647 vcipher $out6,$out6,v30 1648 vxor $in6,$in6,v31 1649 vcipher $out7,$out7,v30 1650 vxor $in7,$in7,v31 1651 1652 bne Lctr32_enc8x_break # did $len-129 borrow? 1653 1654 vcipherlast $in0,$out0,$in0 1655 vcipherlast $in1,$out1,$in1 1656 vadduqm $out1,$ivec,$one # counter values ... 1657 vcipherlast $in2,$out2,$in2 1658 vadduqm $out2,$ivec,$two 1659 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] 1660 vcipherlast $in3,$out3,$in3 1661 vadduqm $out3,$out1,$two 1662 vxor $out1,$out1,$rndkey0 1663 vcipherlast $in4,$out4,$in4 1664 vadduqm $out4,$out2,$two 1665 vxor $out2,$out2,$rndkey0 1666 vcipherlast $in5,$out5,$in5 1667 vadduqm $out5,$out3,$two 1668 vxor $out3,$out3,$rndkey0 1669 vcipherlast $in6,$out6,$in6 1670 vadduqm $out6,$out4,$two 1671 vxor $out4,$out4,$rndkey0 1672 vcipherlast $in7,$out7,$in7 1673 vadduqm $out7,$out5,$two 1674 vxor $out5,$out5,$rndkey0 1675 le?vperm $in0,$in0,$in0,$inpperm 1676 vadduqm $ivec,$out6,$two # next counter value 1677 vxor $out6,$out6,$rndkey0 1678 le?vperm $in1,$in1,$in1,$inpperm 1679 vxor $out7,$out7,$rndkey0 1680 mtctr $rounds 1681 1682 vcipher $out0,$out0,v24 1683 stvx_u $in0,$x00,$out 1684 le?vperm $in2,$in2,$in2,$inpperm 1685 vcipher $out1,$out1,v24 1686 stvx_u $in1,$x10,$out 1687 le?vperm $in3,$in3,$in3,$inpperm 1688 vcipher $out2,$out2,v24 1689 stvx_u $in2,$x20,$out 1690 le?vperm $in4,$in4,$in4,$inpperm 1691 vcipher $out3,$out3,v24 1692 stvx_u $in3,$x30,$out 1693 le?vperm $in5,$in5,$in5,$inpperm 1694 vcipher $out4,$out4,v24 1695 stvx_u $in4,$x40,$out 1696 le?vperm $in6,$in6,$in6,$inpperm 1697 vcipher $out5,$out5,v24 1698 stvx_u $in5,$x50,$out 1699 le?vperm $in7,$in7,$in7,$inpperm 1700 vcipher $out6,$out6,v24 1701 stvx_u $in6,$x60,$out 1702 vcipher $out7,$out7,v24 1703 stvx_u $in7,$x70,$out 1704 addi $out,$out,0x80 1705 1706 b Loop_ctr32_enc8x_middle 1707 1708.align 5 1709Lctr32_enc8x_break: 1710 cmpwi $len,-0x60 1711 blt Lctr32_enc8x_one 1712 nop 1713 beq Lctr32_enc8x_two 1714 cmpwi $len,-0x40 1715 blt Lctr32_enc8x_three 1716 nop 1717 beq Lctr32_enc8x_four 1718 cmpwi $len,-0x20 1719 blt Lctr32_enc8x_five 1720 nop 1721 beq Lctr32_enc8x_six 1722 cmpwi $len,0x00 1723 blt Lctr32_enc8x_seven 1724 1725Lctr32_enc8x_eight: 1726 vcipherlast $out0,$out0,$in0 1727 vcipherlast $out1,$out1,$in1 1728 vcipherlast $out2,$out2,$in2 1729 vcipherlast $out3,$out3,$in3 1730 vcipherlast $out4,$out4,$in4 1731 vcipherlast $out5,$out5,$in5 1732 vcipherlast $out6,$out6,$in6 1733 vcipherlast $out7,$out7,$in7 1734 1735 le?vperm $out0,$out0,$out0,$inpperm 1736 le?vperm $out1,$out1,$out1,$inpperm 1737 stvx_u $out0,$x00,$out 1738 le?vperm $out2,$out2,$out2,$inpperm 1739 stvx_u $out1,$x10,$out 1740 le?vperm $out3,$out3,$out3,$inpperm 1741 stvx_u $out2,$x20,$out 1742 le?vperm $out4,$out4,$out4,$inpperm 1743 stvx_u $out3,$x30,$out 1744 le?vperm $out5,$out5,$out5,$inpperm 1745 stvx_u $out4,$x40,$out 1746 le?vperm $out6,$out6,$out6,$inpperm 1747 stvx_u $out5,$x50,$out 1748 le?vperm $out7,$out7,$out7,$inpperm 1749 stvx_u $out6,$x60,$out 1750 stvx_u $out7,$x70,$out 1751 addi $out,$out,0x80 1752 b Lctr32_enc8x_done 1753 1754.align 5 1755Lctr32_enc8x_seven: 1756 vcipherlast $out0,$out0,$in1 1757 vcipherlast $out1,$out1,$in2 1758 vcipherlast $out2,$out2,$in3 1759 vcipherlast $out3,$out3,$in4 1760 vcipherlast $out4,$out4,$in5 1761 vcipherlast $out5,$out5,$in6 1762 vcipherlast $out6,$out6,$in7 1763 1764 le?vperm $out0,$out0,$out0,$inpperm 1765 le?vperm $out1,$out1,$out1,$inpperm 1766 stvx_u $out0,$x00,$out 1767 le?vperm $out2,$out2,$out2,$inpperm 1768 stvx_u $out1,$x10,$out 1769 le?vperm $out3,$out3,$out3,$inpperm 1770 stvx_u $out2,$x20,$out 1771 le?vperm $out4,$out4,$out4,$inpperm 1772 stvx_u $out3,$x30,$out 1773 le?vperm $out5,$out5,$out5,$inpperm 1774 stvx_u $out4,$x40,$out 1775 le?vperm $out6,$out6,$out6,$inpperm 1776 stvx_u $out5,$x50,$out 1777 stvx_u $out6,$x60,$out 1778 addi $out,$out,0x70 1779 b Lctr32_enc8x_done 1780 1781.align 5 1782Lctr32_enc8x_six: 1783 vcipherlast $out0,$out0,$in2 1784 vcipherlast $out1,$out1,$in3 1785 vcipherlast $out2,$out2,$in4 1786 vcipherlast $out3,$out3,$in5 1787 vcipherlast $out4,$out4,$in6 1788 vcipherlast $out5,$out5,$in7 1789 1790 le?vperm $out0,$out0,$out0,$inpperm 1791 le?vperm $out1,$out1,$out1,$inpperm 1792 stvx_u $out0,$x00,$out 1793 le?vperm $out2,$out2,$out2,$inpperm 1794 stvx_u $out1,$x10,$out 1795 le?vperm $out3,$out3,$out3,$inpperm 1796 stvx_u $out2,$x20,$out 1797 le?vperm $out4,$out4,$out4,$inpperm 1798 stvx_u $out3,$x30,$out 1799 le?vperm $out5,$out5,$out5,$inpperm 1800 stvx_u $out4,$x40,$out 1801 stvx_u $out5,$x50,$out 1802 addi $out,$out,0x60 1803 b Lctr32_enc8x_done 1804 1805.align 5 1806Lctr32_enc8x_five: 1807 vcipherlast $out0,$out0,$in3 1808 vcipherlast $out1,$out1,$in4 1809 vcipherlast $out2,$out2,$in5 1810 vcipherlast $out3,$out3,$in6 1811 vcipherlast $out4,$out4,$in7 1812 1813 le?vperm $out0,$out0,$out0,$inpperm 1814 le?vperm $out1,$out1,$out1,$inpperm 1815 stvx_u $out0,$x00,$out 1816 le?vperm $out2,$out2,$out2,$inpperm 1817 stvx_u $out1,$x10,$out 1818 le?vperm $out3,$out3,$out3,$inpperm 1819 stvx_u $out2,$x20,$out 1820 le?vperm $out4,$out4,$out4,$inpperm 1821 stvx_u $out3,$x30,$out 1822 stvx_u $out4,$x40,$out 1823 addi $out,$out,0x50 1824 b Lctr32_enc8x_done 1825 1826.align 5 1827Lctr32_enc8x_four: 1828 vcipherlast $out0,$out0,$in4 1829 vcipherlast $out1,$out1,$in5 1830 vcipherlast $out2,$out2,$in6 1831 vcipherlast $out3,$out3,$in7 1832 1833 le?vperm $out0,$out0,$out0,$inpperm 1834 le?vperm $out1,$out1,$out1,$inpperm 1835 stvx_u $out0,$x00,$out 1836 le?vperm $out2,$out2,$out2,$inpperm 1837 stvx_u $out1,$x10,$out 1838 le?vperm $out3,$out3,$out3,$inpperm 1839 stvx_u $out2,$x20,$out 1840 stvx_u $out3,$x30,$out 1841 addi $out,$out,0x40 1842 b Lctr32_enc8x_done 1843 1844.align 5 1845Lctr32_enc8x_three: 1846 vcipherlast $out0,$out0,$in5 1847 vcipherlast $out1,$out1,$in6 1848 vcipherlast $out2,$out2,$in7 1849 1850 le?vperm $out0,$out0,$out0,$inpperm 1851 le?vperm $out1,$out1,$out1,$inpperm 1852 stvx_u $out0,$x00,$out 1853 le?vperm $out2,$out2,$out2,$inpperm 1854 stvx_u $out1,$x10,$out 1855 stvx_u $out2,$x20,$out 1856 addi $out,$out,0x30 1857 b Lctr32_enc8x_done 1858 1859.align 5 1860Lctr32_enc8x_two: 1861 vcipherlast $out0,$out0,$in6 1862 vcipherlast $out1,$out1,$in7 1863 1864 le?vperm $out0,$out0,$out0,$inpperm 1865 le?vperm $out1,$out1,$out1,$inpperm 1866 stvx_u $out0,$x00,$out 1867 stvx_u $out1,$x10,$out 1868 addi $out,$out,0x20 1869 b Lctr32_enc8x_done 1870 1871.align 5 1872Lctr32_enc8x_one: 1873 vcipherlast $out0,$out0,$in7 1874 1875 le?vperm $out0,$out0,$out0,$inpperm 1876 stvx_u $out0,0,$out 1877 addi $out,$out,0x10 1878 1879Lctr32_enc8x_done: 1880 li r10,`$FRAME+15` 1881 li r11,`$FRAME+31` 1882 stvx $inpperm,r10,$sp # wipe copies of round keys 1883 addi r10,r10,32 1884 stvx $inpperm,r11,$sp 1885 addi r11,r11,32 1886 stvx $inpperm,r10,$sp 1887 addi r10,r10,32 1888 stvx $inpperm,r11,$sp 1889 addi r11,r11,32 1890 stvx $inpperm,r10,$sp 1891 addi r10,r10,32 1892 stvx $inpperm,r11,$sp 1893 addi r11,r11,32 1894 stvx $inpperm,r10,$sp 1895 addi r10,r10,32 1896 stvx $inpperm,r11,$sp 1897 addi r11,r11,32 1898 1899 mtspr 256,$vrsave 1900 lvx v20,r10,$sp # ABI says so 1901 addi r10,r10,32 1902 lvx v21,r11,$sp 1903 addi r11,r11,32 1904 lvx v22,r10,$sp 1905 addi r10,r10,32 1906 lvx v23,r11,$sp 1907 addi r11,r11,32 1908 lvx v24,r10,$sp 1909 addi r10,r10,32 1910 lvx v25,r11,$sp 1911 addi r11,r11,32 1912 lvx v26,r10,$sp 1913 addi r10,r10,32 1914 lvx v27,r11,$sp 1915 addi r11,r11,32 1916 lvx v28,r10,$sp 1917 addi r10,r10,32 1918 lvx v29,r11,$sp 1919 addi r11,r11,32 1920 lvx v30,r10,$sp 1921 lvx v31,r11,$sp 1922 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1923 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1924 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1925 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1926 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1927 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1928 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 1929 blr 1930 .long 0 1931 .byte 0,12,0x14,0,0x80,6,6,0 1932 .long 0 1933.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks 1934___ 1935}} }}} 1936 1937######################################################################### 1938{{{ # XTS procedures # 1939# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, # 1940# const AES_KEY *key1, const AES_KEY *key2, # 1941# [const] unsigned char iv[16]); # 1942# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which # 1943# input tweak value is assumed to be encrypted already, and last tweak # 1944# value, one suitable for consecutive call on same chunk of data, is # 1945# written back to original buffer. In addition, in "tweak chaining" # 1946# mode only complete input blocks are processed. # 1947 1948my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); 1949my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); 1950my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); 1951my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); 1952my $taillen = $key2; 1953 1954 ($inp,$idx) = ($idx,$inp); # reassign 1955 1956$code.=<<___; 1957.globl .${prefix}_xts_encrypt 1958 mr $inp,r3 # reassign 1959 li r3,-1 1960 ${UCMP}i $len,16 1961 bltlr- 1962 1963 lis r0,0xfff0 1964 mfspr r12,256 # save vrsave 1965 li r11,0 1966 mtspr 256,r0 1967 1968 vspltisb $seven,0x07 # 0x070707..07 1969 le?lvsl $leperm,r11,r11 1970 le?vspltisb $tmp,0x0f 1971 le?vxor $leperm,$leperm,$seven 1972 1973 li $idx,15 1974 lvx $tweak,0,$ivp # load [unaligned] iv 1975 lvsl $inpperm,0,$ivp 1976 lvx $inptail,$idx,$ivp 1977 le?vxor $inpperm,$inpperm,$tmp 1978 vperm $tweak,$tweak,$inptail,$inpperm 1979 1980 neg r11,$inp 1981 lvsr $inpperm,0,r11 # prepare for unaligned load 1982 lvx $inout,0,$inp 1983 addi $inp,$inp,15 # 15 is not typo 1984 le?vxor $inpperm,$inpperm,$tmp 1985 1986 ${UCMP}i $key2,0 # key2==NULL? 1987 beq Lxts_enc_no_key2 1988 1989 ?lvsl $keyperm,0,$key2 # prepare for unaligned key 1990 lwz $rounds,240($key2) 1991 srwi $rounds,$rounds,1 1992 subi $rounds,$rounds,1 1993 li $idx,16 1994 1995 lvx $rndkey0,0,$key2 1996 lvx $rndkey1,$idx,$key2 1997 addi $idx,$idx,16 1998 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1999 vxor $tweak,$tweak,$rndkey0 2000 lvx $rndkey0,$idx,$key2 2001 addi $idx,$idx,16 2002 mtctr $rounds 2003 2004Ltweak_xts_enc: 2005 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2006 vcipher $tweak,$tweak,$rndkey1 2007 lvx $rndkey1,$idx,$key2 2008 addi $idx,$idx,16 2009 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2010 vcipher $tweak,$tweak,$rndkey0 2011 lvx $rndkey0,$idx,$key2 2012 addi $idx,$idx,16 2013 bdnz Ltweak_xts_enc 2014 2015 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2016 vcipher $tweak,$tweak,$rndkey1 2017 lvx $rndkey1,$idx,$key2 2018 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2019 vcipherlast $tweak,$tweak,$rndkey0 2020 2021 li $ivp,0 # don't chain the tweak 2022 b Lxts_enc 2023 2024Lxts_enc_no_key2: 2025 li $idx,-16 2026 and $len,$len,$idx # in "tweak chaining" 2027 # mode only complete 2028 # blocks are processed 2029Lxts_enc: 2030 lvx $inptail,0,$inp 2031 addi $inp,$inp,16 2032 2033 ?lvsl $keyperm,0,$key1 # prepare for unaligned key 2034 lwz $rounds,240($key1) 2035 srwi $rounds,$rounds,1 2036 subi $rounds,$rounds,1 2037 li $idx,16 2038 2039 vslb $eighty7,$seven,$seven # 0x808080..80 2040 vor $eighty7,$eighty7,$seven # 0x878787..87 2041 vspltisb $tmp,1 # 0x010101..01 2042 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 2043 2044 ${UCMP}i $len,96 2045 bge _aesp8_xts_encrypt6x 2046 2047 andi. $taillen,$len,15 2048 subic r0,$len,32 2049 subi $taillen,$taillen,16 2050 subfe r0,r0,r0 2051 and r0,r0,$taillen 2052 add $inp,$inp,r0 2053 2054 lvx $rndkey0,0,$key1 2055 lvx $rndkey1,$idx,$key1 2056 addi $idx,$idx,16 2057 vperm $inout,$inout,$inptail,$inpperm 2058 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2059 vxor $inout,$inout,$tweak 2060 vxor $inout,$inout,$rndkey0 2061 lvx $rndkey0,$idx,$key1 2062 addi $idx,$idx,16 2063 mtctr $rounds 2064 b Loop_xts_enc 2065 2066.align 5 2067Loop_xts_enc: 2068 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2069 vcipher $inout,$inout,$rndkey1 2070 lvx $rndkey1,$idx,$key1 2071 addi $idx,$idx,16 2072 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2073 vcipher $inout,$inout,$rndkey0 2074 lvx $rndkey0,$idx,$key1 2075 addi $idx,$idx,16 2076 bdnz Loop_xts_enc 2077 2078 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2079 vcipher $inout,$inout,$rndkey1 2080 lvx $rndkey1,$idx,$key1 2081 li $idx,16 2082 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2083 vxor $rndkey0,$rndkey0,$tweak 2084 vcipherlast $output,$inout,$rndkey0 2085 2086 le?vperm $tmp,$output,$output,$leperm 2087 be?nop 2088 le?stvx_u $tmp,0,$out 2089 be?stvx_u $output,0,$out 2090 addi $out,$out,16 2091 2092 subic. $len,$len,16 2093 beq Lxts_enc_done 2094 2095 vmr $inout,$inptail 2096 lvx $inptail,0,$inp 2097 addi $inp,$inp,16 2098 lvx $rndkey0,0,$key1 2099 lvx $rndkey1,$idx,$key1 2100 addi $idx,$idx,16 2101 2102 subic r0,$len,32 2103 subfe r0,r0,r0 2104 and r0,r0,$taillen 2105 add $inp,$inp,r0 2106 2107 vsrab $tmp,$tweak,$seven # next tweak value 2108 vaddubm $tweak,$tweak,$tweak 2109 vsldoi $tmp,$tmp,$tmp,15 2110 vand $tmp,$tmp,$eighty7 2111 vxor $tweak,$tweak,$tmp 2112 2113 vperm $inout,$inout,$inptail,$inpperm 2114 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2115 vxor $inout,$inout,$tweak 2116 vxor $output,$output,$rndkey0 # just in case $len<16 2117 vxor $inout,$inout,$rndkey0 2118 lvx $rndkey0,$idx,$key1 2119 addi $idx,$idx,16 2120 2121 mtctr $rounds 2122 ${UCMP}i $len,16 2123 bge Loop_xts_enc 2124 2125 vxor $output,$output,$tweak 2126 lvsr $inpperm,0,$len # $inpperm is no longer needed 2127 vxor $inptail,$inptail,$inptail # $inptail is no longer needed 2128 vspltisb $tmp,-1 2129 vperm $inptail,$inptail,$tmp,$inpperm 2130 vsel $inout,$inout,$output,$inptail 2131 2132 subi r11,$out,17 2133 subi $out,$out,16 2134 mtctr $len 2135 li $len,16 2136Loop_xts_enc_steal: 2137 lbzu r0,1(r11) 2138 stb r0,16(r11) 2139 bdnz Loop_xts_enc_steal 2140 2141 mtctr $rounds 2142 b Loop_xts_enc # one more time... 2143 2144Lxts_enc_done: 2145 ${UCMP}i $ivp,0 2146 beq Lxts_enc_ret 2147 2148 vsrab $tmp,$tweak,$seven # next tweak value 2149 vaddubm $tweak,$tweak,$tweak 2150 vsldoi $tmp,$tmp,$tmp,15 2151 vand $tmp,$tmp,$eighty7 2152 vxor $tweak,$tweak,$tmp 2153 2154 le?vperm $tweak,$tweak,$tweak,$leperm 2155 stvx_u $tweak,0,$ivp 2156 2157Lxts_enc_ret: 2158 mtspr 256,r12 # restore vrsave 2159 li r3,0 2160 blr 2161 .long 0 2162 .byte 0,12,0x04,0,0x80,6,6,0 2163 .long 0 2164.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt 2165 2166.globl .${prefix}_xts_decrypt 2167 mr $inp,r3 # reassign 2168 li r3,-1 2169 ${UCMP}i $len,16 2170 bltlr- 2171 2172 lis r0,0xfff8 2173 mfspr r12,256 # save vrsave 2174 li r11,0 2175 mtspr 256,r0 2176 2177 andi. r0,$len,15 2178 neg r0,r0 2179 andi. r0,r0,16 2180 sub $len,$len,r0 2181 2182 vspltisb $seven,0x07 # 0x070707..07 2183 le?lvsl $leperm,r11,r11 2184 le?vspltisb $tmp,0x0f 2185 le?vxor $leperm,$leperm,$seven 2186 2187 li $idx,15 2188 lvx $tweak,0,$ivp # load [unaligned] iv 2189 lvsl $inpperm,0,$ivp 2190 lvx $inptail,$idx,$ivp 2191 le?vxor $inpperm,$inpperm,$tmp 2192 vperm $tweak,$tweak,$inptail,$inpperm 2193 2194 neg r11,$inp 2195 lvsr $inpperm,0,r11 # prepare for unaligned load 2196 lvx $inout,0,$inp 2197 addi $inp,$inp,15 # 15 is not typo 2198 le?vxor $inpperm,$inpperm,$tmp 2199 2200 ${UCMP}i $key2,0 # key2==NULL? 2201 beq Lxts_dec_no_key2 2202 2203 ?lvsl $keyperm,0,$key2 # prepare for unaligned key 2204 lwz $rounds,240($key2) 2205 srwi $rounds,$rounds,1 2206 subi $rounds,$rounds,1 2207 li $idx,16 2208 2209 lvx $rndkey0,0,$key2 2210 lvx $rndkey1,$idx,$key2 2211 addi $idx,$idx,16 2212 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2213 vxor $tweak,$tweak,$rndkey0 2214 lvx $rndkey0,$idx,$key2 2215 addi $idx,$idx,16 2216 mtctr $rounds 2217 2218Ltweak_xts_dec: 2219 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2220 vcipher $tweak,$tweak,$rndkey1 2221 lvx $rndkey1,$idx,$key2 2222 addi $idx,$idx,16 2223 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2224 vcipher $tweak,$tweak,$rndkey0 2225 lvx $rndkey0,$idx,$key2 2226 addi $idx,$idx,16 2227 bdnz Ltweak_xts_dec 2228 2229 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2230 vcipher $tweak,$tweak,$rndkey1 2231 lvx $rndkey1,$idx,$key2 2232 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2233 vcipherlast $tweak,$tweak,$rndkey0 2234 2235 li $ivp,0 # don't chain the tweak 2236 b Lxts_dec 2237 2238Lxts_dec_no_key2: 2239 neg $idx,$len 2240 andi. $idx,$idx,15 2241 add $len,$len,$idx # in "tweak chaining" 2242 # mode only complete 2243 # blocks are processed 2244Lxts_dec: 2245 lvx $inptail,0,$inp 2246 addi $inp,$inp,16 2247 2248 ?lvsl $keyperm,0,$key1 # prepare for unaligned key 2249 lwz $rounds,240($key1) 2250 srwi $rounds,$rounds,1 2251 subi $rounds,$rounds,1 2252 li $idx,16 2253 2254 vslb $eighty7,$seven,$seven # 0x808080..80 2255 vor $eighty7,$eighty7,$seven # 0x878787..87 2256 vspltisb $tmp,1 # 0x010101..01 2257 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 2258 2259 ${UCMP}i $len,96 2260 bge _aesp8_xts_decrypt6x 2261 2262 lvx $rndkey0,0,$key1 2263 lvx $rndkey1,$idx,$key1 2264 addi $idx,$idx,16 2265 vperm $inout,$inout,$inptail,$inpperm 2266 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2267 vxor $inout,$inout,$tweak 2268 vxor $inout,$inout,$rndkey0 2269 lvx $rndkey0,$idx,$key1 2270 addi $idx,$idx,16 2271 mtctr $rounds 2272 2273 ${UCMP}i $len,16 2274 blt Ltail_xts_dec 2275 be?b Loop_xts_dec 2276 2277.align 5 2278Loop_xts_dec: 2279 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2280 vncipher $inout,$inout,$rndkey1 2281 lvx $rndkey1,$idx,$key1 2282 addi $idx,$idx,16 2283 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2284 vncipher $inout,$inout,$rndkey0 2285 lvx $rndkey0,$idx,$key1 2286 addi $idx,$idx,16 2287 bdnz Loop_xts_dec 2288 2289 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2290 vncipher $inout,$inout,$rndkey1 2291 lvx $rndkey1,$idx,$key1 2292 li $idx,16 2293 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2294 vxor $rndkey0,$rndkey0,$tweak 2295 vncipherlast $output,$inout,$rndkey0 2296 2297 le?vperm $tmp,$output,$output,$leperm 2298 be?nop 2299 le?stvx_u $tmp,0,$out 2300 be?stvx_u $output,0,$out 2301 addi $out,$out,16 2302 2303 subic. $len,$len,16 2304 beq Lxts_dec_done 2305 2306 vmr $inout,$inptail 2307 lvx $inptail,0,$inp 2308 addi $inp,$inp,16 2309 lvx $rndkey0,0,$key1 2310 lvx $rndkey1,$idx,$key1 2311 addi $idx,$idx,16 2312 2313 vsrab $tmp,$tweak,$seven # next tweak value 2314 vaddubm $tweak,$tweak,$tweak 2315 vsldoi $tmp,$tmp,$tmp,15 2316 vand $tmp,$tmp,$eighty7 2317 vxor $tweak,$tweak,$tmp 2318 2319 vperm $inout,$inout,$inptail,$inpperm 2320 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2321 vxor $inout,$inout,$tweak 2322 vxor $inout,$inout,$rndkey0 2323 lvx $rndkey0,$idx,$key1 2324 addi $idx,$idx,16 2325 2326 mtctr $rounds 2327 ${UCMP}i $len,16 2328 bge Loop_xts_dec 2329 2330Ltail_xts_dec: 2331 vsrab $tmp,$tweak,$seven # next tweak value 2332 vaddubm $tweak1,$tweak,$tweak 2333 vsldoi $tmp,$tmp,$tmp,15 2334 vand $tmp,$tmp,$eighty7 2335 vxor $tweak1,$tweak1,$tmp 2336 2337 subi $inp,$inp,16 2338 add $inp,$inp,$len 2339 2340 vxor $inout,$inout,$tweak # :-( 2341 vxor $inout,$inout,$tweak1 # :-) 2342 2343Loop_xts_dec_short: 2344 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2345 vncipher $inout,$inout,$rndkey1 2346 lvx $rndkey1,$idx,$key1 2347 addi $idx,$idx,16 2348 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2349 vncipher $inout,$inout,$rndkey0 2350 lvx $rndkey0,$idx,$key1 2351 addi $idx,$idx,16 2352 bdnz Loop_xts_dec_short 2353 2354 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2355 vncipher $inout,$inout,$rndkey1 2356 lvx $rndkey1,$idx,$key1 2357 li $idx,16 2358 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2359 vxor $rndkey0,$rndkey0,$tweak1 2360 vncipherlast $output,$inout,$rndkey0 2361 2362 le?vperm $tmp,$output,$output,$leperm 2363 be?nop 2364 le?stvx_u $tmp,0,$out 2365 be?stvx_u $output,0,$out 2366 2367 vmr $inout,$inptail 2368 lvx $inptail,0,$inp 2369 #addi $inp,$inp,16 2370 lvx $rndkey0,0,$key1 2371 lvx $rndkey1,$idx,$key1 2372 addi $idx,$idx,16 2373 vperm $inout,$inout,$inptail,$inpperm 2374 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2375 2376 lvsr $inpperm,0,$len # $inpperm is no longer needed 2377 vxor $inptail,$inptail,$inptail # $inptail is no longer needed 2378 vspltisb $tmp,-1 2379 vperm $inptail,$inptail,$tmp,$inpperm 2380 vsel $inout,$inout,$output,$inptail 2381 2382 vxor $rndkey0,$rndkey0,$tweak 2383 vxor $inout,$inout,$rndkey0 2384 lvx $rndkey0,$idx,$key1 2385 addi $idx,$idx,16 2386 2387 subi r11,$out,1 2388 mtctr $len 2389 li $len,16 2390Loop_xts_dec_steal: 2391 lbzu r0,1(r11) 2392 stb r0,16(r11) 2393 bdnz Loop_xts_dec_steal 2394 2395 mtctr $rounds 2396 b Loop_xts_dec # one more time... 2397 2398Lxts_dec_done: 2399 ${UCMP}i $ivp,0 2400 beq Lxts_dec_ret 2401 2402 vsrab $tmp,$tweak,$seven # next tweak value 2403 vaddubm $tweak,$tweak,$tweak 2404 vsldoi $tmp,$tmp,$tmp,15 2405 vand $tmp,$tmp,$eighty7 2406 vxor $tweak,$tweak,$tmp 2407 2408 le?vperm $tweak,$tweak,$tweak,$leperm 2409 stvx_u $tweak,0,$ivp 2410 2411Lxts_dec_ret: 2412 mtspr 256,r12 # restore vrsave 2413 li r3,0 2414 blr 2415 .long 0 2416 .byte 0,12,0x04,0,0x80,6,6,0 2417 .long 0 2418.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt 2419___ 2420######################################################################### 2421{{ # Optimized XTS procedures # 2422my $key_=$key2; 2423my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31)); 2424 $x00=0 if ($flavour =~ /osx/); 2425my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5)); 2426my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); 2427my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); 2428my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 2429 # v26-v31 last 6 round keys 2430my ($keyperm)=($out0); # aliases with "caller", redundant assignment 2431my $taillen=$x70; 2432 2433$code.=<<___; 2434.align 5 2435_aesp8_xts_encrypt6x: 2436 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 2437 mflr r11 2438 li r7,`$FRAME+8*16+15` 2439 li r3,`$FRAME+8*16+31` 2440 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) 2441 stvx v20,r7,$sp # ABI says so 2442 addi r7,r7,32 2443 stvx v21,r3,$sp 2444 addi r3,r3,32 2445 stvx v22,r7,$sp 2446 addi r7,r7,32 2447 stvx v23,r3,$sp 2448 addi r3,r3,32 2449 stvx v24,r7,$sp 2450 addi r7,r7,32 2451 stvx v25,r3,$sp 2452 addi r3,r3,32 2453 stvx v26,r7,$sp 2454 addi r7,r7,32 2455 stvx v27,r3,$sp 2456 addi r3,r3,32 2457 stvx v28,r7,$sp 2458 addi r7,r7,32 2459 stvx v29,r3,$sp 2460 addi r3,r3,32 2461 stvx v30,r7,$sp 2462 stvx v31,r3,$sp 2463 li r0,-1 2464 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 2465 li $x10,0x10 2466 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 2467 li $x20,0x20 2468 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 2469 li $x30,0x30 2470 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 2471 li $x40,0x40 2472 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 2473 li $x50,0x50 2474 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 2475 li $x60,0x60 2476 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 2477 li $x70,0x70 2478 mtspr 256,r0 2479 2480 subi $rounds,$rounds,3 # -4 in total 2481 2482 lvx $rndkey0,$x00,$key1 # load key schedule 2483 lvx v30,$x10,$key1 2484 addi $key1,$key1,0x20 2485 lvx v31,$x00,$key1 2486 ?vperm $rndkey0,$rndkey0,v30,$keyperm 2487 addi $key_,$sp,$FRAME+15 2488 mtctr $rounds 2489 2490Load_xts_enc_key: 2491 ?vperm v24,v30,v31,$keyperm 2492 lvx v30,$x10,$key1 2493 addi $key1,$key1,0x20 2494 stvx v24,$x00,$key_ # off-load round[1] 2495 ?vperm v25,v31,v30,$keyperm 2496 lvx v31,$x00,$key1 2497 stvx v25,$x10,$key_ # off-load round[2] 2498 addi $key_,$key_,0x20 2499 bdnz Load_xts_enc_key 2500 2501 lvx v26,$x10,$key1 2502 ?vperm v24,v30,v31,$keyperm 2503 lvx v27,$x20,$key1 2504 stvx v24,$x00,$key_ # off-load round[3] 2505 ?vperm v25,v31,v26,$keyperm 2506 lvx v28,$x30,$key1 2507 stvx v25,$x10,$key_ # off-load round[4] 2508 addi $key_,$sp,$FRAME+15 # rewind $key_ 2509 ?vperm v26,v26,v27,$keyperm 2510 lvx v29,$x40,$key1 2511 ?vperm v27,v27,v28,$keyperm 2512 lvx v30,$x50,$key1 2513 ?vperm v28,v28,v29,$keyperm 2514 lvx v31,$x60,$key1 2515 ?vperm v29,v29,v30,$keyperm 2516 lvx $twk5,$x70,$key1 # borrow $twk5 2517 ?vperm v30,v30,v31,$keyperm 2518 lvx v24,$x00,$key_ # pre-load round[1] 2519 ?vperm v31,v31,$twk5,$keyperm 2520 lvx v25,$x10,$key_ # pre-load round[2] 2521 2522 vperm $in0,$inout,$inptail,$inpperm 2523 subi $inp,$inp,31 # undo "caller" 2524 vxor $twk0,$tweak,$rndkey0 2525 vsrab $tmp,$tweak,$seven # next tweak value 2526 vaddubm $tweak,$tweak,$tweak 2527 vsldoi $tmp,$tmp,$tmp,15 2528 vand $tmp,$tmp,$eighty7 2529 vxor $out0,$in0,$twk0 2530 vxor $tweak,$tweak,$tmp 2531 2532 lvx_u $in1,$x10,$inp 2533 vxor $twk1,$tweak,$rndkey0 2534 vsrab $tmp,$tweak,$seven # next tweak value 2535 vaddubm $tweak,$tweak,$tweak 2536 vsldoi $tmp,$tmp,$tmp,15 2537 le?vperm $in1,$in1,$in1,$leperm 2538 vand $tmp,$tmp,$eighty7 2539 vxor $out1,$in1,$twk1 2540 vxor $tweak,$tweak,$tmp 2541 2542 lvx_u $in2,$x20,$inp 2543 andi. $taillen,$len,15 2544 vxor $twk2,$tweak,$rndkey0 2545 vsrab $tmp,$tweak,$seven # next tweak value 2546 vaddubm $tweak,$tweak,$tweak 2547 vsldoi $tmp,$tmp,$tmp,15 2548 le?vperm $in2,$in2,$in2,$leperm 2549 vand $tmp,$tmp,$eighty7 2550 vxor $out2,$in2,$twk2 2551 vxor $tweak,$tweak,$tmp 2552 2553 lvx_u $in3,$x30,$inp 2554 sub $len,$len,$taillen 2555 vxor $twk3,$tweak,$rndkey0 2556 vsrab $tmp,$tweak,$seven # next tweak value 2557 vaddubm $tweak,$tweak,$tweak 2558 vsldoi $tmp,$tmp,$tmp,15 2559 le?vperm $in3,$in3,$in3,$leperm 2560 vand $tmp,$tmp,$eighty7 2561 vxor $out3,$in3,$twk3 2562 vxor $tweak,$tweak,$tmp 2563 2564 lvx_u $in4,$x40,$inp 2565 subi $len,$len,0x60 2566 vxor $twk4,$tweak,$rndkey0 2567 vsrab $tmp,$tweak,$seven # next tweak value 2568 vaddubm $tweak,$tweak,$tweak 2569 vsldoi $tmp,$tmp,$tmp,15 2570 le?vperm $in4,$in4,$in4,$leperm 2571 vand $tmp,$tmp,$eighty7 2572 vxor $out4,$in4,$twk4 2573 vxor $tweak,$tweak,$tmp 2574 2575 lvx_u $in5,$x50,$inp 2576 addi $inp,$inp,0x60 2577 vxor $twk5,$tweak,$rndkey0 2578 vsrab $tmp,$tweak,$seven # next tweak value 2579 vaddubm $tweak,$tweak,$tweak 2580 vsldoi $tmp,$tmp,$tmp,15 2581 le?vperm $in5,$in5,$in5,$leperm 2582 vand $tmp,$tmp,$eighty7 2583 vxor $out5,$in5,$twk5 2584 vxor $tweak,$tweak,$tmp 2585 2586 vxor v31,v31,$rndkey0 2587 mtctr $rounds 2588 b Loop_xts_enc6x 2589 2590.align 5 2591Loop_xts_enc6x: 2592 vcipher $out0,$out0,v24 2593 vcipher $out1,$out1,v24 2594 vcipher $out2,$out2,v24 2595 vcipher $out3,$out3,v24 2596 vcipher $out4,$out4,v24 2597 vcipher $out5,$out5,v24 2598 lvx v24,$x20,$key_ # round[3] 2599 addi $key_,$key_,0x20 2600 2601 vcipher $out0,$out0,v25 2602 vcipher $out1,$out1,v25 2603 vcipher $out2,$out2,v25 2604 vcipher $out3,$out3,v25 2605 vcipher $out4,$out4,v25 2606 vcipher $out5,$out5,v25 2607 lvx v25,$x10,$key_ # round[4] 2608 bdnz Loop_xts_enc6x 2609 2610 subic $len,$len,96 # $len-=96 2611 vxor $in0,$twk0,v31 # xor with last round key 2612 vcipher $out0,$out0,v24 2613 vcipher $out1,$out1,v24 2614 vsrab $tmp,$tweak,$seven # next tweak value 2615 vxor $twk0,$tweak,$rndkey0 2616 vaddubm $tweak,$tweak,$tweak 2617 vcipher $out2,$out2,v24 2618 vcipher $out3,$out3,v24 2619 vsldoi $tmp,$tmp,$tmp,15 2620 vcipher $out4,$out4,v24 2621 vcipher $out5,$out5,v24 2622 2623 subfe. r0,r0,r0 # borrow?-1:0 2624 vand $tmp,$tmp,$eighty7 2625 vcipher $out0,$out0,v25 2626 vcipher $out1,$out1,v25 2627 vxor $tweak,$tweak,$tmp 2628 vcipher $out2,$out2,v25 2629 vcipher $out3,$out3,v25 2630 vxor $in1,$twk1,v31 2631 vsrab $tmp,$tweak,$seven # next tweak value 2632 vxor $twk1,$tweak,$rndkey0 2633 vcipher $out4,$out4,v25 2634 vcipher $out5,$out5,v25 2635 2636 and r0,r0,$len 2637 vaddubm $tweak,$tweak,$tweak 2638 vsldoi $tmp,$tmp,$tmp,15 2639 vcipher $out0,$out0,v26 2640 vcipher $out1,$out1,v26 2641 vand $tmp,$tmp,$eighty7 2642 vcipher $out2,$out2,v26 2643 vcipher $out3,$out3,v26 2644 vxor $tweak,$tweak,$tmp 2645 vcipher $out4,$out4,v26 2646 vcipher $out5,$out5,v26 2647 2648 add $inp,$inp,r0 # $inp is adjusted in such 2649 # way that at exit from the 2650 # loop inX-in5 are loaded 2651 # with last "words" 2652 vxor $in2,$twk2,v31 2653 vsrab $tmp,$tweak,$seven # next tweak value 2654 vxor $twk2,$tweak,$rndkey0 2655 vaddubm $tweak,$tweak,$tweak 2656 vcipher $out0,$out0,v27 2657 vcipher $out1,$out1,v27 2658 vsldoi $tmp,$tmp,$tmp,15 2659 vcipher $out2,$out2,v27 2660 vcipher $out3,$out3,v27 2661 vand $tmp,$tmp,$eighty7 2662 vcipher $out4,$out4,v27 2663 vcipher $out5,$out5,v27 2664 2665 addi $key_,$sp,$FRAME+15 # rewind $key_ 2666 vxor $tweak,$tweak,$tmp 2667 vcipher $out0,$out0,v28 2668 vcipher $out1,$out1,v28 2669 vxor $in3,$twk3,v31 2670 vsrab $tmp,$tweak,$seven # next tweak value 2671 vxor $twk3,$tweak,$rndkey0 2672 vcipher $out2,$out2,v28 2673 vcipher $out3,$out3,v28 2674 vaddubm $tweak,$tweak,$tweak 2675 vsldoi $tmp,$tmp,$tmp,15 2676 vcipher $out4,$out4,v28 2677 vcipher $out5,$out5,v28 2678 lvx v24,$x00,$key_ # re-pre-load round[1] 2679 vand $tmp,$tmp,$eighty7 2680 2681 vcipher $out0,$out0,v29 2682 vcipher $out1,$out1,v29 2683 vxor $tweak,$tweak,$tmp 2684 vcipher $out2,$out2,v29 2685 vcipher $out3,$out3,v29 2686 vxor $in4,$twk4,v31 2687 vsrab $tmp,$tweak,$seven # next tweak value 2688 vxor $twk4,$tweak,$rndkey0 2689 vcipher $out4,$out4,v29 2690 vcipher $out5,$out5,v29 2691 lvx v25,$x10,$key_ # re-pre-load round[2] 2692 vaddubm $tweak,$tweak,$tweak 2693 vsldoi $tmp,$tmp,$tmp,15 2694 2695 vcipher $out0,$out0,v30 2696 vcipher $out1,$out1,v30 2697 vand $tmp,$tmp,$eighty7 2698 vcipher $out2,$out2,v30 2699 vcipher $out3,$out3,v30 2700 vxor $tweak,$tweak,$tmp 2701 vcipher $out4,$out4,v30 2702 vcipher $out5,$out5,v30 2703 vxor $in5,$twk5,v31 2704 vsrab $tmp,$tweak,$seven # next tweak value 2705 vxor $twk5,$tweak,$rndkey0 2706 2707 vcipherlast $out0,$out0,$in0 2708 lvx_u $in0,$x00,$inp # load next input block 2709 vaddubm $tweak,$tweak,$tweak 2710 vsldoi $tmp,$tmp,$tmp,15 2711 vcipherlast $out1,$out1,$in1 2712 lvx_u $in1,$x10,$inp 2713 vcipherlast $out2,$out2,$in2 2714 le?vperm $in0,$in0,$in0,$leperm 2715 lvx_u $in2,$x20,$inp 2716 vand $tmp,$tmp,$eighty7 2717 vcipherlast $out3,$out3,$in3 2718 le?vperm $in1,$in1,$in1,$leperm 2719 lvx_u $in3,$x30,$inp 2720 vcipherlast $out4,$out4,$in4 2721 le?vperm $in2,$in2,$in2,$leperm 2722 lvx_u $in4,$x40,$inp 2723 vxor $tweak,$tweak,$tmp 2724 vcipherlast $tmp,$out5,$in5 # last block might be needed 2725 # in stealing mode 2726 le?vperm $in3,$in3,$in3,$leperm 2727 lvx_u $in5,$x50,$inp 2728 addi $inp,$inp,0x60 2729 le?vperm $in4,$in4,$in4,$leperm 2730 le?vperm $in5,$in5,$in5,$leperm 2731 2732 le?vperm $out0,$out0,$out0,$leperm 2733 le?vperm $out1,$out1,$out1,$leperm 2734 stvx_u $out0,$x00,$out # store output 2735 vxor $out0,$in0,$twk0 2736 le?vperm $out2,$out2,$out2,$leperm 2737 stvx_u $out1,$x10,$out 2738 vxor $out1,$in1,$twk1 2739 le?vperm $out3,$out3,$out3,$leperm 2740 stvx_u $out2,$x20,$out 2741 vxor $out2,$in2,$twk2 2742 le?vperm $out4,$out4,$out4,$leperm 2743 stvx_u $out3,$x30,$out 2744 vxor $out3,$in3,$twk3 2745 le?vperm $out5,$tmp,$tmp,$leperm 2746 stvx_u $out4,$x40,$out 2747 vxor $out4,$in4,$twk4 2748 le?stvx_u $out5,$x50,$out 2749 be?stvx_u $tmp, $x50,$out 2750 vxor $out5,$in5,$twk5 2751 addi $out,$out,0x60 2752 2753 mtctr $rounds 2754 beq Loop_xts_enc6x # did $len-=96 borrow? 2755 2756 addic. $len,$len,0x60 2757 beq Lxts_enc6x_zero 2758 cmpwi $len,0x20 2759 blt Lxts_enc6x_one 2760 nop 2761 beq Lxts_enc6x_two 2762 cmpwi $len,0x40 2763 blt Lxts_enc6x_three 2764 nop 2765 beq Lxts_enc6x_four 2766 2767Lxts_enc6x_five: 2768 vxor $out0,$in1,$twk0 2769 vxor $out1,$in2,$twk1 2770 vxor $out2,$in3,$twk2 2771 vxor $out3,$in4,$twk3 2772 vxor $out4,$in5,$twk4 2773 2774 bl _aesp8_xts_enc5x 2775 2776 le?vperm $out0,$out0,$out0,$leperm 2777 vmr $twk0,$twk5 # unused tweak 2778 le?vperm $out1,$out1,$out1,$leperm 2779 stvx_u $out0,$x00,$out # store output 2780 le?vperm $out2,$out2,$out2,$leperm 2781 stvx_u $out1,$x10,$out 2782 le?vperm $out3,$out3,$out3,$leperm 2783 stvx_u $out2,$x20,$out 2784 vxor $tmp,$out4,$twk5 # last block prep for stealing 2785 le?vperm $out4,$out4,$out4,$leperm 2786 stvx_u $out3,$x30,$out 2787 stvx_u $out4,$x40,$out 2788 addi $out,$out,0x50 2789 bne Lxts_enc6x_steal 2790 b Lxts_enc6x_done 2791 2792.align 4 2793Lxts_enc6x_four: 2794 vxor $out0,$in2,$twk0 2795 vxor $out1,$in3,$twk1 2796 vxor $out2,$in4,$twk2 2797 vxor $out3,$in5,$twk3 2798 vxor $out4,$out4,$out4 2799 2800 bl _aesp8_xts_enc5x 2801 2802 le?vperm $out0,$out0,$out0,$leperm 2803 vmr $twk0,$twk4 # unused tweak 2804 le?vperm $out1,$out1,$out1,$leperm 2805 stvx_u $out0,$x00,$out # store output 2806 le?vperm $out2,$out2,$out2,$leperm 2807 stvx_u $out1,$x10,$out 2808 vxor $tmp,$out3,$twk4 # last block prep for stealing 2809 le?vperm $out3,$out3,$out3,$leperm 2810 stvx_u $out2,$x20,$out 2811 stvx_u $out3,$x30,$out 2812 addi $out,$out,0x40 2813 bne Lxts_enc6x_steal 2814 b Lxts_enc6x_done 2815 2816.align 4 2817Lxts_enc6x_three: 2818 vxor $out0,$in3,$twk0 2819 vxor $out1,$in4,$twk1 2820 vxor $out2,$in5,$twk2 2821 vxor $out3,$out3,$out3 2822 vxor $out4,$out4,$out4 2823 2824 bl _aesp8_xts_enc5x 2825 2826 le?vperm $out0,$out0,$out0,$leperm 2827 vmr $twk0,$twk3 # unused tweak 2828 le?vperm $out1,$out1,$out1,$leperm 2829 stvx_u $out0,$x00,$out # store output 2830 vxor $tmp,$out2,$twk3 # last block prep for stealing 2831 le?vperm $out2,$out2,$out2,$leperm 2832 stvx_u $out1,$x10,$out 2833 stvx_u $out2,$x20,$out 2834 addi $out,$out,0x30 2835 bne Lxts_enc6x_steal 2836 b Lxts_enc6x_done 2837 2838.align 4 2839Lxts_enc6x_two: 2840 vxor $out0,$in4,$twk0 2841 vxor $out1,$in5,$twk1 2842 vxor $out2,$out2,$out2 2843 vxor $out3,$out3,$out3 2844 vxor $out4,$out4,$out4 2845 2846 bl _aesp8_xts_enc5x 2847 2848 le?vperm $out0,$out0,$out0,$leperm 2849 vmr $twk0,$twk2 # unused tweak 2850 vxor $tmp,$out1,$twk2 # last block prep for stealing 2851 le?vperm $out1,$out1,$out1,$leperm 2852 stvx_u $out0,$x00,$out # store output 2853 stvx_u $out1,$x10,$out 2854 addi $out,$out,0x20 2855 bne Lxts_enc6x_steal 2856 b Lxts_enc6x_done 2857 2858.align 4 2859Lxts_enc6x_one: 2860 vxor $out0,$in5,$twk0 2861 nop 2862Loop_xts_enc1x: 2863 vcipher $out0,$out0,v24 2864 lvx v24,$x20,$key_ # round[3] 2865 addi $key_,$key_,0x20 2866 2867 vcipher $out0,$out0,v25 2868 lvx v25,$x10,$key_ # round[4] 2869 bdnz Loop_xts_enc1x 2870 2871 add $inp,$inp,$taillen 2872 cmpwi $taillen,0 2873 vcipher $out0,$out0,v24 2874 2875 subi $inp,$inp,16 2876 vcipher $out0,$out0,v25 2877 2878 lvsr $inpperm,0,$taillen 2879 vcipher $out0,$out0,v26 2880 2881 lvx_u $in0,0,$inp 2882 vcipher $out0,$out0,v27 2883 2884 addi $key_,$sp,$FRAME+15 # rewind $key_ 2885 vcipher $out0,$out0,v28 2886 lvx v24,$x00,$key_ # re-pre-load round[1] 2887 2888 vcipher $out0,$out0,v29 2889 lvx v25,$x10,$key_ # re-pre-load round[2] 2890 vxor $twk0,$twk0,v31 2891 2892 le?vperm $in0,$in0,$in0,$leperm 2893 vcipher $out0,$out0,v30 2894 2895 vperm $in0,$in0,$in0,$inpperm 2896 vcipherlast $out0,$out0,$twk0 2897 2898 vmr $twk0,$twk1 # unused tweak 2899 vxor $tmp,$out0,$twk1 # last block prep for stealing 2900 le?vperm $out0,$out0,$out0,$leperm 2901 stvx_u $out0,$x00,$out # store output 2902 addi $out,$out,0x10 2903 bne Lxts_enc6x_steal 2904 b Lxts_enc6x_done 2905 2906.align 4 2907Lxts_enc6x_zero: 2908 cmpwi $taillen,0 2909 beq Lxts_enc6x_done 2910 2911 add $inp,$inp,$taillen 2912 subi $inp,$inp,16 2913 lvx_u $in0,0,$inp 2914 lvsr $inpperm,0,$taillen # $in5 is no more 2915 le?vperm $in0,$in0,$in0,$leperm 2916 vperm $in0,$in0,$in0,$inpperm 2917 vxor $tmp,$tmp,$twk0 2918Lxts_enc6x_steal: 2919 vxor $in0,$in0,$twk0 2920 vxor $out0,$out0,$out0 2921 vspltisb $out1,-1 2922 vperm $out0,$out0,$out1,$inpperm 2923 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember? 2924 2925 subi r30,$out,17 2926 subi $out,$out,16 2927 mtctr $taillen 2928Loop_xts_enc6x_steal: 2929 lbzu r0,1(r30) 2930 stb r0,16(r30) 2931 bdnz Loop_xts_enc6x_steal 2932 2933 li $taillen,0 2934 mtctr $rounds 2935 b Loop_xts_enc1x # one more time... 2936 2937.align 4 2938Lxts_enc6x_done: 2939 ${UCMP}i $ivp,0 2940 beq Lxts_enc6x_ret 2941 2942 vxor $tweak,$twk0,$rndkey0 2943 le?vperm $tweak,$tweak,$tweak,$leperm 2944 stvx_u $tweak,0,$ivp 2945 2946Lxts_enc6x_ret: 2947 mtlr r11 2948 li r10,`$FRAME+15` 2949 li r11,`$FRAME+31` 2950 stvx $seven,r10,$sp # wipe copies of round keys 2951 addi r10,r10,32 2952 stvx $seven,r11,$sp 2953 addi r11,r11,32 2954 stvx $seven,r10,$sp 2955 addi r10,r10,32 2956 stvx $seven,r11,$sp 2957 addi r11,r11,32 2958 stvx $seven,r10,$sp 2959 addi r10,r10,32 2960 stvx $seven,r11,$sp 2961 addi r11,r11,32 2962 stvx $seven,r10,$sp 2963 addi r10,r10,32 2964 stvx $seven,r11,$sp 2965 addi r11,r11,32 2966 2967 mtspr 256,$vrsave 2968 lvx v20,r10,$sp # ABI says so 2969 addi r10,r10,32 2970 lvx v21,r11,$sp 2971 addi r11,r11,32 2972 lvx v22,r10,$sp 2973 addi r10,r10,32 2974 lvx v23,r11,$sp 2975 addi r11,r11,32 2976 lvx v24,r10,$sp 2977 addi r10,r10,32 2978 lvx v25,r11,$sp 2979 addi r11,r11,32 2980 lvx v26,r10,$sp 2981 addi r10,r10,32 2982 lvx v27,r11,$sp 2983 addi r11,r11,32 2984 lvx v28,r10,$sp 2985 addi r10,r10,32 2986 lvx v29,r11,$sp 2987 addi r11,r11,32 2988 lvx v30,r10,$sp 2989 lvx v31,r11,$sp 2990 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 2991 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 2992 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 2993 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 2994 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 2995 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 2996 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 2997 blr 2998 .long 0 2999 .byte 0,12,0x04,1,0x80,6,6,0 3000 .long 0 3001 3002.align 5 3003_aesp8_xts_enc5x: 3004 vcipher $out0,$out0,v24 3005 vcipher $out1,$out1,v24 3006 vcipher $out2,$out2,v24 3007 vcipher $out3,$out3,v24 3008 vcipher $out4,$out4,v24 3009 lvx v24,$x20,$key_ # round[3] 3010 addi $key_,$key_,0x20 3011 3012 vcipher $out0,$out0,v25 3013 vcipher $out1,$out1,v25 3014 vcipher $out2,$out2,v25 3015 vcipher $out3,$out3,v25 3016 vcipher $out4,$out4,v25 3017 lvx v25,$x10,$key_ # round[4] 3018 bdnz _aesp8_xts_enc5x 3019 3020 add $inp,$inp,$taillen 3021 cmpwi $taillen,0 3022 vcipher $out0,$out0,v24 3023 vcipher $out1,$out1,v24 3024 vcipher $out2,$out2,v24 3025 vcipher $out3,$out3,v24 3026 vcipher $out4,$out4,v24 3027 3028 subi $inp,$inp,16 3029 vcipher $out0,$out0,v25 3030 vcipher $out1,$out1,v25 3031 vcipher $out2,$out2,v25 3032 vcipher $out3,$out3,v25 3033 vcipher $out4,$out4,v25 3034 vxor $twk0,$twk0,v31 3035 3036 vcipher $out0,$out0,v26 3037 lvsr $inpperm,r0,$taillen # $in5 is no more 3038 vcipher $out1,$out1,v26 3039 vcipher $out2,$out2,v26 3040 vcipher $out3,$out3,v26 3041 vcipher $out4,$out4,v26 3042 vxor $in1,$twk1,v31 3043 3044 vcipher $out0,$out0,v27 3045 lvx_u $in0,0,$inp 3046 vcipher $out1,$out1,v27 3047 vcipher $out2,$out2,v27 3048 vcipher $out3,$out3,v27 3049 vcipher $out4,$out4,v27 3050 vxor $in2,$twk2,v31 3051 3052 addi $key_,$sp,$FRAME+15 # rewind $key_ 3053 vcipher $out0,$out0,v28 3054 vcipher $out1,$out1,v28 3055 vcipher $out2,$out2,v28 3056 vcipher $out3,$out3,v28 3057 vcipher $out4,$out4,v28 3058 lvx v24,$x00,$key_ # re-pre-load round[1] 3059 vxor $in3,$twk3,v31 3060 3061 vcipher $out0,$out0,v29 3062 le?vperm $in0,$in0,$in0,$leperm 3063 vcipher $out1,$out1,v29 3064 vcipher $out2,$out2,v29 3065 vcipher $out3,$out3,v29 3066 vcipher $out4,$out4,v29 3067 lvx v25,$x10,$key_ # re-pre-load round[2] 3068 vxor $in4,$twk4,v31 3069 3070 vcipher $out0,$out0,v30 3071 vperm $in0,$in0,$in0,$inpperm 3072 vcipher $out1,$out1,v30 3073 vcipher $out2,$out2,v30 3074 vcipher $out3,$out3,v30 3075 vcipher $out4,$out4,v30 3076 3077 vcipherlast $out0,$out0,$twk0 3078 vcipherlast $out1,$out1,$in1 3079 vcipherlast $out2,$out2,$in2 3080 vcipherlast $out3,$out3,$in3 3081 vcipherlast $out4,$out4,$in4 3082 blr 3083 .long 0 3084 .byte 0,12,0x14,0,0,0,0,0 3085 3086.align 5 3087_aesp8_xts_decrypt6x: 3088 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 3089 mflr r11 3090 li r7,`$FRAME+8*16+15` 3091 li r3,`$FRAME+8*16+31` 3092 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) 3093 stvx v20,r7,$sp # ABI says so 3094 addi r7,r7,32 3095 stvx v21,r3,$sp 3096 addi r3,r3,32 3097 stvx v22,r7,$sp 3098 addi r7,r7,32 3099 stvx v23,r3,$sp 3100 addi r3,r3,32 3101 stvx v24,r7,$sp 3102 addi r7,r7,32 3103 stvx v25,r3,$sp 3104 addi r3,r3,32 3105 stvx v26,r7,$sp 3106 addi r7,r7,32 3107 stvx v27,r3,$sp 3108 addi r3,r3,32 3109 stvx v28,r7,$sp 3110 addi r7,r7,32 3111 stvx v29,r3,$sp 3112 addi r3,r3,32 3113 stvx v30,r7,$sp 3114 stvx v31,r3,$sp 3115 li r0,-1 3116 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 3117 li $x10,0x10 3118 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3119 li $x20,0x20 3120 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3121 li $x30,0x30 3122 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3123 li $x40,0x40 3124 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3125 li $x50,0x50 3126 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3127 li $x60,0x60 3128 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3129 li $x70,0x70 3130 mtspr 256,r0 3131 3132 subi $rounds,$rounds,3 # -4 in total 3133 3134 lvx $rndkey0,$x00,$key1 # load key schedule 3135 lvx v30,$x10,$key1 3136 addi $key1,$key1,0x20 3137 lvx v31,$x00,$key1 3138 ?vperm $rndkey0,$rndkey0,v30,$keyperm 3139 addi $key_,$sp,$FRAME+15 3140 mtctr $rounds 3141 3142Load_xts_dec_key: 3143 ?vperm v24,v30,v31,$keyperm 3144 lvx v30,$x10,$key1 3145 addi $key1,$key1,0x20 3146 stvx v24,$x00,$key_ # off-load round[1] 3147 ?vperm v25,v31,v30,$keyperm 3148 lvx v31,$x00,$key1 3149 stvx v25,$x10,$key_ # off-load round[2] 3150 addi $key_,$key_,0x20 3151 bdnz Load_xts_dec_key 3152 3153 lvx v26,$x10,$key1 3154 ?vperm v24,v30,v31,$keyperm 3155 lvx v27,$x20,$key1 3156 stvx v24,$x00,$key_ # off-load round[3] 3157 ?vperm v25,v31,v26,$keyperm 3158 lvx v28,$x30,$key1 3159 stvx v25,$x10,$key_ # off-load round[4] 3160 addi $key_,$sp,$FRAME+15 # rewind $key_ 3161 ?vperm v26,v26,v27,$keyperm 3162 lvx v29,$x40,$key1 3163 ?vperm v27,v27,v28,$keyperm 3164 lvx v30,$x50,$key1 3165 ?vperm v28,v28,v29,$keyperm 3166 lvx v31,$x60,$key1 3167 ?vperm v29,v29,v30,$keyperm 3168 lvx $twk5,$x70,$key1 # borrow $twk5 3169 ?vperm v30,v30,v31,$keyperm 3170 lvx v24,$x00,$key_ # pre-load round[1] 3171 ?vperm v31,v31,$twk5,$keyperm 3172 lvx v25,$x10,$key_ # pre-load round[2] 3173 3174 vperm $in0,$inout,$inptail,$inpperm 3175 subi $inp,$inp,31 # undo "caller" 3176 vxor $twk0,$tweak,$rndkey0 3177 vsrab $tmp,$tweak,$seven # next tweak value 3178 vaddubm $tweak,$tweak,$tweak 3179 vsldoi $tmp,$tmp,$tmp,15 3180 vand $tmp,$tmp,$eighty7 3181 vxor $out0,$in0,$twk0 3182 vxor $tweak,$tweak,$tmp 3183 3184 lvx_u $in1,$x10,$inp 3185 vxor $twk1,$tweak,$rndkey0 3186 vsrab $tmp,$tweak,$seven # next tweak value 3187 vaddubm $tweak,$tweak,$tweak 3188 vsldoi $tmp,$tmp,$tmp,15 3189 le?vperm $in1,$in1,$in1,$leperm 3190 vand $tmp,$tmp,$eighty7 3191 vxor $out1,$in1,$twk1 3192 vxor $tweak,$tweak,$tmp 3193 3194 lvx_u $in2,$x20,$inp 3195 andi. $taillen,$len,15 3196 vxor $twk2,$tweak,$rndkey0 3197 vsrab $tmp,$tweak,$seven # next tweak value 3198 vaddubm $tweak,$tweak,$tweak 3199 vsldoi $tmp,$tmp,$tmp,15 3200 le?vperm $in2,$in2,$in2,$leperm 3201 vand $tmp,$tmp,$eighty7 3202 vxor $out2,$in2,$twk2 3203 vxor $tweak,$tweak,$tmp 3204 3205 lvx_u $in3,$x30,$inp 3206 sub $len,$len,$taillen 3207 vxor $twk3,$tweak,$rndkey0 3208 vsrab $tmp,$tweak,$seven # next tweak value 3209 vaddubm $tweak,$tweak,$tweak 3210 vsldoi $tmp,$tmp,$tmp,15 3211 le?vperm $in3,$in3,$in3,$leperm 3212 vand $tmp,$tmp,$eighty7 3213 vxor $out3,$in3,$twk3 3214 vxor $tweak,$tweak,$tmp 3215 3216 lvx_u $in4,$x40,$inp 3217 subi $len,$len,0x60 3218 vxor $twk4,$tweak,$rndkey0 3219 vsrab $tmp,$tweak,$seven # next tweak value 3220 vaddubm $tweak,$tweak,$tweak 3221 vsldoi $tmp,$tmp,$tmp,15 3222 le?vperm $in4,$in4,$in4,$leperm 3223 vand $tmp,$tmp,$eighty7 3224 vxor $out4,$in4,$twk4 3225 vxor $tweak,$tweak,$tmp 3226 3227 lvx_u $in5,$x50,$inp 3228 addi $inp,$inp,0x60 3229 vxor $twk5,$tweak,$rndkey0 3230 vsrab $tmp,$tweak,$seven # next tweak value 3231 vaddubm $tweak,$tweak,$tweak 3232 vsldoi $tmp,$tmp,$tmp,15 3233 le?vperm $in5,$in5,$in5,$leperm 3234 vand $tmp,$tmp,$eighty7 3235 vxor $out5,$in5,$twk5 3236 vxor $tweak,$tweak,$tmp 3237 3238 vxor v31,v31,$rndkey0 3239 mtctr $rounds 3240 b Loop_xts_dec6x 3241 3242.align 5 3243Loop_xts_dec6x: 3244 vncipher $out0,$out0,v24 3245 vncipher $out1,$out1,v24 3246 vncipher $out2,$out2,v24 3247 vncipher $out3,$out3,v24 3248 vncipher $out4,$out4,v24 3249 vncipher $out5,$out5,v24 3250 lvx v24,$x20,$key_ # round[3] 3251 addi $key_,$key_,0x20 3252 3253 vncipher $out0,$out0,v25 3254 vncipher $out1,$out1,v25 3255 vncipher $out2,$out2,v25 3256 vncipher $out3,$out3,v25 3257 vncipher $out4,$out4,v25 3258 vncipher $out5,$out5,v25 3259 lvx v25,$x10,$key_ # round[4] 3260 bdnz Loop_xts_dec6x 3261 3262 subic $len,$len,96 # $len-=96 3263 vxor $in0,$twk0,v31 # xor with last round key 3264 vncipher $out0,$out0,v24 3265 vncipher $out1,$out1,v24 3266 vsrab $tmp,$tweak,$seven # next tweak value 3267 vxor $twk0,$tweak,$rndkey0 3268 vaddubm $tweak,$tweak,$tweak 3269 vncipher $out2,$out2,v24 3270 vncipher $out3,$out3,v24 3271 vsldoi $tmp,$tmp,$tmp,15 3272 vncipher $out4,$out4,v24 3273 vncipher $out5,$out5,v24 3274 3275 subfe. r0,r0,r0 # borrow?-1:0 3276 vand $tmp,$tmp,$eighty7 3277 vncipher $out0,$out0,v25 3278 vncipher $out1,$out1,v25 3279 vxor $tweak,$tweak,$tmp 3280 vncipher $out2,$out2,v25 3281 vncipher $out3,$out3,v25 3282 vxor $in1,$twk1,v31 3283 vsrab $tmp,$tweak,$seven # next tweak value 3284 vxor $twk1,$tweak,$rndkey0 3285 vncipher $out4,$out4,v25 3286 vncipher $out5,$out5,v25 3287 3288 and r0,r0,$len 3289 vaddubm $tweak,$tweak,$tweak 3290 vsldoi $tmp,$tmp,$tmp,15 3291 vncipher $out0,$out0,v26 3292 vncipher $out1,$out1,v26 3293 vand $tmp,$tmp,$eighty7 3294 vncipher $out2,$out2,v26 3295 vncipher $out3,$out3,v26 3296 vxor $tweak,$tweak,$tmp 3297 vncipher $out4,$out4,v26 3298 vncipher $out5,$out5,v26 3299 3300 add $inp,$inp,r0 # $inp is adjusted in such 3301 # way that at exit from the 3302 # loop inX-in5 are loaded 3303 # with last "words" 3304 vxor $in2,$twk2,v31 3305 vsrab $tmp,$tweak,$seven # next tweak value 3306 vxor $twk2,$tweak,$rndkey0 3307 vaddubm $tweak,$tweak,$tweak 3308 vncipher $out0,$out0,v27 3309 vncipher $out1,$out1,v27 3310 vsldoi $tmp,$tmp,$tmp,15 3311 vncipher $out2,$out2,v27 3312 vncipher $out3,$out3,v27 3313 vand $tmp,$tmp,$eighty7 3314 vncipher $out4,$out4,v27 3315 vncipher $out5,$out5,v27 3316 3317 addi $key_,$sp,$FRAME+15 # rewind $key_ 3318 vxor $tweak,$tweak,$tmp 3319 vncipher $out0,$out0,v28 3320 vncipher $out1,$out1,v28 3321 vxor $in3,$twk3,v31 3322 vsrab $tmp,$tweak,$seven # next tweak value 3323 vxor $twk3,$tweak,$rndkey0 3324 vncipher $out2,$out2,v28 3325 vncipher $out3,$out3,v28 3326 vaddubm $tweak,$tweak,$tweak 3327 vsldoi $tmp,$tmp,$tmp,15 3328 vncipher $out4,$out4,v28 3329 vncipher $out5,$out5,v28 3330 lvx v24,$x00,$key_ # re-pre-load round[1] 3331 vand $tmp,$tmp,$eighty7 3332 3333 vncipher $out0,$out0,v29 3334 vncipher $out1,$out1,v29 3335 vxor $tweak,$tweak,$tmp 3336 vncipher $out2,$out2,v29 3337 vncipher $out3,$out3,v29 3338 vxor $in4,$twk4,v31 3339 vsrab $tmp,$tweak,$seven # next tweak value 3340 vxor $twk4,$tweak,$rndkey0 3341 vncipher $out4,$out4,v29 3342 vncipher $out5,$out5,v29 3343 lvx v25,$x10,$key_ # re-pre-load round[2] 3344 vaddubm $tweak,$tweak,$tweak 3345 vsldoi $tmp,$tmp,$tmp,15 3346 3347 vncipher $out0,$out0,v30 3348 vncipher $out1,$out1,v30 3349 vand $tmp,$tmp,$eighty7 3350 vncipher $out2,$out2,v30 3351 vncipher $out3,$out3,v30 3352 vxor $tweak,$tweak,$tmp 3353 vncipher $out4,$out4,v30 3354 vncipher $out5,$out5,v30 3355 vxor $in5,$twk5,v31 3356 vsrab $tmp,$tweak,$seven # next tweak value 3357 vxor $twk5,$tweak,$rndkey0 3358 3359 vncipherlast $out0,$out0,$in0 3360 lvx_u $in0,$x00,$inp # load next input block 3361 vaddubm $tweak,$tweak,$tweak 3362 vsldoi $tmp,$tmp,$tmp,15 3363 vncipherlast $out1,$out1,$in1 3364 lvx_u $in1,$x10,$inp 3365 vncipherlast $out2,$out2,$in2 3366 le?vperm $in0,$in0,$in0,$leperm 3367 lvx_u $in2,$x20,$inp 3368 vand $tmp,$tmp,$eighty7 3369 vncipherlast $out3,$out3,$in3 3370 le?vperm $in1,$in1,$in1,$leperm 3371 lvx_u $in3,$x30,$inp 3372 vncipherlast $out4,$out4,$in4 3373 le?vperm $in2,$in2,$in2,$leperm 3374 lvx_u $in4,$x40,$inp 3375 vxor $tweak,$tweak,$tmp 3376 vncipherlast $out5,$out5,$in5 3377 le?vperm $in3,$in3,$in3,$leperm 3378 lvx_u $in5,$x50,$inp 3379 addi $inp,$inp,0x60 3380 le?vperm $in4,$in4,$in4,$leperm 3381 le?vperm $in5,$in5,$in5,$leperm 3382 3383 le?vperm $out0,$out0,$out0,$leperm 3384 le?vperm $out1,$out1,$out1,$leperm 3385 stvx_u $out0,$x00,$out # store output 3386 vxor $out0,$in0,$twk0 3387 le?vperm $out2,$out2,$out2,$leperm 3388 stvx_u $out1,$x10,$out 3389 vxor $out1,$in1,$twk1 3390 le?vperm $out3,$out3,$out3,$leperm 3391 stvx_u $out2,$x20,$out 3392 vxor $out2,$in2,$twk2 3393 le?vperm $out4,$out4,$out4,$leperm 3394 stvx_u $out3,$x30,$out 3395 vxor $out3,$in3,$twk3 3396 le?vperm $out5,$out5,$out5,$leperm 3397 stvx_u $out4,$x40,$out 3398 vxor $out4,$in4,$twk4 3399 stvx_u $out5,$x50,$out 3400 vxor $out5,$in5,$twk5 3401 addi $out,$out,0x60 3402 3403 mtctr $rounds 3404 beq Loop_xts_dec6x # did $len-=96 borrow? 3405 3406 addic. $len,$len,0x60 3407 beq Lxts_dec6x_zero 3408 cmpwi $len,0x20 3409 blt Lxts_dec6x_one 3410 nop 3411 beq Lxts_dec6x_two 3412 cmpwi $len,0x40 3413 blt Lxts_dec6x_three 3414 nop 3415 beq Lxts_dec6x_four 3416 3417Lxts_dec6x_five: 3418 vxor $out0,$in1,$twk0 3419 vxor $out1,$in2,$twk1 3420 vxor $out2,$in3,$twk2 3421 vxor $out3,$in4,$twk3 3422 vxor $out4,$in5,$twk4 3423 3424 bl _aesp8_xts_dec5x 3425 3426 le?vperm $out0,$out0,$out0,$leperm 3427 vmr $twk0,$twk5 # unused tweak 3428 vxor $twk1,$tweak,$rndkey0 3429 le?vperm $out1,$out1,$out1,$leperm 3430 stvx_u $out0,$x00,$out # store output 3431 vxor $out0,$in0,$twk1 3432 le?vperm $out2,$out2,$out2,$leperm 3433 stvx_u $out1,$x10,$out 3434 le?vperm $out3,$out3,$out3,$leperm 3435 stvx_u $out2,$x20,$out 3436 le?vperm $out4,$out4,$out4,$leperm 3437 stvx_u $out3,$x30,$out 3438 stvx_u $out4,$x40,$out 3439 addi $out,$out,0x50 3440 bne Lxts_dec6x_steal 3441 b Lxts_dec6x_done 3442 3443.align 4 3444Lxts_dec6x_four: 3445 vxor $out0,$in2,$twk0 3446 vxor $out1,$in3,$twk1 3447 vxor $out2,$in4,$twk2 3448 vxor $out3,$in5,$twk3 3449 vxor $out4,$out4,$out4 3450 3451 bl _aesp8_xts_dec5x 3452 3453 le?vperm $out0,$out0,$out0,$leperm 3454 vmr $twk0,$twk4 # unused tweak 3455 vmr $twk1,$twk5 3456 le?vperm $out1,$out1,$out1,$leperm 3457 stvx_u $out0,$x00,$out # store output 3458 vxor $out0,$in0,$twk5 3459 le?vperm $out2,$out2,$out2,$leperm 3460 stvx_u $out1,$x10,$out 3461 le?vperm $out3,$out3,$out3,$leperm 3462 stvx_u $out2,$x20,$out 3463 stvx_u $out3,$x30,$out 3464 addi $out,$out,0x40 3465 bne Lxts_dec6x_steal 3466 b Lxts_dec6x_done 3467 3468.align 4 3469Lxts_dec6x_three: 3470 vxor $out0,$in3,$twk0 3471 vxor $out1,$in4,$twk1 3472 vxor $out2,$in5,$twk2 3473 vxor $out3,$out3,$out3 3474 vxor $out4,$out4,$out4 3475 3476 bl _aesp8_xts_dec5x 3477 3478 le?vperm $out0,$out0,$out0,$leperm 3479 vmr $twk0,$twk3 # unused tweak 3480 vmr $twk1,$twk4 3481 le?vperm $out1,$out1,$out1,$leperm 3482 stvx_u $out0,$x00,$out # store output 3483 vxor $out0,$in0,$twk4 3484 le?vperm $out2,$out2,$out2,$leperm 3485 stvx_u $out1,$x10,$out 3486 stvx_u $out2,$x20,$out 3487 addi $out,$out,0x30 3488 bne Lxts_dec6x_steal 3489 b Lxts_dec6x_done 3490 3491.align 4 3492Lxts_dec6x_two: 3493 vxor $out0,$in4,$twk0 3494 vxor $out1,$in5,$twk1 3495 vxor $out2,$out2,$out2 3496 vxor $out3,$out3,$out3 3497 vxor $out4,$out4,$out4 3498 3499 bl _aesp8_xts_dec5x 3500 3501 le?vperm $out0,$out0,$out0,$leperm 3502 vmr $twk0,$twk2 # unused tweak 3503 vmr $twk1,$twk3 3504 le?vperm $out1,$out1,$out1,$leperm 3505 stvx_u $out0,$x00,$out # store output 3506 vxor $out0,$in0,$twk3 3507 stvx_u $out1,$x10,$out 3508 addi $out,$out,0x20 3509 bne Lxts_dec6x_steal 3510 b Lxts_dec6x_done 3511 3512.align 4 3513Lxts_dec6x_one: 3514 vxor $out0,$in5,$twk0 3515 nop 3516Loop_xts_dec1x: 3517 vncipher $out0,$out0,v24 3518 lvx v24,$x20,$key_ # round[3] 3519 addi $key_,$key_,0x20 3520 3521 vncipher $out0,$out0,v25 3522 lvx v25,$x10,$key_ # round[4] 3523 bdnz Loop_xts_dec1x 3524 3525 subi r0,$taillen,1 3526 vncipher $out0,$out0,v24 3527 3528 andi. r0,r0,16 3529 cmpwi $taillen,0 3530 vncipher $out0,$out0,v25 3531 3532 sub $inp,$inp,r0 3533 vncipher $out0,$out0,v26 3534 3535 lvx_u $in0,0,$inp 3536 vncipher $out0,$out0,v27 3537 3538 addi $key_,$sp,$FRAME+15 # rewind $key_ 3539 vncipher $out0,$out0,v28 3540 lvx v24,$x00,$key_ # re-pre-load round[1] 3541 3542 vncipher $out0,$out0,v29 3543 lvx v25,$x10,$key_ # re-pre-load round[2] 3544 vxor $twk0,$twk0,v31 3545 3546 le?vperm $in0,$in0,$in0,$leperm 3547 vncipher $out0,$out0,v30 3548 3549 mtctr $rounds 3550 vncipherlast $out0,$out0,$twk0 3551 3552 vmr $twk0,$twk1 # unused tweak 3553 vmr $twk1,$twk2 3554 le?vperm $out0,$out0,$out0,$leperm 3555 stvx_u $out0,$x00,$out # store output 3556 addi $out,$out,0x10 3557 vxor $out0,$in0,$twk2 3558 bne Lxts_dec6x_steal 3559 b Lxts_dec6x_done 3560 3561.align 4 3562Lxts_dec6x_zero: 3563 cmpwi $taillen,0 3564 beq Lxts_dec6x_done 3565 3566 lvx_u $in0,0,$inp 3567 le?vperm $in0,$in0,$in0,$leperm 3568 vxor $out0,$in0,$twk1 3569Lxts_dec6x_steal: 3570 vncipher $out0,$out0,v24 3571 lvx v24,$x20,$key_ # round[3] 3572 addi $key_,$key_,0x20 3573 3574 vncipher $out0,$out0,v25 3575 lvx v25,$x10,$key_ # round[4] 3576 bdnz Lxts_dec6x_steal 3577 3578 add $inp,$inp,$taillen 3579 vncipher $out0,$out0,v24 3580 3581 cmpwi $taillen,0 3582 vncipher $out0,$out0,v25 3583 3584 lvx_u $in0,0,$inp 3585 vncipher $out0,$out0,v26 3586 3587 lvsr $inpperm,0,$taillen # $in5 is no more 3588 vncipher $out0,$out0,v27 3589 3590 addi $key_,$sp,$FRAME+15 # rewind $key_ 3591 vncipher $out0,$out0,v28 3592 lvx v24,$x00,$key_ # re-pre-load round[1] 3593 3594 vncipher $out0,$out0,v29 3595 lvx v25,$x10,$key_ # re-pre-load round[2] 3596 vxor $twk1,$twk1,v31 3597 3598 le?vperm $in0,$in0,$in0,$leperm 3599 vncipher $out0,$out0,v30 3600 3601 vperm $in0,$in0,$in0,$inpperm 3602 vncipherlast $tmp,$out0,$twk1 3603 3604 le?vperm $out0,$tmp,$tmp,$leperm 3605 le?stvx_u $out0,0,$out 3606 be?stvx_u $tmp,0,$out 3607 3608 vxor $out0,$out0,$out0 3609 vspltisb $out1,-1 3610 vperm $out0,$out0,$out1,$inpperm 3611 vsel $out0,$in0,$tmp,$out0 3612 vxor $out0,$out0,$twk0 3613 3614 subi r30,$out,1 3615 mtctr $taillen 3616Loop_xts_dec6x_steal: 3617 lbzu r0,1(r30) 3618 stb r0,16(r30) 3619 bdnz Loop_xts_dec6x_steal 3620 3621 li $taillen,0 3622 mtctr $rounds 3623 b Loop_xts_dec1x # one more time... 3624 3625.align 4 3626Lxts_dec6x_done: 3627 ${UCMP}i $ivp,0 3628 beq Lxts_dec6x_ret 3629 3630 vxor $tweak,$twk0,$rndkey0 3631 le?vperm $tweak,$tweak,$tweak,$leperm 3632 stvx_u $tweak,0,$ivp 3633 3634Lxts_dec6x_ret: 3635 mtlr r11 3636 li r10,`$FRAME+15` 3637 li r11,`$FRAME+31` 3638 stvx $seven,r10,$sp # wipe copies of round keys 3639 addi r10,r10,32 3640 stvx $seven,r11,$sp 3641 addi r11,r11,32 3642 stvx $seven,r10,$sp 3643 addi r10,r10,32 3644 stvx $seven,r11,$sp 3645 addi r11,r11,32 3646 stvx $seven,r10,$sp 3647 addi r10,r10,32 3648 stvx $seven,r11,$sp 3649 addi r11,r11,32 3650 stvx $seven,r10,$sp 3651 addi r10,r10,32 3652 stvx $seven,r11,$sp 3653 addi r11,r11,32 3654 3655 mtspr 256,$vrsave 3656 lvx v20,r10,$sp # ABI says so 3657 addi r10,r10,32 3658 lvx v21,r11,$sp 3659 addi r11,r11,32 3660 lvx v22,r10,$sp 3661 addi r10,r10,32 3662 lvx v23,r11,$sp 3663 addi r11,r11,32 3664 lvx v24,r10,$sp 3665 addi r10,r10,32 3666 lvx v25,r11,$sp 3667 addi r11,r11,32 3668 lvx v26,r10,$sp 3669 addi r10,r10,32 3670 lvx v27,r11,$sp 3671 addi r11,r11,32 3672 lvx v28,r10,$sp 3673 addi r10,r10,32 3674 lvx v29,r11,$sp 3675 addi r11,r11,32 3676 lvx v30,r10,$sp 3677 lvx v31,r11,$sp 3678 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3679 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3680 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3681 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3682 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3683 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3684 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 3685 blr 3686 .long 0 3687 .byte 0,12,0x04,1,0x80,6,6,0 3688 .long 0 3689 3690.align 5 3691_aesp8_xts_dec5x: 3692 vncipher $out0,$out0,v24 3693 vncipher $out1,$out1,v24 3694 vncipher $out2,$out2,v24 3695 vncipher $out3,$out3,v24 3696 vncipher $out4,$out4,v24 3697 lvx v24,$x20,$key_ # round[3] 3698 addi $key_,$key_,0x20 3699 3700 vncipher $out0,$out0,v25 3701 vncipher $out1,$out1,v25 3702 vncipher $out2,$out2,v25 3703 vncipher $out3,$out3,v25 3704 vncipher $out4,$out4,v25 3705 lvx v25,$x10,$key_ # round[4] 3706 bdnz _aesp8_xts_dec5x 3707 3708 subi r0,$taillen,1 3709 vncipher $out0,$out0,v24 3710 vncipher $out1,$out1,v24 3711 vncipher $out2,$out2,v24 3712 vncipher $out3,$out3,v24 3713 vncipher $out4,$out4,v24 3714 3715 andi. r0,r0,16 3716 cmpwi $taillen,0 3717 vncipher $out0,$out0,v25 3718 vncipher $out1,$out1,v25 3719 vncipher $out2,$out2,v25 3720 vncipher $out3,$out3,v25 3721 vncipher $out4,$out4,v25 3722 vxor $twk0,$twk0,v31 3723 3724 sub $inp,$inp,r0 3725 vncipher $out0,$out0,v26 3726 vncipher $out1,$out1,v26 3727 vncipher $out2,$out2,v26 3728 vncipher $out3,$out3,v26 3729 vncipher $out4,$out4,v26 3730 vxor $in1,$twk1,v31 3731 3732 vncipher $out0,$out0,v27 3733 lvx_u $in0,0,$inp 3734 vncipher $out1,$out1,v27 3735 vncipher $out2,$out2,v27 3736 vncipher $out3,$out3,v27 3737 vncipher $out4,$out4,v27 3738 vxor $in2,$twk2,v31 3739 3740 addi $key_,$sp,$FRAME+15 # rewind $key_ 3741 vncipher $out0,$out0,v28 3742 vncipher $out1,$out1,v28 3743 vncipher $out2,$out2,v28 3744 vncipher $out3,$out3,v28 3745 vncipher $out4,$out4,v28 3746 lvx v24,$x00,$key_ # re-pre-load round[1] 3747 vxor $in3,$twk3,v31 3748 3749 vncipher $out0,$out0,v29 3750 le?vperm $in0,$in0,$in0,$leperm 3751 vncipher $out1,$out1,v29 3752 vncipher $out2,$out2,v29 3753 vncipher $out3,$out3,v29 3754 vncipher $out4,$out4,v29 3755 lvx v25,$x10,$key_ # re-pre-load round[2] 3756 vxor $in4,$twk4,v31 3757 3758 vncipher $out0,$out0,v30 3759 vncipher $out1,$out1,v30 3760 vncipher $out2,$out2,v30 3761 vncipher $out3,$out3,v30 3762 vncipher $out4,$out4,v30 3763 3764 vncipherlast $out0,$out0,$twk0 3765 vncipherlast $out1,$out1,$in1 3766 vncipherlast $out2,$out2,$in2 3767 vncipherlast $out3,$out3,$in3 3768 vncipherlast $out4,$out4,$in4 3769 mtctr $rounds 3770 blr 3771 .long 0 3772 .byte 0,12,0x14,0,0,0,0,0 3773___ 3774}} }}} 3775 3776my $consts=1; 3777foreach(split("\n",$code)) { 3778 s/\`([^\`]*)\`/eval($1)/geo; 3779 3780 # constants table endian-specific conversion 3781 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { 3782 my $conv=$3; 3783 my @bytes=(); 3784 3785 # convert to endian-agnostic format 3786 if ($1 eq "long") { 3787 foreach (split(/,\s*/,$2)) { 3788 my $l = /^0/?oct:int; 3789 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; 3790 } 3791 } else { 3792 @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); 3793 } 3794 3795 # little-endian conversion 3796 if ($flavour =~ /le$/o) { 3797 SWITCH: for($conv) { 3798 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; 3799 /\?rev/ && do { @bytes=reverse(@bytes); last; }; 3800 } 3801 } 3802 3803 #emit 3804 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; 3805 next; 3806 } 3807 $consts=0 if (m/Lconsts:/o); # end of table 3808 3809 # instructions prefixed with '?' are endian-specific and need 3810 # to be adjusted accordingly... 3811 if ($flavour =~ /le$/o) { # little-endian 3812 s/le\?//o or 3813 s/be\?/#be#/o or 3814 s/\?lvsr/lvsl/o or 3815 s/\?lvsl/lvsr/o or 3816 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or 3817 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or 3818 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; 3819 } else { # big-endian 3820 s/le\?/#le#/o or 3821 s/be\?//o or 3822 s/\?([a-z]+)/$1/o; 3823 } 3824 3825 print $_,"\n"; 3826} 3827 3828close STDOUT; 3829