#! /usr/bin/env perl
# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies MSR.VSX flag being
# set. It should also be noted that ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in pure AltiVec/VMX way [when data
# is aligned programmatically, which in turn guarantees exception-
# free execution], but it turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are on average lower
# than the additional overhead of the pure AltiVec approach.
#
# May 2016
#
# Added XTS subroutine; 9x improvement on little-endian and 12x on
# big-endian systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0

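######################################################################
# For reference, the entry points generated below correspond to the
# following C prototypes (inferred here from the register usage of
# each procedure, assuming the default $prefix; the XTS prototype is
# documented at its own section further down):
#
# int  aes_p8_set_encrypt_key(const unsigned char *userKey, int bits,
#			AES_KEY *key);
# int  aes_p8_set_decrypt_key(const unsigned char *userKey, int bits,
#			AES_KEY *key);
# void aes_p8_encrypt(const unsigned char *in, unsigned char *out,
#			const AES_KEY *key);
# void aes_p8_decrypt(const unsigned char *in, unsigned char *out,
#			const AES_KEY *key);
# void aes_p8_cbc_encrypt(const unsigned char *in, unsigned char *out,
#			size_t length, const AES_KEY *key,
#			unsigned char *ivec, int enc);
# void aes_p8_ctr32_encrypt_blocks(const unsigned char *in,
#			unsigned char *out, size_t blocks,
#			const AES_KEY *key, const unsigned char *ivec);
######################################################################
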
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open(STDOUT,"| $^X $xlate $flavour ".shift) || die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr		r0
	bcl		20,31,\$+4
	mflr		$ptr	 #vvvvv "distance" between . and rcon
	addi		$ptr,$ptr,-0x48
	mtlr		r0
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

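	# A note on the trick used throughout the key expansion: the
	# "rotate-n-splat" vperm replicates the rotated last word of
	# the key into all four word positions, which makes the
	# ShiftRows step of vcipherlast a no-op on that value, so
	# vcipherlast delivers SubWord of the rotated word plus the
	# xor with the round constant passed in its second operand.
	# The constant itself is doubled with vadduwm every iteration;
	# the "last two round keys" reload below resets it to 0x1b
	# once plain doubling would overflow. Roughly, for the 128-bit
	# schedule:
	#
	#	w[i] = w[i-4] ^ SubWord(RotWord(w[i-1])) ^ rcon;
	#	rcon <<= 1;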
.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	stvx		$stage,0,$out
	addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in1,$in1,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

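	# The decrypt key schedule is the encrypt schedule with its
	# 16-byte round keys in reverse order (vncipher consumes them
	# that way), so Ldeckey below just swaps round keys from both
	# ends of the schedule inward, in effect:
	#
	#	for (i = 0, j = rounds; i < j; i++, j--)
	#		swap(rk[i], rk[j]);   /* rk: 16-byte round keys */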
Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
	.long		0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
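######################################################################
# A note on the unaligned-access idiom recurring below: lvx ignores
# the low four address bits, so a 16-byte block that straddles an
# alignment boundary is assembled from two aligned loads merged with
# vperm through an lvsl/lvsr-generated permutation. This is also why
# "addi inp,inp,15" is "not typo" in several places: advancing by 15
# and letting lvx round down fetches the aligned quadword covering
# the tail of the current block.
######################################################################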
#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
	${UCMP}i	$len,16
	bltlr-

	cmpwi		$enc,0			# test direction
	lis		r0,0xffe0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1
	beq		Lcbc_dec

Lcbc_enc:
	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	vperm		$inout,$inout,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	vxor		$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm		$tmp,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_enc

	b		Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge		_aesp8_cbc_decrypt8x
	vmr		$tmp,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	vperm		$tmp,$tmp,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$tmp,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16

Loop_cbc_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor		$inout,$inout,$ivec
	vmr		$ivec,$tmp
	vperm		$tmp,$inout,$inout,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_dec

Lcbc_done:
	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	neg		$enc,$ivp		# write [unaligned] iv
	li		$idx,15			# 15 is not typo
	vxor		$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl		$outperm,0,$enc
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp
	lvx		$outhead,0,$ivp
	vperm		$ivec,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$ivec,$outmask
	lvx		$inptail,$idx,$ivp
	stvx		$inout,0,$ivp
	vsel		$inout,$ivec,$inptail,$outmask
	stvx		$inout,$idx,$ivp

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
#########################################################################
{{	# Optimized CBC decrypt procedure				#
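# CBC decryption is parallelizable (each plaintext block depends only
# on two adjacent ciphertext blocks), so the procedure below keeps
# eight blocks in flight to hide vncipher latency. The round keys are
# aligned with vperm once and copied to the stack frame, so the inner
# loop fetches them with plain aligned lvx and no per-round vperm.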
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment

$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li		r10,`$FRAME+8*16+15`
	li		r11,`$FRAME+8*16+31`
	stvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	stvx		v21,r11,$sp
	addi		r11,r11,32
	stvx		v22,r10,$sp
	addi		r10,r10,32
	stvx		v23,r11,$sp
	addi		r11,r11,32
	stvx		v24,r10,$sp
	addi		r10,r10,32
	stvx		v25,r11,$sp
	addi		r11,r11,32
	stvx		v26,r10,$sp
	addi		r10,r10,32
	stvx		v27,r11,$sp
	addi		r11,r11,32
	stvx		v28,r10,$sp
	addi		r10,r10,32
	stvx		v29,r11,$sp
	addi		r11,r11,32
	stvx		v30,r10,$sp
	stvx		v31,r11,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total
	subi		$len,$len,128		# bias

	lvx		$rndkey0,$x00,$key	# load key schedule
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	lvx		v31,$x00,$key
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_cbc_dec_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_cbc_dec_key

	lvx		v26,$x10,$key
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key
	?vperm		v29,v29,v30,$keyperm
	lvx		$out0,$x70,$key		# borrow $out0
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$out0,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	#lvx		$inptail,0,$inp		# "caller" already did this
	#addi		$inp,$inp,15		# 15 is not typo
	subi		$inp,$inp,15		# undo "caller"

	le?li		$idx,8
	lvx_u		$in0,$x00,$inp		# load first 8 "words"
	le?lvsl		$inpperm,0,$idx
	le?vspltisb	$tmp,0x0f
	lvx_u		$in1,$x10,$inp
	le?vxor		$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u		$in2,$x20,$inp
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u		$in3,$x30,$inp
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u		$in4,$x40,$inp
	le?vperm	$in2,$in2,$in2,$inpperm
	vxor		$out0,$in0,$rndkey0
	lvx_u		$in5,$x50,$inp
	le?vperm	$in3,$in3,$in3,$inpperm
	vxor		$out1,$in1,$rndkey0
	lvx_u		$in6,$x60,$inp
	le?vperm	$in4,$in4,$in4,$inpperm
	vxor		$out2,$in2,$rndkey0
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80
	le?vperm	$in5,$in5,$in5,$inpperm
	vxor		$out3,$in3,$rndkey0
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor		$out4,$in4,$rndkey0
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor		$out5,$in5,$rndkey0
	vxor		$out6,$in6,$rndkey0
	vxor		$out7,$in7,$rndkey0

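	# The counter is (rounds/2 - 4): Loop_cbc_dec8x covers all but
	# the last eight rounds, which are peeled off below so that
	# their vncipher-s can be interleaved with loading and xoring
	# the next eight ciphertext blocks.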
	mtctr		$rounds
	b		Loop_cbc_dec8x
.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x

	subic		$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and		r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in7 are loaded
						# with last "words"
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	vxor		$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	vxor		$in0,$in0,v31
	vncipher	$out2,$out2,v30
	vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor		$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	lvx_u		$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	lvx_u		$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u		$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u		$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	le?vperm	$in2,$in2,$in2,$inpperm
	lvx_u		$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	le?vperm	$in3,$in3,$in3,$inpperm
	lvx_u		$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	le?vperm	$in4,$in4,$in4,$inpperm
	lvx_u		$in6,$x60,$inp
	vmr		$ivec,$in7
	le?vperm	$in5,$in5,$in5,$inpperm
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor		$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor		$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	vxor		$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	vxor		$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	vxor		$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	vxor		$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	vxor		$out6,$in6,$rndkey0
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	beq		Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.		$len,$len,128
	beq		Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	vxor		$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor		$in6,$in6,v31

	cmplwi		$len,32			# switch($len)
	blt		Lcbc_dec8x_one
	nop
	beq		Lcbc_dec8x_two
	cmplwi		$len,64
	blt		Lcbc_dec8x_three
	nop
	beq		Lcbc_dec8x_four
	cmplwi		$len,96
	blt		Lcbc_dec8x_five
	nop
	beq		Lcbc_dec8x_six

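	# Tail dispatch: with N (1..7) blocks remaining they sit in
	# out(8-N)..out7; each case below finishes the final round
	# combined with the CBC chaining xor (vncipherlast) and stores
	# the N blocks.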
Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x50,$out
	stvx_u		$out7,$x60,$out
	addi		$out,$out,0x70
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x40,$out
	stvx_u		$out7,$x50,$out
	addi		$out,$out,0x60
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x30,$out
	stvx_u		$out7,$x40,$out
	addi		$out,$out,0x50
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x20,$out
	stvx_u		$out7,$x30,$out
	addi		$out,$out,0x40
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x10,$out
	stvx_u		$out7,$x20,$out
	addi		$out,$out,0x30
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x00,$out
	stvx_u		$out7,$x10,$out
	addi		$out,$out,0x20
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr		$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out7,0,$out
	addi		$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u		$ivec,0,$ivp		# write [unaligned] iv

	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x14,0,0x80,6,6,0
	.long		0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}

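######################################################################
# Note that the length argument of the ctr32_encrypt_blocks entry
# point is a *block* count (the 8x path converts it to bytes with
# $SHL below). Also note that the single-block path advances the
# counter with vadduwm, a 32-bit word add with no carry beyond the
# low word, whereas the 8x path uses vadduqm, a full 128-bit add, so
# the two paths agree as long as the low 32-bit counter doesn't wrap.
######################################################################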
#########################################################################
{{{	# CTR procedure[s]						#
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
	${UCMP}i	$len,1
	bltlr-

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	vspltisb	$one,1
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm
	vsldoi		$one,$rndkey0,$one,1

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1

	${UCMP}i	$len,8
	bge		_aesp8_ctr32_encrypt8x

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	lvx		$rndkey0,0,$key
	mtctr		$rounds
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$ivec,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	b		Loop_ctr32_enc

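	# In the loop below the next input block is folded into the
	# last round key (vxor dat,dat,rndkey1) ahead of vcipherlast,
	# so the final AddRoundKey and the CTR xor of keystream with
	# input happen in a single instruction.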
.align	5
Loop_ctr32_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_ctr32_enc

	vadduwm		$ivec,$ivec,$one
	vmr		$dat,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	subic.		$len,$len,1		# blocks--

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	vperm		$dat,$dat,$inptail,$inpperm
	li		$idx,16
	?vperm		$rndkey1,$rndkey0,$rndkey1,$keyperm
	lvx		$rndkey0,0,$key
	vxor		$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inout,$outperm
	vsel		$dat,$outhead,$inout,$outmask
	mtctr		$rounds
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr		$outhead,$inout
	vxor		$inout,$ivec,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	stvx		$dat,0,$out
	addi		$out,$out,16
	bne		Loop_ctr32_enc

	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
#########################################################################
{{	# Optimized CTR procedure					#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);

$code.=<<___;
.align	5
_aesp8_ctr32_encrypt8x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li		r10,`$FRAME+8*16+15`
	li		r11,`$FRAME+8*16+31`
	stvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	stvx		v21,r11,$sp
	addi		r11,r11,32
	stvx		v22,r10,$sp
	addi		r10,r10,32
	stvx		v23,r11,$sp
	addi		r11,r11,32
	stvx		v24,r10,$sp
	addi		r10,r10,32
	stvx		v25,r11,$sp
	addi		r11,r11,32
	stvx		v26,r10,$sp
	addi		r10,r10,32
	stvx		v27,r11,$sp
	addi		r11,r11,32
	stvx		v28,r10,$sp
	addi		r10,r10,32
	stvx		v29,r11,$sp
	addi		r11,r11,32
	stvx		v30,r10,$sp
	stvx		v31,r11,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total

	lvx		$rndkey0,$x00,$key	# load key schedule
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	lvx		v31,$x00,$key
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_ctr32_enc_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_ctr32_enc_key

	lvx		v26,$x10,$key
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key
	?vperm		v29,v29,v30,$keyperm
	lvx		$out0,$x70,$key		# borrow $out0
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$out0,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	vadduqm		$two,$one,$one
	subi		$inp,$inp,15		# undo "caller"
	$SHL		$len,$len,4

	vadduqm		$out1,$ivec,$one	# counter values ...
	vadduqm		$out2,$ivec,$two
	vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	le?li		$idx,8
	vadduqm		$out3,$out1,$two
	vxor		$out1,$out1,$rndkey0
	le?lvsl		$inpperm,0,$idx
	vadduqm		$out4,$out2,$two
	vxor		$out2,$out2,$rndkey0
	le?vspltisb	$tmp,0x0f
	vadduqm		$out5,$out3,$two
	vxor		$out3,$out3,$rndkey0
	le?vxor		$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	vadduqm		$out6,$out4,$two
	vxor		$out4,$out4,$rndkey0
	vadduqm		$out7,$out5,$two
	vxor		$out5,$out5,$rndkey0
	vadduqm		$ivec,$out6,$two	# next counter value
	vxor		$out6,$out6,$rndkey0
	vxor		$out7,$out7,$rndkey0

	mtctr		$rounds
	b		Loop_ctr32_enc8x
.align	5
Loop_ctr32_enc8x:
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24
	vcipher		$out6,$out6,v24
	vcipher		$out7,$out7,v24
Loop_ctr32_enc8x_middle:
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25
	vcipher		$out6,$out6,v25
	vcipher		$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_ctr32_enc8x

	subic		r11,$len,256		# $len-256, borrow $key_
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24
	vcipher		$out6,$out6,v24
	vcipher		$out7,$out7,v24

	subfe		r0,r0,r0		# borrow?-1:0
	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25
	vcipher		$out6,$out6,v25
	vcipher		$out7,$out7,v25

	and		r0,r0,r11
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vcipher		$out0,$out0,v26
	vcipher		$out1,$out1,v26
	vcipher		$out2,$out2,v26
	vcipher		$out3,$out3,v26
	vcipher		$out4,$out4,v26
	vcipher		$out5,$out5,v26
	vcipher		$out6,$out6,v26
	vcipher		$out7,$out7,v26
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	subic		$len,$len,129		# $len-=129
	vcipher		$out0,$out0,v27
	addi		$len,$len,1		# $len-=128 really
	vcipher		$out1,$out1,v27
	vcipher		$out2,$out2,v27
	vcipher		$out3,$out3,v27
	vcipher		$out4,$out4,v27
	vcipher		$out5,$out5,v27
	vcipher		$out6,$out6,v27
	vcipher		$out7,$out7,v27
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vcipher		$out0,$out0,v28
	lvx_u		$in0,$x00,$inp		# load input
	vcipher		$out1,$out1,v28
	lvx_u		$in1,$x10,$inp
	vcipher		$out2,$out2,v28
	lvx_u		$in2,$x20,$inp
	vcipher		$out3,$out3,v28
	lvx_u		$in3,$x30,$inp
	vcipher		$out4,$out4,v28
	lvx_u		$in4,$x40,$inp
	vcipher		$out5,$out5,v28
	lvx_u		$in5,$x50,$inp
	vcipher		$out6,$out6,v28
	lvx_u		$in6,$x60,$inp
	vcipher		$out7,$out7,v28
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80

	vcipher		$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$inpperm
	vcipher		$out1,$out1,v29
	le?vperm	$in1,$in1,$in1,$inpperm
	vcipher		$out2,$out2,v29
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher		$out3,$out3,v29
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher		$out4,$out4,v29
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher		$out5,$out5,v29
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher		$out6,$out6,v29
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher		$out7,$out7,v29
	le?vperm	$in7,$in7,$in7,$inpperm

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in7 are loaded
						# with last "words"
	subfe.		r0,r0,r0		# borrow?-1:0
	vcipher		$out0,$out0,v30
	vxor		$in0,$in0,v31		# xor with last round key
	vcipher		$out1,$out1,v30
	vxor		$in1,$in1,v31
	vcipher		$out2,$out2,v30
	vxor		$in2,$in2,v31
	vcipher		$out3,$out3,v30
	vxor		$in3,$in3,v31
	vcipher		$out4,$out4,v30
	vxor		$in4,$in4,v31
	vcipher		$out5,$out5,v30
	vxor		$in5,$in5,v31
	vcipher		$out6,$out6,v30
	vxor		$in6,$in6,v31
	vcipher		$out7,$out7,v30
	vxor		$in7,$in7,v31

	bne		Lctr32_enc8x_break	# did $len-129 borrow?

	vcipherlast	$in0,$out0,$in0
	vcipherlast	$in1,$out1,$in1
	vadduqm		$out1,$ivec,$one	# counter values ...
	vcipherlast	$in2,$out2,$in2
	vadduqm		$out2,$ivec,$two
	vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	vcipherlast	$in3,$out3,$in3
	vadduqm		$out3,$out1,$two
	vxor		$out1,$out1,$rndkey0
	vcipherlast	$in4,$out4,$in4
	vadduqm		$out4,$out2,$two
	vxor		$out2,$out2,$rndkey0
	vcipherlast	$in5,$out5,$in5
	vadduqm		$out5,$out3,$two
	vxor		$out3,$out3,$rndkey0
	vcipherlast	$in6,$out6,$in6
	vadduqm		$out6,$out4,$two
	vxor		$out4,$out4,$rndkey0
	vcipherlast	$in7,$out7,$in7
	vadduqm		$out7,$out5,$two
	vxor		$out5,$out5,$rndkey0
	le?vperm	$in0,$in0,$in0,$inpperm
	vadduqm		$ivec,$out6,$two	# next counter value
	vxor		$out6,$out6,$rndkey0
	le?vperm	$in1,$in1,$in1,$inpperm
	vxor		$out7,$out7,$rndkey0
	mtctr		$rounds

	vcipher		$out0,$out0,v24
	stvx_u		$in0,$x00,$out
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher		$out1,$out1,v24
	stvx_u		$in1,$x10,$out
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher		$out2,$out2,v24
	stvx_u		$in2,$x20,$out
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher		$out3,$out3,v24
	stvx_u		$in3,$x30,$out
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher		$out4,$out4,v24
	stvx_u		$in4,$x40,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher		$out5,$out5,v24
	stvx_u		$in5,$x50,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vcipher		$out6,$out6,v24
	stvx_u		$in6,$x60,$out
	vcipher		$out7,$out7,v24
	stvx_u		$in7,$x70,$out
	addi		$out,$out,0x80

	b		Loop_ctr32_enc8x_middle

.align	5
Lctr32_enc8x_break:
	cmpwi		$len,-0x60
	blt		Lctr32_enc8x_one
	nop
	beq		Lctr32_enc8x_two
	cmpwi		$len,-0x40
	blt		Lctr32_enc8x_three
	nop
	beq		Lctr32_enc8x_four
	cmpwi		$len,-0x20
	blt		Lctr32_enc8x_five
	nop
	beq		Lctr32_enc8x_six
	cmpwi		$len,0x00
	blt		Lctr32_enc8x_seven

Lctr32_enc8x_eight:
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_seven:
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	stvx_u		$out6,$x60,$out
	addi		$out,$out,0x70
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	stvx_u		$out5,$x50,$out
	addi		$out,$out,0x60
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u		$out0,0,$out
	addi		$out,$out,0x10

Lctr32_enc8x_done:
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x14,0,0x80,6,6,0
	.long		0
.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}}	}}}

#########################################################################
{{{	# XTS procedures						#
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
#			const AES_KEY *key1, const AES_KEY *key2,	#
#			[const] unsigned char iv[16]);			#
# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which	#
# the input tweak value is assumed to be encrypted already, and the	#
# last tweak value, suitable for a consecutive call on the same chunk	#
# of data, is written back to the original buffer. In addition, in	#
# "tweak chaining" mode only complete input blocks are processed.	#

my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =	map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout) =				map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm) =	map("v$_",(3..7));
my ($tweak,$seven,$eighty7,$tmp,$tweak1) =		map("v$_",(8..12));
my $taillen = $key2;

	($inp,$idx) = ($idx,$inp);	# reassign

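# A sketch of the "tweak chaining" usage described above (an assumed
# calling sequence, not taken from an actual caller): the tweak is
# pre-encrypted once, then a long chunk is streamed through
# consecutive calls, each picking up the tweak the previous call
# left in the iv buffer:
#
#	aes_p8_encrypt(sector_iv, iv, key2);	/* pre-encrypt tweak */
#	aes_p8_xts_encrypt(in,  out,  len1, key1, NULL, iv);
#	aes_p8_xts_encrypt(in2, out2, len2, key1, NULL, iv);
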
$code.=<<___;
.globl	.${prefix}_xts_encrypt
	mr		$inp,r3			# reassign
	li		r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis		r0,0xfff0
	mfspr		r12,256			# save vrsave
	li		r11,0
	mtspr		256,r0

	vspltisb	$seven,0x07		# 0x070707..07
	le?lvsl		$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor		$leperm,$leperm,$seven

	li		$idx,15
	lvx		$tweak,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$tweak,$tweak,$inptail,$inpperm

	neg		r11,$inp
	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inout,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0			# key2==NULL?
	beq		Lxts_enc_no_key2

	?lvsl		$keyperm,0,$key2	# prepare for unaligned key
	lwz		$rounds,240($key2)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	lvx		$rndkey0,0,$key2
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	mtctr		$rounds

Ltweak_xts_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	bdnz		Ltweak_xts_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li		$ivp,0			# don't chain the tweak
	b		Lxts_enc

Lxts_enc_no_key2:
	li		$idx,-16
	and		$len,$len,$idx		# in "tweak chaining"
						# mode only complete
						# blocks are processed
Lxts_enc:
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16

	?lvsl		$keyperm,0,$key1	# prepare for unaligned key
	lwz		$rounds,240($key1)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	vslb		$eighty7,$seven,$seven	# 0x808080..80
	vor		$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1			# 0x010101..01
	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01

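	# The tweak update used below (vsrab/vaddubm/vsldoi/vand/vxor)
	# is the standard GF(2^128) doubling,
	#
	#	tweak = (tweak << 1) ^ (msb(tweak) ? 0x87 : 0);
	#
	# computed bytewise: vsrab turns each byte's msb into a
	# 0x00/0xff mask, vaddubm shifts every byte left by one
	# (dropping inter-byte carries), and the mask - rotated by one
	# byte and and-ed with 0x870101..01 - re-injects the carries,
	# the top byte's overflow folding back in as 0x87.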
	${UCMP}i	$len,96
	bge		_aesp8_xts_encrypt6x

	andi.		$taillen,$len,15
	subic		r0,$len,32
	subi		$taillen,$taillen,16
	subfe		r0,r0,r0
	and		r0,r0,$taillen
	add		$inp,$inp,r0

	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	mtctr		$rounds
	b		Loop_xts_enc

.align	5
Loop_xts_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	bdnz		Loop_xts_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$rndkey0,$rndkey0,$tweak
	vcipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi		$out,$out,16

	subic.		$len,$len,16
	beq		Lxts_enc_done

	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16

	subic		r0,$len,32
	subfe		r0,r0,r0
	and		r0,r0,$taillen
	add		$inp,$inp,r0

	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$output,$output,$rndkey0	# just in case $len<16
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16

	mtctr		$rounds
	${UCMP}i	$len,16
	bge		Loop_xts_enc

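	# Ciphertext stealing for a 17..31-byte tail: the short final
	# block is completed with the trailing bytes of the previous
	# ciphertext block (vsel below) and encrypted once more with
	# the next tweak, while Loop_xts_enc_steal copies the stolen
	# ciphertext bytes forward to form the short final output.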
	vxor	$output,$output,$tweak
	lvsr	$inpperm,0,$len			# $inpperm is no longer needed
	vxor	$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm	$inptail,$inptail,$tmp,$inpperm
	vsel	$inout,$inout,$output,$inptail

	subi	r11,$out,17
	subi	$out,$out,16
	mtctr	$len
	li	$len,16
Loop_xts_enc_steal:
	lbzu	r0,1(r11)
	stb	r0,16(r11)
	bdnz	Loop_xts_enc_steal

	mtctr	$rounds
	b	Loop_xts_enc			# one more time...

Lxts_enc_done:
	${UCMP}i	$ivp,0
	beq	Lxts_enc_ret

	vsrab	$tmp,$tweak,$seven		# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp

Lxts_enc_ret:
	mtspr	256,r12				# restore vrsave
	li	r3,0
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt

.globl	.${prefix}_xts_decrypt
	mr	$inp,r3				# reassign
	li	r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis	r0,0xfff8
	mfspr	r12,256				# save vrsave
	li	r11,0
	mtspr	256,r0

	andi.	r0,$len,15
	neg	r0,r0
	andi.	r0,r0,16
	sub	$len,$len,r0

	vspltisb	$seven,0x07		# 0x070707..07
	le?lvsl	$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor	$leperm,$leperm,$seven

	li	$idx,15
	lvx	$tweak,0,$ivp			# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$tweak,$tweak,$inptail,$inpperm

	neg	r11,$inp
	lvsr	$inpperm,0,r11			# prepare for unaligned load
	lvx	$inout,0,$inp
	addi	$inp,$inp,15			# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0			# key2==NULL?
	beq	Lxts_dec_no_key2

	?lvsl	$keyperm,0,$key2		# prepare for unaligned key
	lwz	$rounds,240($key2)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	lvx	$rndkey0,0,$key2
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	mtctr	$rounds

Ltweak_xts_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	bdnz	Ltweak_xts_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li	$ivp,0				# don't chain the tweak
	b	Lxts_dec

Lxts_dec_no_key2:
	neg	$idx,$len
	andi.	$idx,$idx,15
	add	$len,$len,$idx			# in "tweak chaining"
						# mode only complete
						# blocks are processed
Lxts_dec:
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16

	?lvsl	$keyperm,0,$key1		# prepare for unaligned key
	lwz	$rounds,240($key1)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	vslb	$eighty7,$seven,$seven		# 0x808080..80
	vor	$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1			# 0x010101..01
	vsldoi	$eighty7,$eighty7,$tmp,15	# 0x870101..01

	${UCMP}i	$len,96
	bge	_aesp8_xts_decrypt6x

	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	mtctr	$rounds

	${UCMP}i	$len,16
	blt	Ltail_xts_dec
	be?b	Loop_xts_dec

.align	5
Loop_xts_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	bdnz	Loop_xts_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$rndkey0,$rndkey0,$tweak
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi	$out,$out,16

	subic.	$len,$len,16
	beq	Lxts_dec_done

	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16

	vsrab	$tmp,$tweak,$seven		# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp

	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16

	mtctr	$rounds
	${UCMP}i	$len,16
	bge	Loop_xts_dec
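
	# Decrypt-side stealing processes blocks in the opposite tweak
	# order: the last complete block is decrypted with the *next*
	# tweak value, while the trailing partial block keeps the current
	# one, so Ltail_xts_dec computes $tweak1 ahead of time and swaps
	# it in.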
Ltail_xts_dec:
	vsrab	$tmp,$tweak,$seven		# next tweak value
	vaddubm	$tweak1,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak1,$tweak1,$tmp

	subi	$inp,$inp,16
	add	$inp,$inp,$len

	vxor	$inout,$inout,$tweak		# :-(
	vxor	$inout,$inout,$tweak1		# :-)

Loop_xts_dec_short:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16
	bdnz	Loop_xts_dec_short

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key1
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$rndkey0,$rndkey0,$tweak1
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out

	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	#addi	$inp,$inp,16
	lvx	$rndkey0,0,$key1
	lvx	$rndkey1,$idx,$key1
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inptail,$inpperm
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm

	lvsr	$inpperm,0,$len			# $inpperm is no longer needed
	vxor	$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm	$inptail,$inptail,$tmp,$inpperm
	vsel	$inout,$inout,$output,$inptail

	vxor	$rndkey0,$rndkey0,$tweak
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key1
	addi	$idx,$idx,16

	subi	r11,$out,1
	mtctr	$len
	li	$len,16
Loop_xts_dec_steal:
	lbzu	r0,1(r11)
	stb	r0,16(r11)
	bdnz	Loop_xts_dec_steal

	mtctr	$rounds
	b	Loop_xts_dec			# one more time...
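
	# When $ivp is non-NULL the next tweak value is written back, so
	# that a follow-up call in "tweak chaining" mode can continue
	# where this one stopped.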
Lxts_dec_done:
	${UCMP}i	$ivp,0
	beq	Lxts_dec_ret

	vsrab	$tmp,$tweak,$seven		# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp

Lxts_dec_ret:
	mtspr	256,r12				# restore vrsave
	li	r3,0
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
___
#########################################################################
{{	# Optimized XTS procedures					#
my $key_=$key2;
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
			# v26-v31 last 6 round keys
my ($keyperm)=($out0);	# aliases with "caller", redundant assignment
my $taillen=$x70;

$code.=<<___;
.align	5
_aesp8_xts_encrypt6x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr	r11
	li	r7,`$FRAME+8*16+15`
	li	r3,`$FRAME+8*16+31`
	$PUSH	r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx	v20,r7,$sp		# ABI says so
	addi	r7,r7,32
	stvx	v21,r3,$sp
	addi	r3,r3,32
	stvx	v22,r7,$sp
	addi	r7,r7,32
	stvx	v23,r3,$sp
	addi	r3,r3,32
	stvx	v24,r7,$sp
	addi	r7,r7,32
	stvx	v25,r3,$sp
	addi	r3,r3,32
	stvx	v26,r7,$sp
	addi	r7,r7,32
	stvx	v27,r3,$sp
	addi	r3,r3,32
	stvx	v28,r7,$sp
	addi	r7,r7,32
	stvx	v29,r3,$sp
	addi	r3,r3,32
	stvx	v30,r7,$sp
	stvx	v31,r3,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total

	lvx	$rndkey0,$x00,$key1	# load key schedule
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	lvx	v31,$x00,$key1
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_xts_enc_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key1
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_xts_enc_key

	lvx	v26,$x10,$key1
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key1
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key1
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key1
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key1
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key1
	?vperm	v29,v29,v30,$keyperm
	lvx	$twk5,$x70,$key1	# borrow $twk5
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$twk5,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]
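
	# Key schedule layout for the software pipeline: round[0] stays in
	# $rndkey0 and is folded into the tweaks, the middle rounds are
	# parked on the stack and cycled through v24/v25, and the last six
	# round keys stay resident in v26-v31.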
	vperm	$in0,$inout,$inptail,$inpperm
	subi	$inp,$inp,31		# undo "caller"
	vxor	$twk0,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$out0,$in0,$twk0
	vxor	$tweak,$tweak,$tmp

	lvx_u	$in1,$x10,$inp
	vxor	$twk1,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in1,$in1,$in1,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out1,$in1,$twk1
	vxor	$tweak,$tweak,$tmp

	lvx_u	$in2,$x20,$inp
	andi.	$taillen,$len,15
	vxor	$twk2,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in2,$in2,$in2,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out2,$in2,$twk2
	vxor	$tweak,$tweak,$tmp

	lvx_u	$in3,$x30,$inp
	sub	$len,$len,$taillen
	vxor	$twk3,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in3,$in3,$in3,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out3,$in3,$twk3
	vxor	$tweak,$tweak,$tmp

	lvx_u	$in4,$x40,$inp
	subi	$len,$len,0x60
	vxor	$twk4,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in4,$in4,$in4,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out4,$in4,$twk4
	vxor	$tweak,$tweak,$tmp

	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	vxor	$twk5,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in5,$in5,$in5,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out5,$in5,$twk5
	vxor	$tweak,$tweak,$tmp

	vxor	v31,v31,$rndkey0
	mtctr	$rounds
	b	Loop_xts_enc6x

.align	5
Loop_xts_enc6x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_xts_enc6x
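
	# Branchless tail detection: subic/subfe leave an all-ones mask in
	# r0 exactly when $len-=96 borrows; anding it with the then
	# negative $len and adding the result to $inp rewinds the input
	# pointer so that the final iteration reloads the last blocks
	# without a separate code path.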
	subic	$len,$len,96		# $len-=96
	vxor	$in0,$twk0,v31		# xor with last round key
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk0,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vsldoi	$tmp,$tmp,$tmp,15
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24

	subfe.	r0,r0,r0		# borrow?-1:0
	vand	$tmp,$tmp,$eighty7
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vxor	$tweak,$tweak,$tmp
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vxor	$in1,$twk1,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk1,$tweak,$rndkey0
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25

	and	r0,r0,$len
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vcipher	$out0,$out0,v26
	vcipher	$out1,$out1,v26
	vand	$tmp,$tmp,$eighty7
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vxor	$tweak,$tweak,$tmp
	vcipher	$out4,$out4,v26
	vcipher	$out5,$out5,v26

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in5 are loaded
					# with last "words"
	vxor	$in2,$twk2,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk2,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vcipher	$out0,$out0,v27
	vcipher	$out1,$out1,v27
	vsldoi	$tmp,$tmp,$tmp,15
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vand	$tmp,$tmp,$eighty7
	vcipher	$out4,$out4,v27
	vcipher	$out5,$out5,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vxor	$tweak,$tweak,$tmp
	vcipher	$out0,$out0,v28
	vcipher	$out1,$out1,v28
	vxor	$in3,$twk3,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk3,$tweak,$rndkey0
	vcipher	$out2,$out2,v28
	vcipher	$out3,$out3,v28
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vcipher	$out4,$out4,v28
	vcipher	$out5,$out5,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vand	$tmp,$tmp,$eighty7

	vcipher	$out0,$out0,v29
	vcipher	$out1,$out1,v29
	vxor	$tweak,$tweak,$tmp
	vcipher	$out2,$out2,v29
	vcipher	$out3,$out3,v29
	vxor	$in4,$twk4,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk4,$tweak,$rndkey0
	vcipher	$out4,$out4,v29
	vcipher	$out5,$out5,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15

	vcipher	$out0,$out0,v30
	vcipher	$out1,$out1,v30
	vand	$tmp,$tmp,$eighty7
	vcipher	$out2,$out2,v30
	vcipher	$out3,$out3,v30
	vxor	$tweak,$tweak,$tmp
	vcipher	$out4,$out4,v30
	vcipher	$out5,$out5,v30
	vxor	$in5,$twk5,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk5,$tweak,$rndkey0

	vcipherlast	$out0,$out0,$in0
	lvx_u	$in0,$x00,$inp		# load next input block
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vcipherlast	$out1,$out1,$in1
	lvx_u	$in1,$x10,$inp
	vcipherlast	$out2,$out2,$in2
	le?vperm	$in0,$in0,$in0,$leperm
	lvx_u	$in2,$x20,$inp
	vand	$tmp,$tmp,$eighty7
	vcipherlast	$out3,$out3,$in3
	le?vperm	$in1,$in1,$in1,$leperm
	lvx_u	$in3,$x30,$inp
	vcipherlast	$out4,$out4,$in4
	le?vperm	$in2,$in2,$in2,$leperm
	lvx_u	$in4,$x40,$inp
	vxor	$tweak,$tweak,$tmp
	vcipherlast	$tmp,$out5,$in5	# last block might be needed
					# in stealing mode
	le?vperm	$in3,$in3,$in3,$leperm
	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	le?vperm	$in4,$in4,$in4,$leperm
	le?vperm	$in5,$in5,$in5,$leperm

	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	vxor	$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	vxor	$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u	$out3,$x30,$out
	vxor	$out3,$in3,$twk3
	le?vperm	$out5,$tmp,$tmp,$leperm
	stvx_u	$out4,$x40,$out
	vxor	$out4,$in4,$twk4
	le?stvx_u	$out5,$x50,$out
	be?stvx_u	$tmp, $x50,$out
	vxor	$out5,$in5,$twk5
	addi	$out,$out,0x60

	mtctr	$rounds
	beq	Loop_xts_enc6x		# did $len-=96 borrow?
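
	# Out of the main loop $len is negative; adding the 0x60 bias back
	# yields how many input bytes remain (0x00-0x50) among the six
	# blocks just loaded, and the compare ladder dispatches to the
	# matching one- to five-block tail.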
	addic.	$len,$len,0x60
	beq	Lxts_enc6x_zero
	cmpwi	$len,0x20
	blt	Lxts_enc6x_one
	nop
	beq	Lxts_enc6x_two
	cmpwi	$len,0x40
	blt	Lxts_enc6x_three
	nop
	beq	Lxts_enc6x_four

Lxts_enc6x_five:
	vxor	$out0,$in1,$twk0
	vxor	$out1,$in2,$twk1
	vxor	$out2,$in3,$twk2
	vxor	$out3,$in4,$twk3
	vxor	$out4,$in5,$twk4

	bl	_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk5			# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out			# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	vxor	$tmp,$out4,$twk5		# last block prep for stealing
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done

.align	4
Lxts_enc6x_four:
	vxor	$out0,$in2,$twk0
	vxor	$out1,$in3,$twk1
	vxor	$out2,$in4,$twk2
	vxor	$out3,$in5,$twk3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk4			# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out			# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	vxor	$tmp,$out3,$twk4		# last block prep for stealing
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done

.align	4
Lxts_enc6x_three:
	vxor	$out0,$in3,$twk0
	vxor	$out1,$in4,$twk1
	vxor	$out2,$in5,$twk2
	vxor	$out3,$out3,$out3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk3			# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out			# store output
	vxor	$tmp,$out2,$twk3		# last block prep for stealing
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done

.align	4
Lxts_enc6x_two:
	vxor	$out0,$in4,$twk0
	vxor	$out1,$in5,$twk1
	vxor	$out2,$out2,$out2
	vxor	$out3,$out3,$out3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk2			# unused tweak
	vxor	$tmp,$out1,$twk2		# last block prep for stealing
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out			# store output
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done

.align	4
Lxts_enc6x_one:
	vxor	$out0,$in5,$twk0
	nop
Loop_xts_enc1x:
	vcipher	$out0,$out0,v24
	lvx	v24,$x20,$key_			# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	lvx	v25,$x10,$key_			# round[4]
	bdnz	Loop_xts_enc1x
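
	# While the last rounds of the final block are still in flight,
	# nudge $inp so that lvx_u below fetches the 16 input bytes that
	# end at the partial tail (if any); they are needed for ciphertext
	# stealing.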
	add	$inp,$inp,$taillen
	cmpwi	$taillen,0
	vcipher	$out0,$out0,v24

	subi	$inp,$inp,16
	vcipher	$out0,$out0,v25

	lvsr	$inpperm,0,$taillen
	vcipher	$out0,$out0,v26

	lvx_u	$in0,0,$inp
	vcipher	$out0,$out0,v27

	addi	$key_,$sp,$FRAME+15		# rewind $key_
	vcipher	$out0,$out0,v28
	lvx	v24,$x00,$key_			# re-pre-load round[1]

	vcipher	$out0,$out0,v29
	lvx	v25,$x10,$key_			# re-pre-load round[2]
	vxor	$twk0,$twk0,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vcipher	$out0,$out0,v30

	vperm	$in0,$in0,$in0,$inpperm
	vcipherlast	$out0,$out0,$twk0

	vmr	$twk0,$twk1			# unused tweak
	vxor	$tmp,$out0,$twk1		# last block prep for stealing
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u	$out0,$x00,$out			# store output
	addi	$out,$out,0x10
	bne	Lxts_enc6x_steal
	b	Lxts_enc6x_done

.align	4
Lxts_enc6x_zero:
	cmpwi	$taillen,0
	beq	Lxts_enc6x_done

	add	$inp,$inp,$taillen
	subi	$inp,$inp,16
	lvx_u	$in0,0,$inp
	lvsr	$inpperm,0,$taillen		# $in5 is no more
	le?vperm	$in0,$in0,$in0,$leperm
	vperm	$in0,$in0,$in0,$inpperm
	vxor	$tmp,$tmp,$twk0
Lxts_enc6x_steal:
	vxor	$in0,$in0,$twk0
	vxor	$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm	$out0,$out0,$out1,$inpperm
	vsel	$out0,$in0,$tmp,$out0	# $tmp is last block, remember?

	subi	r30,$out,17
	subi	$out,$out,16
	mtctr	$taillen
Loop_xts_enc6x_steal:
	lbzu	r0,1(r30)
	stb	r0,16(r30)
	bdnz	Loop_xts_enc6x_steal

	li	$taillen,0
	mtctr	$rounds
	b	Loop_xts_enc1x		# one more time...

.align	4
Lxts_enc6x_done:
	${UCMP}i	$ivp,0
	beq	Lxts_enc6x_ret

	vxor	$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp

Lxts_enc6x_ret:
	mtlr	r11
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$seven,r10,$sp		# wipe copies of round keys
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x04,1,0x80,6,6,0
	.long	0
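
	# _aesp8_xts_enc5x runs the remaining rounds for up to five blocks
	# at once; it xors the tweaks with the last round key on the fly
	# and folds the tail-block load into the round schedule, just like
	# the 6x loop above.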
.align	5
_aesp8_xts_enc5x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	_aesp8_xts_enc5x

	add	$inp,$inp,$taillen
	cmpwi	$taillen,0
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24

	subi	$inp,$inp,16
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vxor	$twk0,$twk0,v31

	vcipher	$out0,$out0,v26
	lvsr	$inpperm,r0,$taillen	# $in5 is no more
	vcipher	$out1,$out1,v26
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vcipher	$out4,$out4,v26
	vxor	$in1,$twk1,v31

	vcipher	$out0,$out0,v27
	lvx_u	$in0,0,$inp
	vcipher	$out1,$out1,v27
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vcipher	$out4,$out4,v27
	vxor	$in2,$twk2,v31

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v28
	vcipher	$out1,$out1,v28
	vcipher	$out2,$out2,v28
	vcipher	$out3,$out3,v28
	vcipher	$out4,$out4,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vxor	$in3,$twk3,v31

	vcipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vcipher	$out1,$out1,v29
	vcipher	$out2,$out2,v29
	vcipher	$out3,$out3,v29
	vcipher	$out4,$out4,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vxor	$in4,$twk4,v31

	vcipher	$out0,$out0,v30
	vperm	$in0,$in0,$in0,$inpperm
	vcipher	$out1,$out1,v30
	vcipher	$out2,$out2,v30
	vcipher	$out3,$out3,v30
	vcipher	$out4,$out4,v30

	vcipherlast	$out0,$out0,$twk0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
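
	# The decrypt path below mirrors _aesp8_xts_encrypt6x: same stack
	# frame, key off-load and tweak pipeline, with vncipher and
	# vncipherlast in place of vcipher and vcipherlast.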
.align	5
_aesp8_xts_decrypt6x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr	r11
	li	r7,`$FRAME+8*16+15`
	li	r3,`$FRAME+8*16+31`
	$PUSH	r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx	v20,r7,$sp		# ABI says so
	addi	r7,r7,32
	stvx	v21,r3,$sp
	addi	r3,r3,32
	stvx	v22,r7,$sp
	addi	r7,r7,32
	stvx	v23,r3,$sp
	addi	r3,r3,32
	stvx	v24,r7,$sp
	addi	r7,r7,32
	stvx	v25,r3,$sp
	addi	r3,r3,32
	stvx	v26,r7,$sp
	addi	r7,r7,32
	stvx	v27,r3,$sp
	addi	r3,r3,32
	stvx	v28,r7,$sp
	addi	r7,r7,32
	stvx	v29,r3,$sp
	addi	r3,r3,32
	stvx	v30,r7,$sp
	stvx	v31,r3,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total

	lvx	$rndkey0,$x00,$key1	# load key schedule
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	lvx	v31,$x00,$key1
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_xts_dec_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key1
	addi	$key1,$key1,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key1
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_xts_dec_key

	lvx	v26,$x10,$key1
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key1
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key1
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key1
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key1
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key1
	?vperm	v29,v29,v30,$keyperm
	lvx	$twk5,$x70,$key1	# borrow $twk5
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$twk5,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	vperm	$in0,$inout,$inptail,$inpperm
	subi	$inp,$inp,31		# undo "caller"
	vxor	$twk0,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vand	$tmp,$tmp,$eighty7
	vxor	$out0,$in0,$twk0
	vxor	$tweak,$tweak,$tmp

	lvx_u	$in1,$x10,$inp
	vxor	$twk1,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in1,$in1,$in1,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out1,$in1,$twk1
	vxor	$tweak,$tweak,$tmp

	lvx_u	$in2,$x20,$inp
	andi.	$taillen,$len,15
	vxor	$twk2,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in2,$in2,$in2,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out2,$in2,$twk2
	vxor	$tweak,$tweak,$tmp

	lvx_u	$in3,$x30,$inp
	sub	$len,$len,$taillen
	vxor	$twk3,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in3,$in3,$in3,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out3,$in3,$twk3
	vxor	$tweak,$tweak,$tmp

	lvx_u	$in4,$x40,$inp
	subi	$len,$len,0x60
	vxor	$twk4,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in4,$in4,$in4,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out4,$in4,$twk4
	vxor	$tweak,$tweak,$tmp

	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	vxor	$twk5,$tweak,$rndkey0
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	le?vperm	$in5,$in5,$in5,$leperm
	vand	$tmp,$tmp,$eighty7
	vxor	$out5,$in5,$twk5
	vxor	$tweak,$tweak,$tmp

	vxor	v31,v31,$rndkey0
	mtctr	$rounds
	b	Loop_xts_dec6x

.align	5
Loop_xts_dec6x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_xts_dec6x

	subic	$len,$len,96		# $len-=96
	vxor	$in0,$twk0,v31		# xor with last round key
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk0,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vsldoi	$tmp,$tmp,$tmp,15
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24

	subfe.	r0,r0,r0		# borrow?-1:0
	vand	$tmp,$tmp,$eighty7
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vxor	$tweak,$tweak,$tmp
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vxor	$in1,$twk1,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk1,$tweak,$rndkey0
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25

	and	r0,r0,$len
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vand	$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vxor	$tweak,$tweak,$tmp
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in5 are loaded
					# with last "words"
	vxor	$in2,$twk2,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk2,$tweak,$rndkey0
	vaddubm	$tweak,$tweak,$tweak
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vsldoi	$tmp,$tmp,$tmp,15
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vand	$tmp,$tmp,$eighty7
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vxor	$tweak,$tweak,$tmp
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vxor	$in3,$twk3,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk3,$tweak,$rndkey0
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vand	$tmp,$tmp,$eighty7

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vxor	$tweak,$tweak,$tmp
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vxor	$in4,$twk4,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk4,$tweak,$rndkey0
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vand	$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vxor	$tweak,$tweak,$tmp
	vncipher	$out4,$out4,v30
	vncipher	$out5,$out5,v30
	vxor	$in5,$twk5,v31
	vsrab	$tmp,$tweak,$seven	# next tweak value
	vxor	$twk5,$tweak,$rndkey0

	vncipherlast	$out0,$out0,$in0
	lvx_u	$in0,$x00,$inp		# load next input block
	vaddubm	$tweak,$tweak,$tweak
	vsldoi	$tmp,$tmp,$tmp,15
	vncipherlast	$out1,$out1,$in1
	lvx_u	$in1,$x10,$inp
	vncipherlast	$out2,$out2,$in2
	le?vperm	$in0,$in0,$in0,$leperm
	lvx_u	$in2,$x20,$inp
	vand	$tmp,$tmp,$eighty7
	vncipherlast	$out3,$out3,$in3
	le?vperm	$in1,$in1,$in1,$leperm
	lvx_u	$in3,$x30,$inp
	vncipherlast	$out4,$out4,$in4
	le?vperm	$in2,$in2,$in2,$leperm
	lvx_u	$in4,$x40,$inp
	vxor	$tweak,$tweak,$tmp
	vncipherlast	$out5,$out5,$in5
	le?vperm	$in3,$in3,$in3,$leperm
	lvx_u	$in5,$x50,$inp
	addi	$inp,$inp,0x60
	le?vperm	$in4,$in4,$in4,$leperm
	le?vperm	$in5,$in5,$in5,$leperm

	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out		# store output
	vxor	$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	vxor	$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	vxor	$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u	$out3,$x30,$out
	vxor	$out3,$in3,$twk3
	le?vperm	$out5,$out5,$out5,$leperm
	stvx_u	$out4,$x40,$out
	vxor	$out4,$in4,$twk4
	stvx_u	$out5,$x50,$out
	vxor	$out5,$in5,$twk5
	addi	$out,$out,0x60

	mtctr	$rounds
	beq	Loop_xts_dec6x		# did $len-=96 borrow?
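
	# Same remainder dispatch as on the encrypt side, except that each
	# tail keeps two tweak values alive: Lxts_dec6x_steal decrypts its
	# final complete block with $twk1 and the stolen tail with $twk0.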
	addic.	$len,$len,0x60
	beq	Lxts_dec6x_zero
	cmpwi	$len,0x20
	blt	Lxts_dec6x_one
	nop
	beq	Lxts_dec6x_two
	cmpwi	$len,0x40
	blt	Lxts_dec6x_three
	nop
	beq	Lxts_dec6x_four

Lxts_dec6x_five:
	vxor	$out0,$in1,$twk0
	vxor	$out1,$in2,$twk1
	vxor	$out2,$in3,$twk2
	vxor	$out3,$in4,$twk3
	vxor	$out4,$in5,$twk4

	bl	_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk5			# unused tweak
	vxor	$twk1,$tweak,$rndkey0
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out			# store output
	vxor	$out0,$in0,$twk1
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	bne	Lxts_dec6x_steal
	b	Lxts_dec6x_done

.align	4
Lxts_dec6x_four:
	vxor	$out0,$in2,$twk0
	vxor	$out1,$in3,$twk1
	vxor	$out2,$in4,$twk2
	vxor	$out3,$in5,$twk3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk4			# unused tweak
	vmr	$twk1,$twk5
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out			# store output
	vxor	$out0,$in0,$twk5
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	bne	Lxts_dec6x_steal
	b	Lxts_dec6x_done

.align	4
Lxts_dec6x_three:
	vxor	$out0,$in3,$twk0
	vxor	$out1,$in4,$twk1
	vxor	$out2,$in5,$twk2
	vxor	$out3,$out3,$out3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk3			# unused tweak
	vmr	$twk1,$twk4
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out			# store output
	vxor	$out0,$in0,$twk4
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	bne	Lxts_dec6x_steal
	b	Lxts_dec6x_done

.align	4
Lxts_dec6x_two:
	vxor	$out0,$in4,$twk0
	vxor	$out1,$in5,$twk1
	vxor	$out2,$out2,$out2
	vxor	$out3,$out3,$out3
	vxor	$out4,$out4,$out4

	bl	_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr	$twk0,$twk2			# unused tweak
	vmr	$twk1,$twk3
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u	$out0,$x00,$out			# store output
	vxor	$out0,$in0,$twk3
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	bne	Lxts_dec6x_steal
	b	Lxts_dec6x_done

.align	4
Lxts_dec6x_one:
	vxor	$out0,$in5,$twk0
	nop
Loop_xts_dec1x:
	vncipher	$out0,$out0,v24
	lvx	v24,$x20,$key_			# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx	v25,$x10,$key_			# round[4]
	bdnz	Loop_xts_dec1x
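
	# (taillen-1)&16 is 16 when $taillen is zero and 0 otherwise, so
	# $inp is pulled back one block only when there is no partial tail,
	# keeping the tail load below inside the input buffer.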
	subi	r0,$taillen,1
	vncipher	$out0,$out0,v24

	andi.	r0,r0,16
	cmpwi	$taillen,0
	vncipher	$out0,$out0,v25

	sub	$inp,$inp,r0
	vncipher	$out0,$out0,v26

	lvx_u	$in0,0,$inp
	vncipher	$out0,$out0,v27

	addi	$key_,$sp,$FRAME+15		# rewind $key_
	vncipher	$out0,$out0,v28
	lvx	v24,$x00,$key_			# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx	v25,$x10,$key_			# re-pre-load round[2]
	vxor	$twk0,$twk0,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	mtctr	$rounds
	vncipherlast	$out0,$out0,$twk0

	vmr	$twk0,$twk1			# unused tweak
	vmr	$twk1,$twk2
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u	$out0,$x00,$out			# store output
	addi	$out,$out,0x10
	vxor	$out0,$in0,$twk2
	bne	Lxts_dec6x_steal
	b	Lxts_dec6x_done

.align	4
Lxts_dec6x_zero:
	cmpwi	$taillen,0
	beq	Lxts_dec6x_done

	lvx_u	$in0,0,$inp
	le?vperm	$in0,$in0,$in0,$leperm
	vxor	$out0,$in0,$twk1
Lxts_dec6x_steal:
	vncipher	$out0,$out0,v24
	lvx	v24,$x20,$key_			# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx	v25,$x10,$key_			# round[4]
	bdnz	Lxts_dec6x_steal

	add	$inp,$inp,$taillen
	vncipher	$out0,$out0,v24

	cmpwi	$taillen,0
	vncipher	$out0,$out0,v25

	lvx_u	$in0,0,$inp
	vncipher	$out0,$out0,v26

	lvsr	$inpperm,0,$taillen		# $in5 is no more
	vncipher	$out0,$out0,v27

	addi	$key_,$sp,$FRAME+15		# rewind $key_
	vncipher	$out0,$out0,v28
	lvx	v24,$x00,$key_			# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx	v25,$x10,$key_			# re-pre-load round[2]
	vxor	$twk1,$twk1,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	vperm	$in0,$in0,$in0,$inpperm
	vncipherlast	$tmp,$out0,$twk1

	le?vperm	$out0,$tmp,$tmp,$leperm
	le?stvx_u	$out0,0,$out
	be?stvx_u	$tmp,0,$out

	vxor	$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm	$out0,$out0,$out1,$inpperm
	vsel	$out0,$in0,$tmp,$out0
	vxor	$out0,$out0,$twk0

	subi	r30,$out,1
	mtctr	$taillen
Loop_xts_dec6x_steal:
	lbzu	r0,1(r30)
	stb	r0,16(r30)
	bdnz	Loop_xts_dec6x_steal

	li	$taillen,0
	mtctr	$rounds
	b	Loop_xts_dec1x			# one more time...

.align	4
Lxts_dec6x_done:
	${UCMP}i	$ivp,0
	beq	Lxts_dec6x_ret

	vxor	$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u	$tweak,0,$ivp

Lxts_dec6x_ret:
	mtlr	r11
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$seven,r10,$sp		# wipe copies of round keys
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32
	stvx	$seven,r10,$sp
	addi	r10,r10,32
	stvx	$seven,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x04,1,0x80,6,6,0
	.long	0

.align	5
_aesp8_xts_dec5x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	_aesp8_xts_dec5x

	subi	r0,$taillen,1
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24

	andi.	r0,r0,16
	cmpwi	$taillen,0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vxor	$twk0,$twk0,v31

	sub	$inp,$inp,r0
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vxor	$in1,$twk1,v31

	vncipher	$out0,$out0,v27
	lvx_u	$in0,0,$inp
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vxor	$in2,$twk2,v31

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]
	vxor	$in3,$twk3,v31

	vncipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]
	vxor	$in4,$twk4,v31

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vncipher	$out4,$out4,v30

	vncipherlast	$out0,$out0,$twk0
	vncipherlast	$out1,$out1,$in1
	vncipherlast	$out2,$out2,$in2
	vncipherlast	$out3,$out3,$in3
	vncipherlast	$out4,$out4,$in4
	mtctr	$rounds
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
___
}}	}}}

my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
		foreach (split(/,\s*/,$2)) {
		    my $l = /^0/?oct:int;
		    push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
		}
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv)  {
		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
		}
	    }

	    #emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
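	# For example, on little-endian flavours the rewrites below turn
	#	?lvsl	v7,0,r9		into	lvsr	v7,0,r9
	#	?vperm	v7,v0,v1,v10	into	vperm	v7,v1,v0,v10
	#	?vsldoi	v2,v0,v1,12	into	vsldoi	v2,v1,v0, 16-12
	# while le?-prefixed instructions are kept as-is and be?-prefixed
	# ones are commented out; on big-endian targets the roles swap and
	# a bare '?' prefix is simply stripped.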
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

close STDOUT;