#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8
# processor. The module is endian-agnostic in the sense that it
# supports both big- and little-endian cases. Data alignment in
# parallelizable modes is handled with VSX loads and stores, which
# implies the MSR.VSX flag being set. It should also be noted that
# the ISA specification doesn't prohibit alignment exceptions for
# these instructions on page boundaries. Initially alignment was
# handled in a pure AltiVec/VMX way [with data aligned
# programmatically, which in turn guarantees exception-free
# execution], but that turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are on average lower
# than the additional overhead of the pure AltiVec approach.

$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
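
# Note on the expansion rounds below: \$mask, the 0x0d0e0f0c splat from
# the rcon table, rotates the last key word and splats it into all four
# word positions. Because every column of the splat is identical,
# vcipherlast's ShiftRows step is a no-op there, so what it contributes
# is SubBytes plus the xor with the round constant in \$rcon, i.e.
# SubWord(RotWord(w[i-1]))^rcon per FIPS-197; the vsldoi/vxor chains
# that follow accumulate the remaining w[i-4]..w[i-1] terms.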
.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr	r11
	$PUSH	r11,$LRSAVE($sp)

	li	$ptr,-1
	${UCMP}i	$inp,0
	beq-	Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-	Lenc_key_abort		# if ($out==0) return -1;
	li	$ptr,-2
	cmpwi	$bits,128
	blt-	Lenc_key_abort
	cmpwi	$bits,256
	bgt-	Lenc_key_abort
	andi.	r0,$bits,0x3f
	bne-	Lenc_key_abort

	lis	r0,0xfff0
	mfspr	$vrsave,256
	mtspr	256,r0

	bl	Lconsts
	mtlr	r11

	neg	r9,$inp
	lvx	$in0,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	lvsr	$key,0,r9		# borrow $key
	li	r8,0x20
	cmpwi	$bits,192
	lvx	$in1,0,$inp
	le?vspltisb	$mask,0x0f	# borrow $mask
	lvx	$rcon,0,$ptr
	le?vxor	$key,$key,$mask		# adjust for byte swap
	lvx	$mask,r8,$ptr
	addi	$ptr,$ptr,0x10
	vperm	$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li	$cnt,8
	vxor	$zero,$zero,$zero
	mtctr	$cnt

	?lvsr	$outperm,0,$out
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$zero,$outmask,$outperm

	blt	Loop128
	addi	$inp,$inp,8
	beq	L192
	addi	$inp,$inp,8
	b	L256

.align	4
Loop128:
	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	bdnz	Loop128

	lvx	$rcon,0,$ptr		# last two round keys

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out

	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,0x50

	li	$rounds,10
	b	Ldone
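
# 192-bit keys expand by six 32-bit words at a time, which doesn't map
# cleanly onto 128-bit stores; each Loop192 iteration below therefore
# assembles and stores three round keys, so four iterations plus the
# store issued before the loop produce the 13 round keys of 12 rounds.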
.align	4
L192:
	lvx	$tmp,0,$inp
	li	$cnt,4
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8		# borrow $key
	mtctr	$cnt
	vsububm	$mask,$mask,$key	# adjust the mask

Loop192:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp

	vsldoi	$stage,$zero,$in1,8
	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vsldoi	$stage,$stage,$in0,8

	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vsldoi	$stage,$in0,$in1,8
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	stvx	$stage,0,$out
	addi	$out,$out,16

	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdnz	Loop192

	li	$rounds,12
	addi	$out,$out,0x20
	b	Ldone

.align	4
L256:
	lvx	$tmp,0,$inp
	li	$cnt,7
	li	$rounds,14
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr	$cnt

Loop256:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in1,$in1,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdz	Ldone

	vspltw	$key,$in0,3		# just splat
	vsldoi	$tmp,$zero,$in1,12	# >>32
	vsbox	$key,$key

	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp

	vxor	$in1,$in1,$key
	b	Loop256

.align	4
Ldone:
	lvx	$in1,0,$inp		# redundant in aligned case
	vsel	$in1,$outhead,$in1,$outmask
	stvx	$in1,0,$inp
	li	$ptr,0
	mtspr	256,$vrsave
	stw	$rounds,0($out)

Lenc_key_abort:
	mr	r3,$ptr
	blr
	.long	0
	.byte	0,12,0x14,1,0,0,3,0
	.long	0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU	$sp,-$FRAME($sp)
	mflr	r10
	$PUSH	r10,$FRAME+$LRSAVE($sp)
	bl	Lset_encrypt_key
	mtlr	r10

	cmpwi	r3,0
	bne-	Ldec_key_abort

	slwi	$cnt,$rounds,4
	subi	$inp,$out,240		# first round key
	srwi	$rounds,$rounds,1
	add	$out,$inp,$cnt		# last round key
	mtctr	$rounds

Ldeckey:
	lwz	r0, 0($inp)
	lwz	r6, 4($inp)
	lwz	r7, 8($inp)
	lwz	r8, 12($inp)
	addi	$inp,$inp,16
	lwz	r9, 0($out)
	lwz	r10,4($out)
	lwz	r11,8($out)
	lwz	r12,12($out)
	stw	r0, 0($out)
	stw	r6, 4($out)
	stw	r7, 8($out)
	stw	r8, 12($out)
	subi	$out,$out,16
	stw	r9, -16($inp)
	stw	r10,-12($inp)
	stw	r11,-8($inp)
	stw	r12,-4($inp)
	bdnz	Ldeckey

	xor	r3,r3,r3		# return value
Ldec_key_abort:
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,0,3,0
	.long	0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
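# The decryption schedule above is just the encryption schedule with
# the order of the round keys reversed: unlike AESDEC on x86, vncipher
# applies the round key before its InvMixColumns step, so no
# InvMixColumns transformation of the key material itself is required.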
#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz	$rounds,240($key)
	lis	r0,0xfc00
	mfspr	$vrsave,256
	li	$idx,15			# 15 is not typo
	mtspr	256,r0

	lvx	v0,0,$inp
	neg	r11,$out
	lvx	v1,$idx,$inp
	lvsl	v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl	v3,0,r11		# outperm
	le?vxor	v2,v2,v4
	li	$idx,16
	vperm	v0,v0,v1,v2		# align [and byte swap in LE]
	lvx	v1,0,$key
	?lvsl	v5,0,$key		# keyperm
	srwi	$rounds,$rounds,1
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	subi	$rounds,$rounds,1
	?vperm	v1,v1,v2,v5		# align round key

	vxor	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	mtctr	$rounds

Loop_${dir}c:
	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	?vperm	v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_${dir}c

	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	?vperm	v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor	v1,v1,v1
	li	$idx,15			# 15 is not typo
	?vperm	v2,v1,v2,v3		# outmask
	le?vxor	v3,v3,v4
	lvx	v1,0,$out		# outhead
	vperm	v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel	v1,v1,v0,v2
	lvx	v4,$idx,$out
	stvx	v1,0,$out
	vsel	v0,v0,v4,v2
	stvx	v0,$idx,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
$code.=<<___;
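# CBC: c[i] = E(p[i] ^ c[i-1]) when encrypting, p[i] = D(c[i]) ^ c[i-1]
# when decrypting. Encryption is inherently serial and is handled one
# block at a time below; decryption is parallelizable, so inputs of
# 128 bytes and more are diverted to _aesp8_cbc_decrypt8x.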
.globl	.${prefix}_cbc_encrypt
	${UCMP}i	$len,16
	bltlr-

	cmpwi	$enc,0			# test direction
	lis	r0,0xffe0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1
	beq	Lcbc_dec

Lcbc_enc:
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$inout,$inout,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	vxor	$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm	$tmp,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_enc

	b	Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge	_aesp8_cbc_decrypt8x
	vmr	$tmp,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$tmp,$tmp,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$tmp,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16

Loop_cbc_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor	$inout,$inout,$ivec
	vmr	$ivec,$tmp
	vperm	$tmp,$inout,$inout,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_dec

Lcbc_done:
	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	neg	$enc,$ivp		# write [unaligned] iv
	li	$idx,15			# 15 is not typo
	vxor	$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl	$outperm,0,$enc
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp
	lvx	$outhead,0,$ivp
	vperm	$ivec,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$ivec,$outmask
	lvx	$inptail,$idx,$ivp
	stvx	$inout,0,$ivp
	vsel	$inout,$ivec,$inptail,$outmask
	stvx	$inout,$idx,$ivp

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
#########################################################################
{{	# Optimized CBC decrypt procedure				#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
$code.=<<___;
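# Eight blocks are decrypted in parallel to cover vncipher latency, in
# line with the interleaving note at the top of the module. The round
# keys are aligned once and staged in a stack buffer so that the inner
# loop can stream them with plain lvx via \$key_.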
.align	5
_aesp8_cbc_decrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total
	subi	$len,$len,128		# bias

	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_cbc_dec_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_cbc_dec_key

	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	#lvx	$inptail,0,$inp		# "caller" already did this
	#addi	$inp,$inp,15		# 15 is not typo
	subi	$inp,$inp,15		# undo "caller"

	le?li	$idx,8
	lvx_u	$in0,$x00,$inp		# load first 8 "words"
	le?lvsl	$inpperm,0,$idx
	le?vspltisb	$tmp,0x0f
	lvx_u	$in1,$x10,$inp
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u	$in2,$x20,$inp
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u	$in3,$x30,$inp
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u	$in4,$x40,$inp
	le?vperm	$in2,$in2,$in2,$inpperm
	vxor	$out0,$in0,$rndkey0
	lvx_u	$in5,$x50,$inp
	le?vperm	$in3,$in3,$in3,$inpperm
	vxor	$out1,$in1,$rndkey0
	lvx_u	$in6,$x60,$inp
	le?vperm	$in4,$in4,$in4,$inpperm
	vxor	$out2,$in2,$rndkey0
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80
	le?vperm	$in5,$in5,$in5,$inpperm
	vxor	$out3,$in3,$rndkey0
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor	$out4,$in4,$rndkey0
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor	$out5,$in5,$rndkey0
	vxor	$out6,$in6,$rndkey0
	vxor	$out7,$in7,$rndkey0

	mtctr	$rounds
	b	Loop_cbc_dec8x
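
# The loop below keeps eight independent vncipher chains in flight,
# two rounds per iteration; round[1] and round[2] keys sit pre-loaded
# in v24-v25 and successive pairs stream from the stack copy at \$key_.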
.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_cbc_dec8x

	subic	$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.	r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and	r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	vxor	$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	vxor	$in0,$in0,v31
	vncipher	$out2,$out2,v30
	vxor	$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor	$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor	$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor	$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor	$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor	$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	lvx_u	$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	lvx_u	$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u	$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u	$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	le?vperm	$in2,$in2,$in2,$inpperm
	lvx_u	$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	le?vperm	$in3,$in3,$in3,$inpperm
	lvx_u	$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	le?vperm	$in4,$in4,$in4,$inpperm
	lvx_u	$in6,$x60,$inp
	vmr	$ivec,$in7
	le?vperm	$in5,$in5,$in5,$inpperm
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor	$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor	$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	vxor	$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	vxor	$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	vxor	$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	vxor	$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	vxor	$out6,$in6,$rndkey0
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	vxor	$out7,$in7,$rndkey0

	mtctr	$rounds
	beq	Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.	$len,$len,128
	beq	Lcbc_dec8x_done
	nop
	nop
Loop_cbc_dec8x_tail:			# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	vxor	$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	vxor	$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor	$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor	$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor	$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor	$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor	$in6,$in6,v31

	cmplwi	$len,32			# switch($len)
	blt	Lcbc_dec8x_one
	nop
	beq	Lcbc_dec8x_two
	cmplwi	$len,64
	blt	Lcbc_dec8x_three
	nop
	beq	Lcbc_dec8x_four
	cmplwi	$len,96
	blt	Lcbc_dec8x_five
	nop
	beq	Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x50,$out
	stvx_u	$out7,$x60,$out
	addi	$out,$out,0x70
	b	Lcbc_dec8x_done
.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x40,$out
	stvx_u	$out7,$x50,$out
	addi	$out,$out,0x60
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x30,$out
	stvx_u	$out7,$x40,$out
	addi	$out,$out,0x50
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x20,$out
	stvx_u	$out7,$x30,$out
	addi	$out,$out,0x40
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x10,$out
	stvx_u	$out7,$x20,$out
	addi	$out,$out,0x30
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x00,$out
	stvx_u	$out7,$x10,$out
	addi	$out,$out,0x20
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr	$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out7,0,$out
	addi	$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u	$ivec,0,$ivp		# write [unaligned] iv

	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x14,0,0x80,6,6,0
	.long	0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}
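# CTR mode produces the key stream by encrypting an incrementing
# counter block and xoring it with the input: c[i] = p[i] ^ E(iv+i).
# The ctr32 name follows OpenSSL convention: the counter is treated as
# 32-bit, and the caller is expected to split calls so that it does
# not wrap mid-way.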
#########################################################################
{{{	# CTR procedure[s]						#
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
	${UCMP}i	$len,1
	bltlr-

	lis	r0,0xfff0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	vspltisb	$one,1
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm
	vsldoi	$one,$rndkey0,$one,1

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1

	${UCMP}i	$len,8
	bge	_aesp8_ctr32_encrypt8x

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	lvx	$rndkey0,0,$key
	mtctr	$rounds
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$ivec,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	b	Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_ctr32_enc

	vadduwm	$ivec,$ivec,$one
	vmr	$dat,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	subic.	$len,$len,1		# blocks--

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	vperm	$dat,$dat,$inptail,$inpperm
	li	$idx,16
	?vperm	$rndkey1,$rndkey0,$rndkey1,$keyperm
	lvx	$rndkey0,0,$key
	vxor	$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inout,$outperm
	vsel	$dat,$outhead,$inout,$outmask
	mtctr	$rounds
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr	$outhead,$inout
	vxor	$inout,$ivec,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	stvx	$dat,0,$out
	addi	$out,$out,16
	bne	Loop_ctr32_enc

	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
#########################################################################
{{	# Optimized CTR procedure					#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);

$code.=<<___;
.align	5
_aesp8_ctr32_encrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total

	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_ctr32_enc_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_ctr32_enc_key

	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	vadduqm	$two,$one,$one
	subi	$inp,$inp,15		# undo "caller"
	$SHL	$len,$len,4

	vadduqm	$out1,$ivec,$one	# counter values ...
	vadduqm	$out2,$ivec,$two
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	le?li	$idx,8
	vadduqm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	le?lvsl	$inpperm,0,$idx
	vadduqm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	le?vspltisb	$tmp,0x0f
	vadduqm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	vadduqm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vadduqm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	vadduqm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	vxor	$out7,$out7,$rndkey0

	mtctr	$rounds
	b	Loop_ctr32_enc8x
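
# Counter blocks for a batch are computed up front with vadduqm and
# xored with round key[0] ahead of time; in the steady state the code
# below re-enters at Loop_ctr32_enc8x_middle, so that round[1] of the
# next batch overlaps with the stores of the previous one.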
.align	5
Loop_ctr32_enc8x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24
Loop_ctr32_enc8x_middle:
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_ctr32_enc8x

	subic	r11,$len,256		# $len-256, borrow $key_
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24

	subfe	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25

	and	r0,r0,r11
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v26
	vcipher	$out1,$out1,v26
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vcipher	$out4,$out4,v26
	vcipher	$out5,$out5,v26
	vcipher	$out6,$out6,v26
	vcipher	$out7,$out7,v26
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	subic	$len,$len,129		# $len-=129
	vcipher	$out0,$out0,v27
	addi	$len,$len,1		# $len-=128 really
	vcipher	$out1,$out1,v27
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vcipher	$out4,$out4,v27
	vcipher	$out5,$out5,v27
	vcipher	$out6,$out6,v27
	vcipher	$out7,$out7,v27
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vcipher	$out0,$out0,v28
	lvx_u	$in0,$x00,$inp		# load input
	vcipher	$out1,$out1,v28
	lvx_u	$in1,$x10,$inp
	vcipher	$out2,$out2,v28
	lvx_u	$in2,$x20,$inp
	vcipher	$out3,$out3,v28
	lvx_u	$in3,$x30,$inp
	vcipher	$out4,$out4,v28
	lvx_u	$in4,$x40,$inp
	vcipher	$out5,$out5,v28
	lvx_u	$in5,$x50,$inp
	vcipher	$out6,$out6,v28
	lvx_u	$in6,$x60,$inp
	vcipher	$out7,$out7,v28
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	vcipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$inpperm
	vcipher	$out1,$out1,v29
	le?vperm	$in1,$in1,$in1,$inpperm
	vcipher	$out2,$out2,v29
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out3,$out3,v29
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out4,$out4,v29
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out5,$out5,v29
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out6,$out6,v29
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out7,$out7,v29
	le?vperm	$in7,$in7,$in7,$inpperm

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
	subfe.	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v30
	vxor	$in0,$in0,v31		# xor with last round key
	vcipher	$out1,$out1,v30
	vxor	$in1,$in1,v31
	vcipher	$out2,$out2,v30
	vxor	$in2,$in2,v31
	vcipher	$out3,$out3,v30
	vxor	$in3,$in3,v31
	vcipher	$out4,$out4,v30
	vxor	$in4,$in4,v31
	vcipher	$out5,$out5,v30
	vxor	$in5,$in5,v31
	vcipher	$out6,$out6,v30
	vxor	$in6,$in6,v31
	vcipher	$out7,$out7,v30
	vxor	$in7,$in7,v31

	bne	Lctr32_enc8x_break	# did $len-129 borrow?

	vcipherlast	$in0,$out0,$in0
	vcipherlast	$in1,$out1,$in1
	vadduqm	$out1,$ivec,$one	# counter values ...
	vcipherlast	$in2,$out2,$in2
	vadduqm	$out2,$ivec,$two
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	vcipherlast	$in3,$out3,$in3
	vadduqm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	vcipherlast	$in4,$out4,$in4
	vadduqm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	vcipherlast	$in5,$out5,$in5
	vadduqm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	vcipherlast	$in6,$out6,$in6
	vadduqm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vcipherlast	$in7,$out7,$in7
	vadduqm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	le?vperm	$in0,$in0,$in0,$inpperm
	vadduqm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	le?vperm	$in1,$in1,$in1,$inpperm
	vxor	$out7,$out7,$rndkey0
	mtctr	$rounds

	vcipher	$out0,$out0,v24
	stvx_u	$in0,$x00,$out
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out1,$out1,v24
	stvx_u	$in1,$x10,$out
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out2,$out2,v24
	stvx_u	$in2,$x20,$out
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out3,$out3,v24
	stvx_u	$in3,$x30,$out
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out4,$out4,v24
	stvx_u	$in4,$x40,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out5,$out5,v24
	stvx_u	$in5,$x50,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vcipher	$out6,$out6,v24
	stvx_u	$in6,$x60,$out
	vcipher	$out7,$out7,v24
	stvx_u	$in7,$x70,$out
	addi	$out,$out,0x80

	b	Loop_ctr32_enc8x_middle
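
# At this point $len carries the remaining byte count biased by -128:
# 0 means a full batch of eight blocks is left, -0x70 means a single
# block, and the compare ladder below dispatches accordingly.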
.align	5
Lctr32_enc8x_break:
	cmpwi	$len,-0x60
	blt	Lctr32_enc8x_one
	nop
	beq	Lctr32_enc8x_two
	cmpwi	$len,-0x40
	blt	Lctr32_enc8x_three
	nop
	beq	Lctr32_enc8x_four
	cmpwi	$len,-0x20
	blt	Lctr32_enc8x_five
	nop
	beq	Lctr32_enc8x_six
	cmpwi	$len,0x00
	blt	Lctr32_enc8x_seven

Lctr32_enc8x_eight:
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_seven:
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	stvx_u	$out6,$x60,$out
	addi	$out,$out,0x70
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	stvx_u	$out5,$x50,$out
	addi	$out,$out,0x60
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7
	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u	$out0,0,$out
	addi	$out,$out,0x10

Lctr32_enc8x_done:
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x14,0,0x80,6,6,0
	.long	0
.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}}	}}}

my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
	      foreach (split(/,\s*/,$2)) {
		my $l = /^0/?oct:int;
		push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
	      }
	    } else {
	      @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv)  {
		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
		}
	    }

	    #emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
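	# le?/be? select an instruction for one endianness only: the
	# tag is stripped on match and the instruction commented out
	# on the other flavour. ?lvsl and ?lvsr swap with each other
	# on little-endian, ?vperm swaps its two source registers,
	# ?vsldoi complements the shift count to 16-n, and ?vspltw
	# mirrors the word index to 3-n; on big-endian all '?' tags
	# are simply dropped.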
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

close STDOUT;