/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Original implementation written by Andy Polyakov, @dot-asm.
 * This is an adaptation of the original code for kernel use.
 *
 * Copyright (C) 2006-2019 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved.
 */

#include <linux/linkage.h>
#include <asm/nospec-insn.h>
#include <asm/vx-insn.h>

#define SP	%r15
#define FRAME	(16 * 8 + 4 * 8)

	.data
	.balign	32

SYM_DATA_START_LOCAL(sigma)
	.long	0x61707865,0x3320646e,0x79622d32,0x6b206574	# endian-neutral
	.long	1,0,0,0
	.long	2,0,0,0
	.long	3,0,0,0
	.long	0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c	# byte swap

	.long	0,1,2,3
	.long	0x61707865,0x61707865,0x61707865,0x61707865	# smashed sigma
	.long	0x3320646e,0x3320646e,0x3320646e,0x3320646e
	.long	0x79622d32,0x79622d32,0x79622d32,0x79622d32
	.long	0x6b206574,0x6b206574,0x6b206574,0x6b206574
SYM_DATA_END(sigma)

	.previous

	GEN_BR_THUNK %r14

	.text

#############################################################################
# void chacha20_vx_4x(u8 *out, const u8 *inp, size_t len,
#		      const u32 *key, const u32 *counter)

#define OUT	%r2
#define INP	%r3
#define LEN	%r4
#define KEY	%r5
#define COUNTER	%r6

#define BEPERM	%v31
#define CTR	%v26

#define K0	%v16
#define K1	%v17
#define K2	%v18
#define K3	%v19

#define XA0	%v0
#define XA1	%v1
#define XA2	%v2
#define XA3	%v3

#define XB0	%v4
#define XB1	%v5
#define XB2	%v6
#define XB3	%v7

#define XC0	%v8
#define XC1	%v9
#define XC2	%v10
#define XC3	%v11

#define XD0	%v12
#define XD1	%v13
#define XD2	%v14
#define XD3	%v15

#define XT0	%v27
#define XT1	%v28
#define XT2	%v29
#define XT3	%v30

SYM_FUNC_START(chacha20_vx_4x)
	stmg	%r6,%r7,6*8(SP)

	larl	%r7,sigma
	lhi	%r0,10
	lhi	%r1,0

	VL	K0,0,,%r7		# load sigma
	VL	K1,0,,KEY		# load key
	VL	K2,16,,KEY
	VL	K3,0,,COUNTER		# load counter

	VL	BEPERM,0x40,,%r7
	VL	CTR,0x50,,%r7

	VLM	XA0,XA3,0x60,%r7,4	# load [smashed] sigma

	VREPF	XB0,K1,0		# smash the key
	VREPF	XB1,K1,1
	VREPF	XB2,K1,2
	VREPF	XB3,K1,3

	VREPF	XD0,K3,0
	VREPF	XD1,K3,1
	VREPF	XD2,K3,2
	VREPF	XD3,K3,3
	VAF	XD0,XD0,CTR

	VREPF	XC0,K2,0
	VREPF	XC1,K2,1
	VREPF	XC2,K2,2
	VREPF	XC3,K2,3

.Loop_4x:
	VAF	XA0,XA0,XB0
	VX	XD0,XD0,XA0
	VERLLF	XD0,XD0,16

	VAF	XA1,XA1,XB1
	VX	XD1,XD1,XA1
	VERLLF	XD1,XD1,16

	VAF	XA2,XA2,XB2
	VX	XD2,XD2,XA2
	VERLLF	XD2,XD2,16

	VAF	XA3,XA3,XB3
	VX	XD3,XD3,XA3
	VERLLF	XD3,XD3,16

	VAF	XC0,XC0,XD0
	VX	XB0,XB0,XC0
	VERLLF	XB0,XB0,12

	VAF	XC1,XC1,XD1
	VX	XB1,XB1,XC1
	VERLLF	XB1,XB1,12

	VAF	XC2,XC2,XD2
	VX	XB2,XB2,XC2
	VERLLF	XB2,XB2,12

	VAF	XC3,XC3,XD3
	VX	XB3,XB3,XC3
	VERLLF	XB3,XB3,12

	VAF	XA0,XA0,XB0
	VX	XD0,XD0,XA0
	VERLLF	XD0,XD0,8

	VAF	XA1,XA1,XB1
	VX	XD1,XD1,XA1
	VERLLF	XD1,XD1,8

	VAF	XA2,XA2,XB2
	VX	XD2,XD2,XA2
	VERLLF	XD2,XD2,8

	VAF	XA3,XA3,XB3
	VX	XD3,XD3,XA3
	VERLLF	XD3,XD3,8

	VAF	XC0,XC0,XD0
	VX	XB0,XB0,XC0
	VERLLF	XB0,XB0,7

	VAF	XC1,XC1,XD1
	VX	XB1,XB1,XC1
	VERLLF	XB1,XB1,7

	VAF	XC2,XC2,XD2
	VX	XB2,XB2,XC2
	VERLLF	XB2,XB2,7

	VAF	XC3,XC3,XD3
	VX	XB3,XB3,XC3
	VERLLF	XB3,XB3,7

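	# Second half of the double round: the same quarter-round pattern,
	# now applied to the diagonals (XA0,XB1,XC2,XD3), (XA1,XB2,XC3,XD0),
	# (XA2,XB3,XC0,XD1) and (XA3,XB0,XC1,XD2).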
	VAF	XA0,XA0,XB1
	VX	XD3,XD3,XA0
	VERLLF	XD3,XD3,16

	VAF	XA1,XA1,XB2
	VX	XD0,XD0,XA1
	VERLLF	XD0,XD0,16

	VAF	XA2,XA2,XB3
	VX	XD1,XD1,XA2
	VERLLF	XD1,XD1,16

	VAF	XA3,XA3,XB0
	VX	XD2,XD2,XA3
	VERLLF	XD2,XD2,16

	VAF	XC2,XC2,XD3
	VX	XB1,XB1,XC2
	VERLLF	XB1,XB1,12

	VAF	XC3,XC3,XD0
	VX	XB2,XB2,XC3
	VERLLF	XB2,XB2,12

	VAF	XC0,XC0,XD1
	VX	XB3,XB3,XC0
	VERLLF	XB3,XB3,12

	VAF	XC1,XC1,XD2
	VX	XB0,XB0,XC1
	VERLLF	XB0,XB0,12

	VAF	XA0,XA0,XB1
	VX	XD3,XD3,XA0
	VERLLF	XD3,XD3,8

	VAF	XA1,XA1,XB2
	VX	XD0,XD0,XA1
	VERLLF	XD0,XD0,8

	VAF	XA2,XA2,XB3
	VX	XD1,XD1,XA2
	VERLLF	XD1,XD1,8

	VAF	XA3,XA3,XB0
	VX	XD2,XD2,XA3
	VERLLF	XD2,XD2,8

	VAF	XC2,XC2,XD3
	VX	XB1,XB1,XC2
	VERLLF	XB1,XB1,7

	VAF	XC3,XC3,XD0
	VX	XB2,XB2,XC3
	VERLLF	XB2,XB2,7

	VAF	XC0,XC0,XD1
	VX	XB3,XB3,XC0
	VERLLF	XB3,XB3,7

	VAF	XC1,XC1,XD2
	VX	XB0,XB0,XC1
	VERLLF	XB0,XB0,7
	brct	%r0,.Loop_4x

	VAF	XD0,XD0,CTR

	VMRHF	XT0,XA0,XA1		# transpose data
	VMRHF	XT1,XA2,XA3
	VMRLF	XT2,XA0,XA1
	VMRLF	XT3,XA2,XA3
	VPDI	XA0,XT0,XT1,0b0000
	VPDI	XA1,XT0,XT1,0b0101
	VPDI	XA2,XT2,XT3,0b0000
	VPDI	XA3,XT2,XT3,0b0101

	VMRHF	XT0,XB0,XB1
	VMRHF	XT1,XB2,XB3
	VMRLF	XT2,XB0,XB1
	VMRLF	XT3,XB2,XB3
	VPDI	XB0,XT0,XT1,0b0000
	VPDI	XB1,XT0,XT1,0b0101
	VPDI	XB2,XT2,XT3,0b0000
	VPDI	XB3,XT2,XT3,0b0101

	VMRHF	XT0,XC0,XC1
	VMRHF	XT1,XC2,XC3
	VMRLF	XT2,XC0,XC1
	VMRLF	XT3,XC2,XC3
	VPDI	XC0,XT0,XT1,0b0000
	VPDI	XC1,XT0,XT1,0b0101
	VPDI	XC2,XT2,XT3,0b0000
	VPDI	XC3,XT2,XT3,0b0101

	VMRHF	XT0,XD0,XD1
	VMRHF	XT1,XD2,XD3
	VMRLF	XT2,XD0,XD1
	VMRLF	XT3,XD2,XD3
	VPDI	XD0,XT0,XT1,0b0000
	VPDI	XD1,XT0,XT1,0b0101
	VPDI	XD2,XT2,XT3,0b0000
	VPDI	XD3,XT2,XT3,0b0101

	VAF	XA0,XA0,K0
	VAF	XB0,XB0,K1
	VAF	XC0,XC0,K2
	VAF	XD0,XD0,K3

	VPERM	XA0,XA0,XA0,BEPERM
	VPERM	XB0,XB0,XB0,BEPERM
	VPERM	XC0,XC0,XC0,BEPERM
	VPERM	XD0,XD0,XD0,BEPERM

	VLM	XT0,XT3,0,INP,0

	VX	XT0,XT0,XA0
	VX	XT1,XT1,XB0
	VX	XT2,XT2,XC0
	VX	XT3,XT3,XD0

	VSTM	XT0,XT3,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40

	VAF	XA0,XA1,K0
	VAF	XB0,XB1,K1
	VAF	XC0,XC1,K2
	VAF	XD0,XD1,K3

	VPERM	XA0,XA0,XA0,BEPERM
	VPERM	XB0,XB0,XB0,BEPERM
	VPERM	XC0,XC0,XC0,BEPERM
	VPERM	XD0,XD0,XD0,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_4x

	VLM	XT0,XT3,0,INP,0

	VX	XT0,XT0,XA0
	VX	XT1,XT1,XB0
	VX	XT2,XT2,XC0
	VX	XT3,XT3,XD0

	VSTM	XT0,XT3,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_4x

	VAF	XA0,XA2,K0
	VAF	XB0,XB2,K1
	VAF	XC0,XC2,K2
	VAF	XD0,XD2,K3

	VPERM	XA0,XA0,XA0,BEPERM
	VPERM	XB0,XB0,XB0,BEPERM
	VPERM	XC0,XC0,XC0,BEPERM
	VPERM	XD0,XD0,XD0,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_4x

	VLM	XT0,XT3,0,INP,0

	VX	XT0,XT0,XA0
	VX	XT1,XT1,XB0
	VX	XT2,XT2,XC0
	VX	XT3,XT3,XD0

	VSTM	XT0,XT3,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_4x

	VAF	XA0,XA3,K0
	VAF	XB0,XB3,K1
	VAF	XC0,XC3,K2
	VAF	XD0,XD3,K3

	VPERM	XA0,XA0,XA0,BEPERM
	VPERM	XB0,XB0,XB0,BEPERM
	VPERM	XC0,XC0,XC0,BEPERM
	VPERM	XD0,XD0,XD0,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_4x

	VLM	XT0,XT3,0,INP,0

	VX	XT0,XT0,XA0
	VX	XT1,XT1,XB0
	VX	XT2,XT2,XC0
	VX	XT3,XT3,XD0

	VSTM	XT0,XT3,0,OUT,0

.Ldone_4x:
	lmg	%r6,%r7,6*8(SP)
	BR_EX	%r14

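# Fewer than 0x40 bytes remain: spill the current 64-byte keystream block
# to the stack buffer and XOR it into the output one byte at a time.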
.Ltail_4x:
	VLR	XT0,XC0
	VLR	XT1,XD0

	VST	XA0,8*8+0x00,,SP
	VST	XB0,8*8+0x10,,SP
	VST	XT0,8*8+0x20,,SP
	VST	XT1,8*8+0x30,,SP

	lghi	%r1,0

.Loop_tail_4x:
	llgc	%r5,0(%r1,INP)
	llgc	%r6,8*8(%r1,SP)
	xr	%r6,%r5
	stc	%r6,0(%r1,OUT)
	la	%r1,1(%r1)
	brct	LEN,.Loop_tail_4x

	lmg	%r6,%r7,6*8(SP)
	BR_EX	%r14
SYM_FUNC_END(chacha20_vx_4x)

#undef OUT
#undef INP
#undef LEN
#undef KEY
#undef COUNTER

#undef BEPERM

#undef K0
#undef K1
#undef K2
#undef K3


#############################################################################
# void chacha20_vx(u8 *out, const u8 *inp, size_t len,
#		   const u32 *key, const u32 *counter)

#define OUT	%r2
#define INP	%r3
#define LEN	%r4
#define KEY	%r5
#define COUNTER	%r6

#define BEPERM	%v31

#define K0	%v27
#define K1	%v24
#define K2	%v25
#define K3	%v26

#define A0	%v0
#define B0	%v1
#define C0	%v2
#define D0	%v3

#define A1	%v4
#define B1	%v5
#define C1	%v6
#define D1	%v7

#define A2	%v8
#define B2	%v9
#define C2	%v10
#define D2	%v11

#define A3	%v12
#define B3	%v13
#define C3	%v14
#define D3	%v15

#define A4	%v16
#define B4	%v17
#define C4	%v18
#define D4	%v19

#define A5	%v20
#define B5	%v21
#define C5	%v22
#define D5	%v23

#define T0	%v27
#define T1	%v28
#define T2	%v29
#define T3	%v30

SYM_FUNC_START(chacha20_vx)
	clgfi	LEN,256
	jle	chacha20_vx_4x
	stmg	%r6,%r7,6*8(SP)

	lghi	%r1,-FRAME
	lgr	%r0,SP
	la	SP,0(%r1,SP)
	stg	%r0,0(SP)		# back-chain

	larl	%r7,sigma
	lhi	%r0,10

	VLM	K1,K2,0,KEY,0		# load key
	VL	K3,0,,COUNTER		# load counter

	VLM	K0,BEPERM,0,%r7,4	# load sigma, increments, ...
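
# Each iteration of the outer loop produces six 64-byte blocks: A/B/C/D with
# the same index hold one full ChaCha state, and D1..D5 start from the block
# counter advanced by 1..5.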

.Loop_outer_vx:
	VLR	A0,K0
	VLR	B0,K1
	VLR	A1,K0
	VLR	B1,K1
	VLR	A2,K0
	VLR	B2,K1
	VLR	A3,K0
	VLR	B3,K1
	VLR	A4,K0
	VLR	B4,K1
	VLR	A5,K0
	VLR	B5,K1

	VLR	D0,K3
	VAF	D1,K3,T1		# K[3]+1
	VAF	D2,K3,T2		# K[3]+2
	VAF	D3,K3,T3		# K[3]+3
	VAF	D4,D2,T2		# K[3]+4
	VAF	D5,D2,T3		# K[3]+5

	VLR	C0,K2
	VLR	C1,K2
	VLR	C2,K2
	VLR	C3,K2
	VLR	C4,K2
	VLR	C5,K2

	VLR	T1,D1
	VLR	T2,D2
	VLR	T3,D3

.Loop_vx:
	VAF	A0,A0,B0
	VAF	A1,A1,B1
	VAF	A2,A2,B2
	VAF	A3,A3,B3
	VAF	A4,A4,B4
	VAF	A5,A5,B5
	VX	D0,D0,A0
	VX	D1,D1,A1
	VX	D2,D2,A2
	VX	D3,D3,A3
	VX	D4,D4,A4
	VX	D5,D5,A5
	VERLLF	D0,D0,16
	VERLLF	D1,D1,16
	VERLLF	D2,D2,16
	VERLLF	D3,D3,16
	VERLLF	D4,D4,16
	VERLLF	D5,D5,16

	VAF	C0,C0,D0
	VAF	C1,C1,D1
	VAF	C2,C2,D2
	VAF	C3,C3,D3
	VAF	C4,C4,D4
	VAF	C5,C5,D5
	VX	B0,B0,C0
	VX	B1,B1,C1
	VX	B2,B2,C2
	VX	B3,B3,C3
	VX	B4,B4,C4
	VX	B5,B5,C5
	VERLLF	B0,B0,12
	VERLLF	B1,B1,12
	VERLLF	B2,B2,12
	VERLLF	B3,B3,12
	VERLLF	B4,B4,12
	VERLLF	B5,B5,12

	VAF	A0,A0,B0
	VAF	A1,A1,B1
	VAF	A2,A2,B2
	VAF	A3,A3,B3
	VAF	A4,A4,B4
	VAF	A5,A5,B5
	VX	D0,D0,A0
	VX	D1,D1,A1
	VX	D2,D2,A2
	VX	D3,D3,A3
	VX	D4,D4,A4
	VX	D5,D5,A5
	VERLLF	D0,D0,8
	VERLLF	D1,D1,8
	VERLLF	D2,D2,8
	VERLLF	D3,D3,8
	VERLLF	D4,D4,8
	VERLLF	D5,D5,8

	VAF	C0,C0,D0
	VAF	C1,C1,D1
	VAF	C2,C2,D2
	VAF	C3,C3,D3
	VAF	C4,C4,D4
	VAF	C5,C5,D5
	VX	B0,B0,C0
	VX	B1,B1,C1
	VX	B2,B2,C2
	VX	B3,B3,C3
	VX	B4,B4,C4
	VX	B5,B5,C5
	VERLLF	B0,B0,7
	VERLLF	B1,B1,7
	VERLLF	B2,B2,7
	VERLLF	B3,B3,7
	VERLLF	B4,B4,7
	VERLLF	B5,B5,7

	VSLDB	C0,C0,C0,8
	VSLDB	C1,C1,C1,8
	VSLDB	C2,C2,C2,8
	VSLDB	C3,C3,C3,8
	VSLDB	C4,C4,C4,8
	VSLDB	C5,C5,C5,8
	VSLDB	B0,B0,B0,4
	VSLDB	B1,B1,B1,4
	VSLDB	B2,B2,B2,4
	VSLDB	B3,B3,B3,4
	VSLDB	B4,B4,B4,4
	VSLDB	B5,B5,B5,4
	VSLDB	D0,D0,D0,12
	VSLDB	D1,D1,D1,12
	VSLDB	D2,D2,D2,12
	VSLDB	D3,D3,D3,12
	VSLDB	D4,D4,D4,12
	VSLDB	D5,D5,D5,12

	VAF	A0,A0,B0
	VAF	A1,A1,B1
	VAF	A2,A2,B2
	VAF	A3,A3,B3
	VAF	A4,A4,B4
	VAF	A5,A5,B5
	VX	D0,D0,A0
	VX	D1,D1,A1
	VX	D2,D2,A2
	VX	D3,D3,A3
	VX	D4,D4,A4
	VX	D5,D5,A5
	VERLLF	D0,D0,16
	VERLLF	D1,D1,16
	VERLLF	D2,D2,16
	VERLLF	D3,D3,16
	VERLLF	D4,D4,16
	VERLLF	D5,D5,16

	VAF	C0,C0,D0
	VAF	C1,C1,D1
	VAF	C2,C2,D2
	VAF	C3,C3,D3
	VAF	C4,C4,D4
	VAF	C5,C5,D5
	VX	B0,B0,C0
	VX	B1,B1,C1
	VX	B2,B2,C2
	VX	B3,B3,C3
	VX	B4,B4,C4
	VX	B5,B5,C5
	VERLLF	B0,B0,12
	VERLLF	B1,B1,12
	VERLLF	B2,B2,12
	VERLLF	B3,B3,12
	VERLLF	B4,B4,12
	VERLLF	B5,B5,12

	VAF	A0,A0,B0
	VAF	A1,A1,B1
	VAF	A2,A2,B2
	VAF	A3,A3,B3
	VAF	A4,A4,B4
	VAF	A5,A5,B5
	VX	D0,D0,A0
	VX	D1,D1,A1
	VX	D2,D2,A2
	VX	D3,D3,A3
	VX	D4,D4,A4
	VX	D5,D5,A5
	VERLLF	D0,D0,8
	VERLLF	D1,D1,8
	VERLLF	D2,D2,8
	VERLLF	D3,D3,8
	VERLLF	D4,D4,8
	VERLLF	D5,D5,8

	VAF	C0,C0,D0
	VAF	C1,C1,D1
	VAF	C2,C2,D2
	VAF	C3,C3,D3
	VAF	C4,C4,D4
	VAF	C5,C5,D5
	VX	B0,B0,C0
	VX	B1,B1,C1
	VX	B2,B2,C2
	VX	B3,B3,C3
	VX	B4,B4,C4
	VX	B5,B5,C5
	VERLLF	B0,B0,7
	VERLLF	B1,B1,7
	VERLLF	B2,B2,7
	VERLLF	B3,B3,7
	VERLLF	B4,B4,7
	VERLLF	B5,B5,7

	VSLDB	C0,C0,C0,8
	VSLDB	C1,C1,C1,8
	VSLDB	C2,C2,C2,8
	VSLDB	C3,C3,C3,8
	VSLDB	C4,C4,C4,8
	VSLDB	C5,C5,C5,8
	VSLDB	B0,B0,B0,12
	VSLDB	B1,B1,B1,12
	VSLDB	B2,B2,B2,12
	VSLDB	B3,B3,B3,12
	VSLDB	B4,B4,B4,12
	VSLDB	B5,B5,B5,12
	VSLDB	D0,D0,D0,4
	VSLDB	D1,D1,D1,4
	VSLDB	D2,D2,D2,4
	VSLDB	D3,D3,D3,4
	VSLDB	D4,D4,D4,4
	VSLDB	D5,D5,D5,4
	brct	%r0,.Loop_vx

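	# Rounds done: finalize and emit the six blocks one at a time.  A0-D0
	# double as staging registers for blocks 1-5; once fewer than 0x40
	# bytes remain, drop to .Ltail_vx.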
	VAF	A0,A0,K0
	VAF	B0,B0,K1
	VAF	C0,C0,K2
	VAF	D0,D0,K3
	VAF	A1,A1,K0
	VAF	D1,D1,T1		# +K[3]+1

	VPERM	A0,A0,A0,BEPERM
	VPERM	B0,B0,B0,BEPERM
	VPERM	C0,C0,C0,BEPERM
	VPERM	D0,D0,D0,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_vx

	VAF	D2,D2,T2		# +K[3]+2
	VAF	D3,D3,T3		# +K[3]+3
	VLM	T0,T3,0,INP,0

	VX	A0,A0,T0
	VX	B0,B0,T1
	VX	C0,C0,T2
	VX	D0,D0,T3

	VLM	K0,T3,0,%r7,4		# re-load sigma and increments

	VSTM	A0,D0,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_vx

	VAF	B1,B1,K1
	VAF	C1,C1,K2

	VPERM	A0,A1,A1,BEPERM
	VPERM	B0,B1,B1,BEPERM
	VPERM	C0,C1,C1,BEPERM
	VPERM	D0,D1,D1,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_vx

	VLM	A1,D1,0,INP,0

	VX	A0,A0,A1
	VX	B0,B0,B1
	VX	C0,C0,C1
	VX	D0,D0,D1

	VSTM	A0,D0,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_vx

	VAF	A2,A2,K0
	VAF	B2,B2,K1
	VAF	C2,C2,K2

	VPERM	A0,A2,A2,BEPERM
	VPERM	B0,B2,B2,BEPERM
	VPERM	C0,C2,C2,BEPERM
	VPERM	D0,D2,D2,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_vx

	VLM	A1,D1,0,INP,0

	VX	A0,A0,A1
	VX	B0,B0,B1
	VX	C0,C0,C1
	VX	D0,D0,D1

	VSTM	A0,D0,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_vx

	VAF	A3,A3,K0
	VAF	B3,B3,K1
	VAF	C3,C3,K2
	VAF	D2,K3,T3		# K[3]+3

	VPERM	A0,A3,A3,BEPERM
	VPERM	B0,B3,B3,BEPERM
	VPERM	C0,C3,C3,BEPERM
	VPERM	D0,D3,D3,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_vx

	VAF	D3,D2,T1		# K[3]+4
	VLM	A1,D1,0,INP,0

	VX	A0,A0,A1
	VX	B0,B0,B1
	VX	C0,C0,C1
	VX	D0,D0,D1

	VSTM	A0,D0,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_vx

	VAF	A4,A4,K0
	VAF	B4,B4,K1
	VAF	C4,C4,K2
	VAF	D4,D4,D3		# +K[3]+4
	VAF	D3,D3,T1		# K[3]+5
	VAF	K3,D2,T3		# K[3]+=6

	VPERM	A0,A4,A4,BEPERM
	VPERM	B0,B4,B4,BEPERM
	VPERM	C0,C4,C4,BEPERM
	VPERM	D0,D4,D4,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_vx

	VLM	A1,D1,0,INP,0

	VX	A0,A0,A1
	VX	B0,B0,B1
	VX	C0,C0,C1
	VX	D0,D0,D1

	VSTM	A0,D0,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_vx

	VAF	A5,A5,K0
	VAF	B5,B5,K1
	VAF	C5,C5,K2
	VAF	D5,D5,D3		# +K[3]+5

	VPERM	A0,A5,A5,BEPERM
	VPERM	B0,B5,B5,BEPERM
	VPERM	C0,C5,C5,BEPERM
	VPERM	D0,D5,D5,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_vx

	VLM	A1,D1,0,INP,0

	VX	A0,A0,A1
	VX	B0,B0,B1
	VX	C0,C0,C1
	VX	D0,D0,D1

	VSTM	A0,D0,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	lhi	%r0,10
	aghi	LEN,-0x40
	jne	.Loop_outer_vx

.Ldone_vx:
	lmg	%r6,%r7,FRAME+6*8(SP)
	la	SP,FRAME(SP)
	BR_EX	%r14

.Ltail_vx:
	VSTM	A0,D0,8*8,SP,3
	lghi	%r1,0

.Loop_tail_vx:
	llgc	%r5,0(%r1,INP)
	llgc	%r6,8*8(%r1,SP)
	xr	%r6,%r5
	stc	%r6,0(%r1,OUT)
	la	%r1,1(%r1)
	brct	LEN,.Loop_tail_vx

	lmg	%r6,%r7,FRAME+6*8(SP)
	la	SP,FRAME(SP)
	BR_EX	%r14
SYM_FUNC_END(chacha20_vx)

	.previous