/* SPDX-License-Identifier: GPL-2.0-or-later */
#
# Accelerated chacha20 implementation for ppc64le.
#
# Copyright 2023- IBM Corp. All rights reserved
#
#===================================================================================
# Written by Danny Tsen <dtsen@us.ibm.com>
#
# chacha_p10le_8x(u32 *state, byte *dst, const byte *src,
#				 size_t len, int nrounds);
#
# do rounds,  8 quarter rounds
# 1.  a += b; d ^= a; d <<<= 16;
# 2.  c += d; b ^= c; b <<<= 12;
# 3.  a += b; d ^= a; d <<<= 8;
# 4.  c += d; b ^= c; b <<<= 7
#
# row1 = (row1 + row2),  row4 = row1 xor row4,  row4 rotate each word by 16
# row3 = (row3 + row4),  row2 = row3 xor row2,  row2 rotate each word by 12
# row1 = (row1 + row2),  row4 = row1 xor row4,  row4 rotate each word by 8
# row3 = (row3 + row4),  row2 = row3 xor row2,  row2 rotate each word by 7
#
# 4 blocks (a b c d)
#
# a0 b0 c0 d0
# a1 b1 c1 d1
# ...
# a4 b4 c4 d4
# ...
# a8 b8 c8 d8
# ...
# a12 b12 c12 d12
# a13 ...
# a14 ...
# a15 b15 c15 d15
#
# Column round (v0, v4,  v8, v12, v1, v5,  v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
# Diagonal round (v0, v5, v10, v15, v1, v6, v11, v12, v2, v7,  v8, v13, v3, v4,  v9, v14)
#

#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/asm-compat.h>
#include <linux/linkage.h>

.machine	"any"
.text

# Store GPR at OFFSET from the FRAME base register.
.macro	SAVE_GPR GPR OFFSET FRAME
	std	\GPR,\OFFSET(\FRAME)
.endm

# Store vector register VRS at OFFSET from FRAME.  Clobbers r16 (index).
.macro	SAVE_VRS VRS OFFSET FRAME
	li	16, \OFFSET
	stvx	\VRS, 16, \FRAME
.endm

# Store VSX register VSX at OFFSET from FRAME.  Clobbers r16 (index).
.macro	SAVE_VSX VSX OFFSET FRAME
	li	16, \OFFSET
	stxvx	\VSX, 16, \FRAME
.endm

# Load GPR from OFFSET relative to the FRAME base register.
.macro	RESTORE_GPR GPR OFFSET FRAME
	ld	\GPR,\OFFSET(\FRAME)
.endm

# Load vector register VRS from OFFSET relative to FRAME.  Clobbers r16.
.macro	RESTORE_VRS VRS OFFSET FRAME
	li	16, \OFFSET
	lvx	\VRS, 16, \FRAME
.endm

# Load VSX register VSX from OFFSET relative to FRAME.  Clobbers r16.
.macro	RESTORE_VSX VSX OFFSET FRAME
	li	16, \OFFSET
	lxvx	\VSX, 16, \FRAME
.endm

#
# Prologue: save LR at 16(r1), allocate a 752-byte frame, then save
#   r14 - r31    at r1 + 112 .. 248
#   v20 - v31    at r1 + 256 .. 432
#   vs14 - vs31  at r1 + 448 .. 720
# r9 is the base of the vector save area.  r16 is the index register of the
# vector save macros and is itself saved before its first use as scratch.
#
.macro SAVE_REGS
	mflr 0
	std 0, 16(1)
	stdu 1,-752(1)

	SAVE_GPR 14, 112, 1
	SAVE_GPR 15, 120, 1
	SAVE_GPR 16, 128, 1
	SAVE_GPR 17, 136, 1
	SAVE_GPR 18, 144, 1
	SAVE_GPR 19, 152, 1
	SAVE_GPR 20, 160, 1
	SAVE_GPR 21, 168, 1
	SAVE_GPR 22, 176, 1
	SAVE_GPR 23, 184, 1
	SAVE_GPR 24, 192, 1
	SAVE_GPR 25, 200, 1
	SAVE_GPR 26, 208, 1
	SAVE_GPR 27, 216, 1
	SAVE_GPR 28, 224, 1
	SAVE_GPR 29, 232, 1
	SAVE_GPR 30, 240, 1
	SAVE_GPR 31, 248, 1

	addi	9, 1, 256
	SAVE_VRS 20, 0, 9
	SAVE_VRS 21, 16, 9
	SAVE_VRS 22, 32, 9
	SAVE_VRS 23, 48, 9
	SAVE_VRS 24, 64, 9
	SAVE_VRS 25, 80, 9
	SAVE_VRS 26, 96, 9
	SAVE_VRS 27, 112, 9
	SAVE_VRS 28, 128, 9
	SAVE_VRS 29, 144, 9
	SAVE_VRS 30, 160, 9
	SAVE_VRS 31, 176, 9

	SAVE_VSX 14, 192, 9
	SAVE_VSX 15, 208, 9
	SAVE_VSX 16, 224, 9
	SAVE_VSX 17, 240, 9
	SAVE_VSX 18, 256, 9
	SAVE_VSX 19, 272, 9
	SAVE_VSX 20, 288, 9
	SAVE_VSX 21, 304, 9
	SAVE_VSX 22, 320, 9
	SAVE_VSX 23, 336, 9
	SAVE_VSX 24, 352, 9
	SAVE_VSX 25, 368, 9
	SAVE_VSX 26, 384, 9
	SAVE_VSX 27, 400, 9
	SAVE_VSX 28, 416, 9
	SAVE_VSX 29, 432, 9
	SAVE_VSX 30, 448, 9
	SAVE_VSX 31, 464, 9
.endm # SAVE_REGS

#
# Epilogue: reload everything stored by SAVE_REGS from the same offsets,
# release the 752-byte frame and restore LR from 16(r1).  The vector
# registers are reloaded first because the vector macros use r16 as scratch;
# r16 gets its saved value back in the GPR pass that follows.
#
.macro RESTORE_REGS
	addi	9, 1, 256
	RESTORE_VRS 20, 0, 9
	RESTORE_VRS 21, 16, 9
	RESTORE_VRS 22, 32, 9
	RESTORE_VRS 23, 48, 9
	RESTORE_VRS 24, 64, 9
	RESTORE_VRS 25, 80, 9
	RESTORE_VRS 26, 96, 9
	RESTORE_VRS 27, 112, 9
	RESTORE_VRS 28, 128, 9
	RESTORE_VRS 29, 144, 9
	RESTORE_VRS 30, 160, 9
	RESTORE_VRS 31, 176, 9

	RESTORE_VSX 14, 192, 9
	RESTORE_VSX 15, 208, 9
	RESTORE_VSX 16, 224, 9
	RESTORE_VSX 17, 240, 9
	RESTORE_VSX 18, 256, 9
	RESTORE_VSX 19, 272, 9
	RESTORE_VSX 20, 288, 9
	RESTORE_VSX 21, 304, 9
	RESTORE_VSX 22, 320, 9
	RESTORE_VSX 23, 336, 9
	RESTORE_VSX 24, 352, 9
	RESTORE_VSX 25, 368, 9
	RESTORE_VSX 26, 384, 9
	RESTORE_VSX 27, 400, 9
	RESTORE_VSX 28, 416, 9
	RESTORE_VSX 29, 432, 9
	RESTORE_VSX 30, 448, 9
	RESTORE_VSX 31, 464, 9

	RESTORE_GPR 14, 112, 1
	RESTORE_GPR 15, 120, 1
	RESTORE_GPR 16, 128, 1
	RESTORE_GPR 17, 136, 1
	RESTORE_GPR 18, 144, 1
	RESTORE_GPR 19, 152, 1
	RESTORE_GPR 20, 160, 1
	RESTORE_GPR 21, 168, 1
	RESTORE_GPR 22, 176, 1
	RESTORE_GPR 23, 184, 1
	RESTORE_GPR 24, 192, 1
	RESTORE_GPR 25, 200, 1
	RESTORE_GPR 26, 208, 1
	RESTORE_GPR 27, 216, 1
	RESTORE_GPR 28, 224, 1
	RESTORE_GPR 29, 232, 1
	RESTORE_GPR 30, 240, 1
	RESTORE_GPR 31, 248, 1

	addi    1, 1, 752
	ld 0, 16(1)
	mtlr 0
.endm # RESTORE_REGS

#
# One column round followed by one diagonal round for 8 blocks.
# v0 - v15 hold the working state of the first 4 blocks and v16 - v31 the
# working state of the second 4 blocks, so no vector register is free for
# constants.  The constants live in vs20 - vs23 (copies of v20 - v23 made by
# the caller: vs20/vs22 are the two permx vectors used by vpermxor, vs21/vs23
# the splatted rotate counts 12 and 7 used by vrlw).  Each step that needs a
# constant parks v25 (or v28) in vs0, copies the constant into that vector
# register, uses it and then moves the parked state back.
#
.macro QT_loop_8x
	# QR(v0, v4,  v8, v12, v1, v5,  v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
	xxlor	0, 32+25, 32+25
	xxlor	32+25, 20, 20
	vadduwm 0, 0, 4
	vadduwm 1, 1, 5
	vadduwm 2, 2, 6
	vadduwm 3, 3, 7
	vadduwm 16, 16, 20
	vadduwm 17, 17, 21
	vadduwm 18, 18, 22
	vadduwm 19, 19, 23

	vpermxor 12, 12, 0, 25
	vpermxor 13, 13, 1, 25
	vpermxor 14, 14, 2, 25
	vpermxor 15, 15, 3, 25
	vpermxor 28, 28, 16, 25
	vpermxor 29, 29, 17, 25
	vpermxor 30, 30, 18, 25
	vpermxor 31, 31, 19, 25
	xxlor	32+25, 0, 0
	vadduwm 8, 8, 12
	vadduwm 9, 9, 13
	vadduwm 10, 10, 14
	vadduwm 11, 11, 15
	vadduwm 24, 24, 28
	vadduwm 25, 25, 29
	vadduwm 26, 26, 30
	vadduwm 27, 27, 31
	vxor 4, 4, 8
	vxor 5, 5, 9
	vxor 6, 6, 10
	vxor 7, 7, 11
	vxor 20, 20, 24
	vxor 21, 21, 25
	vxor 22, 22, 26
	vxor 23, 23, 27

	xxlor	0, 32+25, 32+25
	xxlor	32+25, 21, 21
	vrlw 4, 4, 25
	vrlw 5, 5, 25
	vrlw 6, 6, 25
	vrlw 7, 7, 25
	vrlw 20, 20, 25
	vrlw 21, 21, 25
	vrlw 22, 22, 25
	vrlw 23, 23, 25
	xxlor	32+25, 0, 0
	vadduwm 0, 0, 4
	vadduwm 1, 1, 5
	vadduwm 2, 2, 6
	vadduwm 3, 3, 7
	vadduwm 16, 16, 20
	vadduwm 17, 17, 21
	vadduwm 18, 18, 22
	vadduwm 19, 19, 23

	xxlor	0, 32+25, 32+25
	xxlor	32+25, 22, 22
	vpermxor 12, 12, 0, 25
	vpermxor 13, 13, 1, 25
	vpermxor 14, 14, 2, 25
	vpermxor 15, 15, 3, 25
	vpermxor 28, 28, 16, 25
	vpermxor 29, 29, 17, 25
	vpermxor 30, 30, 18, 25
	vpermxor 31, 31, 19, 25
	xxlor	32+25, 0, 0
	vadduwm 8, 8, 12
	vadduwm 9, 9, 13
	vadduwm 10, 10, 14
	vadduwm 11, 11, 15
	vadduwm 24, 24, 28
	vadduwm 25, 25, 29
	vadduwm 26, 26, 30
	vadduwm 27, 27, 31
	xxlor	0, 32+28, 32+28
	xxlor	32+28, 23, 23
	vxor 4, 4, 8
	vxor 5, 5, 9
	vxor 6, 6, 10
	vxor 7, 7, 11
	vxor 20, 20, 24
	vxor 21, 21, 25
	vxor 22, 22, 26
	vxor 23, 23, 27
	vrlw 4, 4, 28
	vrlw 5, 5, 28
	vrlw 6, 6, 28
	vrlw 7, 7, 28
	vrlw 20, 20, 28
	vrlw 21, 21, 28
	vrlw 22, 22, 28
	vrlw 23, 23, 28
	xxlor	32+28, 0, 0

	# QR(v0, v5, v10, v15, v1, v6, v11, v12, v2, v7,  v8, v13, v3, v4,  v9, v14)
	xxlor	0, 32+25, 32+25
	xxlor	32+25, 20, 20
	vadduwm 0, 0, 5
	vadduwm 1, 1, 6
	vadduwm 2, 2, 7
	vadduwm 3, 3, 4
	vadduwm 16, 16, 21
	vadduwm 17, 17, 22
	vadduwm 18, 18, 23
	vadduwm 19, 19, 20

	vpermxor 15, 15, 0, 25
	vpermxor 12, 12, 1, 25
	vpermxor 13, 13, 2, 25
	vpermxor 14, 14, 3, 25
	vpermxor 31, 31, 16, 25
	vpermxor 28, 28, 17, 25
	vpermxor 29, 29, 18, 25
	vpermxor 30, 30, 19, 25

	xxlor	32+25, 0, 0
	vadduwm 10, 10, 15
	vadduwm 11, 11, 12
	vadduwm 8, 8, 13
	vadduwm 9, 9, 14
	vadduwm 26, 26, 31
	vadduwm 27, 27, 28
	vadduwm 24, 24, 29
	vadduwm 25, 25, 30
	vxor 5, 5, 10
	vxor 6, 6, 11
	vxor 7, 7, 8
	vxor 4, 4, 9
	vxor 21, 21, 26
	vxor 22, 22, 27
	vxor 23, 23, 24
	vxor 20, 20, 25

	xxlor	0, 32+25, 32+25
	xxlor	32+25, 21, 21
	vrlw 5, 5, 25
	vrlw 6, 6, 25
	vrlw 7, 7, 25
	vrlw 4, 4, 25
	vrlw 21, 21, 25
	vrlw 22, 22, 25
	vrlw 23, 23, 25
	vrlw 20, 20, 25
	xxlor	32+25, 0, 0

	vadduwm 0, 0, 5
	vadduwm 1, 1, 6
	vadduwm 2, 2, 7
	vadduwm 3, 3, 4
	vadduwm 16, 16, 21
	vadduwm 17, 17, 22
	vadduwm 18, 18, 23
	vadduwm 19, 19, 20

	xxlor	0, 32+25, 32+25
	xxlor	32+25, 22, 22
	vpermxor 15, 15, 0, 25
	vpermxor 12, 12, 1, 25
	vpermxor 13, 13, 2, 25
	vpermxor 14, 14, 3, 25
	vpermxor 31, 31, 16, 25
	vpermxor 28, 28, 17, 25
	vpermxor 29, 29, 18, 25
	vpermxor 30, 30, 19, 25
	xxlor	32+25, 0, 0

	vadduwm 10, 10, 15
	vadduwm 11, 11, 12
	vadduwm 8, 8, 13
	vadduwm 9, 9, 14
	vadduwm 26, 26, 31
	vadduwm 27, 27, 28
	vadduwm 24, 24, 29
	vadduwm 25, 25, 30

	xxlor	0, 32+28, 32+28
	xxlor	32+28, 23, 23
	vxor 5, 5, 10
	vxor 6, 6, 11
	vxor 7, 7, 8
	vxor 4, 4, 9
	vxor 21, 21, 26
	vxor 22, 22, 27
	vxor 23, 23, 24
	vxor 20, 20, 25
	vrlw 5, 5, 28
	vrlw 6, 6, 28
	vrlw 7, 7, 28
	vrlw 4, 4, 28
	vrlw 21, 21, 28
	vrlw 22, 22, 28
	vrlw 23, 23, 28
	vrlw 20, 20, 28
	xxlor	32+28, 0, 0
.endm

#
# One column round followed by one diagonal round for 4 blocks held in
# v0 - v15.  v20 and v22 are the permx vectors used by vpermxor, v21 and v23
# the rotate counts used by vrlw; all four stay resident in this path.
#
.macro QT_loop_4x
	# QR(v0, v4,  v8, v12, v1, v5,  v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
	vadduwm 0, 0, 4
	vadduwm 1, 1, 5
	vadduwm 2, 2, 6
	vadduwm 3, 3, 7
	vpermxor 12, 12, 0, 20
	vpermxor 13, 13, 1, 20
	vpermxor 14, 14, 2, 20
	vpermxor 15, 15, 3, 20
	vadduwm 8, 8, 12
	vadduwm 9, 9, 13
	vadduwm 10, 10, 14
	vadduwm 11, 11, 15
	vxor 4, 4, 8
	vxor 5, 5, 9
	vxor 6, 6, 10
	vxor 7, 7, 11
	vrlw 4, 4, 21
	vrlw 5, 5, 21
	vrlw 6, 6, 21
	vrlw 7, 7, 21
	vadduwm 0, 0, 4
	vadduwm 1, 1, 5
	vadduwm 2, 2, 6
	vadduwm 3, 3, 7
	vpermxor 12, 12, 0, 22
	vpermxor 13, 13, 1, 22
	vpermxor 14, 14, 2, 22
	vpermxor 15, 15, 3, 22
	vadduwm 8, 8, 12
	vadduwm 9, 9, 13
	vadduwm 10, 10, 14
	vadduwm 11, 11, 15
	vxor 4, 4, 8
	vxor 5, 5, 9
	vxor 6, 6, 10
	vxor 7, 7, 11
	vrlw 4, 4, 23
	vrlw 5, 5, 23
	vrlw 6, 6, 23
	vrlw 7, 7, 23

	# QR(v0, v5, v10, v15, v1, v6, v11, v12, v2, v7,  v8, v13, v3, v4,  v9, v14)
	vadduwm 0, 0, 5
	vadduwm 1, 1, 6
	vadduwm 2, 2, 7
	vadduwm 3, 3, 4
	vpermxor 15, 15, 0, 20
	vpermxor 12, 12, 1, 20
	vpermxor 13, 13, 2, 20
	vpermxor 14, 14, 3, 20
	vadduwm 10, 10, 15
	vadduwm 11, 11, 12
	vadduwm 8, 8, 13
	vadduwm 9, 9, 14
	vxor 5, 5, 10
	vxor 6, 6, 11
	vxor 7, 7, 8
	vxor 4, 4, 9
	vrlw 5, 5, 21
	vrlw 6, 6, 21
	vrlw 7, 7, 21
	vrlw 4, 4, 21
	vadduwm 0, 0, 5
	vadduwm 1, 1, 6
	vadduwm 2, 2, 7
	vadduwm 3, 3, 4
	vpermxor 15, 15, 0, 22
	vpermxor 12, 12, 1, 22
	vpermxor 13, 13, 2, 22
	vpermxor 14, 14, 3, 22
	vadduwm 10, 10, 15
	vadduwm 11, 11, 12
	vadduwm 8, 8, 13
	vadduwm 9, 9, 14
	vxor 5, 5, 10
	vxor 6, 6, 11
	vxor 7, 7, 8
	vxor 4, 4, 9
	vrlw 5, 5, 23
	vrlw 6, 6, 23
	vrlw 7, 7, 23
	vrlw 4, 4, 23
.endm

# Transpose
# Operates in place on the 4x4 word matrix held in vector registers
# a0 - a3.  Clobbers vs10 - vs13.
.macro TP_4x a0 a1 a2 a3
	xxmrghw  10, 32+\a0, 32+\a1	# a0, a1, b0, b1
	xxmrghw  11, 32+\a2, 32+\a3	# a2, a3, b2, b3
	xxmrglw  12, 32+\a0, 32+\a1	# c0, c1, d0, d1
	xxmrglw  13, 32+\a2, 32+\a3	# c2, c3, d2, d3
	xxpermdi	32+\a0, 10, 11, 0	# a0, a1, a2, a3
	xxpermdi	32+\a1, 10, 11, 3	# b0, b1, b2, b3
	xxpermdi	32+\a2, 12, 13, 0	# c0, c1, c2, c3
	xxpermdi	32+\a3, 12, 13, 3	# d0, d1, d2, d3
.endm

# key stream = working state + state
# S selects the register set holding the working state (v\S - v\S+15).
# The four state rows are read from v(16-S) - v(19-S): v16 - v19 when S is 0
# and v0 - v3 when S is 16, so the caller must place the state there first.
.macro Add_state S
	vadduwm \S+0, \S+0, 16-\S
	vadduwm \S+4, \S+4, 17-\S
	vadduwm \S+8, \S+8, 18-\S
	vadduwm \S+12, \S+12, 19-\S

	vadduwm \S+1, \S+1, 16-\S
	vadduwm \S+5, \S+5, 17-\S
	vadduwm \S+9, \S+9, 18-\S
	vadduwm \S+13, \S+13, 19-\S

	vadduwm \S+2, \S+2, 16-\S
	vadduwm \S+6, \S+6, 17-\S
	vadduwm \S+10, \S+10, 18-\S
	vadduwm \S+14, \S+14, 19-\S

	vadduwm	\S+3, \S+3, 16-\S
	vadduwm	\S+7, \S+7, 17-\S
	vadduwm	\S+11, \S+11, 18-\S
	vadduwm	\S+15, \S+15, 19-\S
.endm

#
# write 256 bytes
#
# Loads 256 bytes from r5 + r14 into vs0 - vs15, xors them into the key
# stream held in v\S - v\S+15 and stores the result at r4 + r14.
# r17 - r31 must hold the offsets 16, 32, ..., 240.
# Clobbers r9, r16 and vs0 - vs15.
#
.macro Write_256 S
	add 9, 14, 5
	add 16, 14, 4
	lxvw4x 0, 0, 9
	lxvw4x 1, 17, 9
	lxvw4x 2, 18, 9
	lxvw4x 3, 19, 9
	lxvw4x 4, 20, 9
	lxvw4x 5, 21, 9
	lxvw4x 6, 22, 9
	lxvw4x 7, 23, 9
	lxvw4x 8, 24, 9
	lxvw4x 9, 25, 9
	lxvw4x 10, 26, 9
	lxvw4x 11, 27, 9
	lxvw4x 12, 28, 9
	lxvw4x 13, 29, 9
	lxvw4x 14, 30, 9
	lxvw4x 15, 31, 9

	xxlxor \S+32, \S+32, 0
	xxlxor \S+36, \S+36, 1
	xxlxor \S+40, \S+40, 2
	xxlxor \S+44, \S+44, 3
	xxlxor \S+33, \S+33, 4
	xxlxor \S+37, \S+37, 5
	xxlxor \S+41, \S+41, 6
	xxlxor \S+45, \S+45, 7
	xxlxor \S+34, \S+34, 8
	xxlxor \S+38, \S+38, 9
	xxlxor \S+42, \S+42, 10
	xxlxor \S+46, \S+46, 11
	xxlxor \S+35, \S+35, 12
	xxlxor \S+39, \S+39, 13
	xxlxor \S+43, \S+43, 14
	xxlxor \S+47, \S+47, 15

	stxvw4x \S+32, 0, 16
	stxvw4x \S+36, 17, 16
	stxvw4x \S+40, 18, 16
	stxvw4x \S+44, 19, 16

	stxvw4x \S+33, 20, 16
	stxvw4x \S+37, 21, 16
	stxvw4x \S+41, 22, 16
	stxvw4x \S+45, 23, 16

	stxvw4x \S+34, 24, 16
	stxvw4x \S+38, 25, 16
	stxvw4x \S+42, 26, 16
	stxvw4x \S+46, 27, 16

	stxvw4x \S+35, 28, 16
	stxvw4x \S+39, 29, 16
	stxvw4x \S+43, 30, 16
	stxvw4x \S+47, 31, 16

.endm

#
# chacha_p10le_8x(u32 *state, byte *dst, const byte *src, size_t len, int nrounds);
#
# Register use in this routine:
#   r3  state (read only; the counter in memory is not updated here)
#   r4  dst, r5 src, both indexed by r14
#   r6  len on entry, r15 remaining length
#   r7  nrounds, r8 = nrounds >> 1 is the CTR count for the round loops
#   r14 running offset into src and dst
#   r17 - r31  offsets 16 .. 240
#   vs16 - vs19  copy of the four state rows
#   vs20 - vs23  permx / rotate constants for QT_loop_8x
#   vs24, vs25   splatted 4 and 8
#   vs30, vs31   per-block counter offsets for the two 4-block sets
#
# Returns immediately when len <= 0.  Otherwise 512 bytes are produced per
# Loop_8x pass while at least 512 bytes remain, then 256 bytes per Loop_4x
# pass.  Loop_4x always runs at least once when entered and stops once fewer
# than 256 bytes remain, so the code expects len to be a multiple of 256.
# NOTE(review): that requirement is presumably enforced by the caller, which
# is not visible in this file.
#
SYM_FUNC_START(chacha_p10le_8x)
.align 5
	cmpdi	6, 0
	ble	Out_no_chacha

	SAVE_REGS

	# r17 - r31 mainly for Write_256 macro.
	li	17, 16
	li	18, 32
	li	19, 48
	li	20, 64
	li	21, 80
	li	22, 96
	li	23, 112
	li	24, 128
	li	25, 144
	li	26, 160
	li	27, 176
	li	28, 192
	li	29, 208
	li	30, 224
	li	31, 240

	mr 15, 6			# len
	li 14, 0			# offset to inp and outp

	lxvw4x	48, 0, 3		#  vr16, constants
	lxvw4x	49, 17, 3		#  vr17, key 1
	lxvw4x	50, 18, 3		#  vr18, key 2
	lxvw4x	51, 19, 3		#  vr19, counter, nonce

	# create (0, 1, 2, 3) counters
	vspltisw 0, 0
	vspltisw 1, 1
	vspltisw 2, 2
	vspltisw 3, 3
	vmrghw	4, 0, 1
	vmrglw	5, 2, 3
	vsldoi	30, 4, 5, 8		# vr30 counter, 4 (0, 1, 2, 3)

	vspltisw 21, 12			# rotate count 12
	vspltisw 23, 7			# rotate count 7

	addis	11, 2, permx@toc@ha
	addi	11, 11, permx@toc@l
	lxvw4x	32+20, 0, 11		# first permx vector
	lxvw4x	32+22, 17, 11		# second permx vector

	sradi	8, 7, 1			# each loop pass is a column + diagonal round

	mtctr 8

	# save constants to vsx
	xxlor	16, 48, 48
	xxlor	17, 49, 49
	xxlor	18, 50, 50
	xxlor	19, 51, 51

	vspltisw 25, 4
	vspltisw 26, 8

	xxlor	25, 32+26, 32+26	# vs25 = (8, 8, 8, 8)
	xxlor	24, 32+25, 32+25	# vs24 = (4, 4, 4, 4)

	vadduwm	31, 30, 25		# counter = (0, 1, 2, 3) + (4, 4, 4, 4)
	xxlor	30, 32+30, 32+30
	xxlor	31, 32+31, 32+31

	xxlor	20, 32+20, 32+20
	xxlor	21, 32+21, 32+21
	xxlor	22, 32+22, 32+22
	xxlor	23, 32+23, 32+23

	cmpdi	6, 512
	blt	Loop_last

Loop_8x:
	xxspltw  32+0, 16, 0
	xxspltw  32+1, 16, 1
	xxspltw  32+2, 16, 2
	xxspltw  32+3, 16, 3

	xxspltw  32+4, 17, 0
	xxspltw  32+5, 17, 1
	xxspltw  32+6, 17, 2
	xxspltw  32+7, 17, 3
	xxspltw  32+8, 18, 0
	xxspltw  32+9, 18, 1
	xxspltw  32+10, 18, 2
	xxspltw  32+11, 18, 3
	xxspltw  32+12, 19, 0
	xxspltw  32+13, 19, 1
	xxspltw  32+14, 19, 2
	xxspltw  32+15, 19, 3
	vadduwm	12, 12, 30	# increase counter

	xxspltw  32+16, 16, 0
	xxspltw  32+17, 16, 1
	xxspltw  32+18, 16, 2
	xxspltw  32+19, 16, 3

	xxspltw  32+20, 17, 0
	xxspltw  32+21, 17, 1
	xxspltw  32+22, 17, 2
	xxspltw  32+23, 17, 3
	xxspltw  32+24, 18, 0
	xxspltw  32+25, 18, 1
	xxspltw  32+26, 18, 2
	xxspltw  32+27, 18, 3
	xxspltw  32+28, 19, 0
	xxspltw  32+29, 19, 1
	vadduwm	28, 28, 31	# increase counter
	xxspltw  32+30, 19, 2
	xxspltw  32+31, 19, 3

.align 5
quarter_loop_8x:
	QT_loop_8x

	bdnz	quarter_loop_8x

	# Add the counter offsets (vs30) to the first set.  v30 holds
	# working state, so it is parked in vs0 around the add.
	xxlor	0, 32+30, 32+30
	xxlor	32+30, 30, 30
	vadduwm	12, 12, 30
	xxlor	32+30, 0, 0
	TP_4x 0, 1, 2, 3
	TP_4x 4, 5, 6, 7
	TP_4x 8, 9, 10, 11
	TP_4x 12, 13, 14, 15

	# Add_state 0 reads the state from v16 - v19, which hold working
	# state of the second set: park them in vs0 - vs3 around the macro.
	xxlor	0, 48, 48
	xxlor	1, 49, 49
	xxlor	2, 50, 50
	xxlor	3, 51, 51
	xxlor	48, 16, 16
	xxlor	49, 17, 17
	xxlor	50, 18, 18
	xxlor	51, 19, 19
	Add_state 0
	xxlor	48, 0, 0
	xxlor	49, 1, 1
	xxlor	50, 2, 2
	xxlor	51, 3, 3
	Write_256 0
	addi	14, 14, 256	# offset +=256
	addi	15, 15, -256	# len -=256

	# Add the counter offsets (vs31) to the second set, parking v31
	# in vs5 around the add.
	xxlor	5, 32+31, 32+31
	xxlor	32+31, 31, 31
	vadduwm	28, 28, 31
	xxlor	32+31, 5, 5
	TP_4x 16+0, 16+1, 16+2, 16+3
	TP_4x 16+4, 16+5, 16+6, 16+7
	TP_4x 16+8, 16+9, 16+10, 16+11
	TP_4x 16+12, 16+13, 16+14, 16+15

	# Add_state 16 reads the state from v0 - v3.
	xxlor	32, 16, 16
	xxlor	33, 17, 17
	xxlor	34, 18, 18
	xxlor	35, 19, 19
	Add_state 16
	Write_256 16
	addi	14, 14, 256	# offset +=256
	addi	15, 15, -256	# len -=256

	# Advance both counter offset vectors by 8 blocks:
	# vs30 += 8, vs31 = vs30 + 4.
	xxlor	32+24, 24, 24
	xxlor	32+25, 25, 25
	xxlor	32+30, 30, 30
	vadduwm 30, 30, 25
	vadduwm 31, 30, 24
	xxlor	30, 32+30, 32+30
	xxlor	31, 32+31, 32+31

	cmpdi	15, 0
	beq	Out_loop

	cmpdi	15, 512
	blt	Loop_last

	mtctr 8
	b Loop_8x

Loop_last:
	# The 8x path overwrites v16 - v23 with working state, so the state
	# rows and the permx / rotate constants are loaded again here.
	lxvw4x	48, 0, 3		#  vr16, constants
	lxvw4x	49, 17, 3		#  vr17, key 1
	lxvw4x	50, 18, 3		#  vr18, key 2
	lxvw4x	51, 19, 3		#  vr19, counter, nonce

	vspltisw 21, 12
	vspltisw 23, 7
	addis	11, 2, permx@toc@ha
	addi	11, 11, permx@toc@l
	lxvw4x	32+20, 0, 11
	lxvw4x	32+22, 17, 11

	sradi	8, 7, 1
	mtctr 8

Loop_4x:
	vspltw  0, 16, 0
	vspltw  1, 16, 1
	vspltw  2, 16, 2
	vspltw  3, 16, 3

	vspltw  4, 17, 0
	vspltw  5, 17, 1
	vspltw  6, 17, 2
	vspltw  7, 17, 3
	vspltw  8, 18, 0
	vspltw  9, 18, 1
	vspltw  10, 18, 2
	vspltw  11, 18, 3
	vspltw  12, 19, 0
	vadduwm	12, 12, 30	# increase counter
	vspltw  13, 19, 1
	vspltw  14, 19, 2
	vspltw  15, 19, 3

.align 5
quarter_loop:
	QT_loop_4x

	bdnz	quarter_loop

	vadduwm	12, 12, 30
	TP_4x 0, 1, 2, 3
	TP_4x 4, 5, 6, 7
	TP_4x 8, 9, 10, 11
	TP_4x 12, 13, 14, 15

	Add_state 0
	Write_256 0
	addi	14, 14, 256	# offset += 256
	addi	15, 15, -256	# len -= 256

	# Update state counter
	vspltisw 25, 4
	vadduwm	30, 30, 25

	cmpdi	15, 0
	beq	Out_loop
	cmpdi	15, 256
	blt	Out_loop

	mtctr 8
	b Loop_4x

Out_loop:
	RESTORE_REGS
	blr

Out_no_chacha:
	li	3, 0
	blr
SYM_FUNC_END(chacha_p10le_8x)

# Two 16-byte control vectors for vpermxor.  The first is loaded into v20
# and the second into v22 (offset 16); they are used where the algorithm
# xors and then rotates each word by 16 and by 8 respectively.
SYM_DATA_START_LOCAL(PERMX)
.align 5
permx:
.long 0x22330011, 0x66774455, 0xaabb8899, 0xeeffccdd
.long 0x11223300, 0x55667744, 0x99aabb88, 0xddeeffcc
SYM_DATA_END(PERMX)