/*
 * SHA-1 implementation for PowerPC.
 *
 * Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
 */

#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>

/*
 * We roll the registers for T, A, B, C, D, E around on each
 * iteration; T on iteration t is A on iteration t+1, and so on.
 * We use registers 7 - 12 for this.
 */
#define RT(t)	((((t)+5)%6)+7)
#define RA(t)	((((t)+4)%6)+7)
#define RB(t)	((((t)+3)%6)+7)
#define RC(t)	((((t)+2)%6)+7)
#define RD(t)	((((t)+1)%6)+7)
#define RE(t)	((((t)+0)%6)+7)

/* We use registers 16 - 31 for the W values */
#define W(t)	(((t)%16)+16)

#define LOADW(t)				\
	lwz	W(t),(t)*4(r4)

#define STEPD0_LOAD(t)				\
	andc	r0,RD(t),RB(t);		\
	and	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;		\
	or	r6,r6,r0;		\
	add	r0,RE(t),r15;		\
	add	RT(t),RT(t),r6;		\
	add	r14,r0,W(t);		\
	lwz	W((t)+4),((t)+4)*4(r4);	\
	rotlwi	RB(t),RB(t),30;		\
	add	RT(t),RT(t),r14

#define STEPD0_UPDATE(t)			\
	and	r6,RB(t),RC(t);		\
	andc	r0,RD(t),RB(t);		\
	rotlwi	RT(t),RA(t),5;		\
	rotlwi	RB(t),RB(t),30;		\
	or	r6,r6,r0;		\
	add	r0,RE(t),r15;		\
	xor	r5,W((t)+4-3),W((t)+4-8);		\
	add	RT(t),RT(t),r6;		\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,r0,W(t);		\
	xor	W((t)+4),W((t)+4),r5;	\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

#define STEPD1(t)				\
	xor	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;		\
	rotlwi	RB(t),RB(t),30;		\
	xor	r6,r6,RD(t);		\
	add	r0,RE(t),r15;		\
	add	RT(t),RT(t),r6;		\
	add	r0,r0,W(t);		\
	add	RT(t),RT(t),r0

#define STEPD1_UPDATE(t)			\
	xor	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;		\
	rotlwi	RB(t),RB(t),30;		\
	xor	r6,r6,RD(t);		\
	add	r0,RE(t),r15;		\
	xor	r5,W((t)+4-3),W((t)+4-8);		\
	add	RT(t),RT(t),r6;		\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,r0,W(t);		\
	xor	W((t)+4),W((t)+4),r5;	\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

#define STEPD2_UPDATE(t)			\
	and	r6,RB(t),RC(t);		\
	and	r0,RB(t),RD(t);		\
	rotlwi	RT(t),RA(t),5;		\
	or	r6,r6,r0;		\
	rotlwi	RB(t),RB(t),30;		\
	and	r0,RC(t),RD(t);		\
	xor	r5,W((t)+4-3),W((t)+4-8);	\
	or	r6,r6,r0;		\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,RE(t),r15;		\
	add	RT(t),RT(t),r6;		\
	add	r0,r0,W(t);		\
	xor	W((t)+4),W((t)+4),r5;	\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

#define STEP0LD4(t)				\
	STEPD0_LOAD(t);				\
	STEPD0_LOAD((t)+1);			\
	STEPD0_LOAD((t)+2);			\
	STEPD0_LOAD((t)+3)

#define STEPUP4(t, fn)				\
	STEP##fn##_UPDATE(t);			\
	STEP##fn##_UPDATE((t)+1);		\
	STEP##fn##_UPDATE((t)+2);		\
	STEP##fn##_UPDATE((t)+3)

#define STEPUP20(t, fn)				\
	STEPUP4(t, fn);				\
	STEPUP4((t)+4, fn);			\
	STEPUP4((t)+8, fn);			\
	STEPUP4((t)+12, fn);			\
	STEPUP4((t)+16, fn)

_GLOBAL(powerpc_sha_transform)
	PPC_STLU r1,-STACKFRAMESIZE(r1)
	SAVE_8GPRS(14, r1)
	SAVE_10GPRS(22, r1)

	/* Load up A - E */
	lwz	RA(0),0(r3)	/* A */
	lwz	RB(0),4(r3)	/* B */
	lwz	RC(0),8(r3)	/* C */
	lwz	RD(0),12(r3)	/* D */
	lwz	RE(0),16(r3)	/* E */

	LOADW(0)
	LOADW(1)
	LOADW(2)
	LOADW(3)

	lis	r15,0x5a82	/* K0-19 */
	ori	r15,r15,0x7999
	STEP0LD4(0)
	STEP0LD4(4)
	STEP0LD4(8)
	STEPUP4(12, D0)
	STEPUP4(16, D0)

	lis	r15,0x6ed9	/* K20-39 */
	ori	r15,r15,0xeba1
	STEPUP20(20, D1)

	lis	r15,0x8f1b	/* K40-59 */
	ori	r15,r15,0xbcdc
	STEPUP20(40, D2)

	lis	r15,0xca62	/* K60-79 */
	ori	r15,r15,0xc1d6
	STEPUP4(60, D1)
	STEPUP4(64, D1)
	STEPUP4(68, D1)
	STEPUP4(72, D1)
	lwz	r20,16(r3)
	STEPD1(76)
	lwz	r19,12(r3)
	STEPD1(77)
	lwz	r18,8(r3)
	STEPD1(78)
	lwz	r17,4(r3)
	STEPD1(79)

	lwz	r16,0(r3)
	add	r20,RE(80),r20
	add	RD(0),RD(80),r19
	add	RC(0),RC(80),r18
	add	RB(0),RB(80),r17
	add	RA(0),RA(80),r16
	mr	RE(0),r20
	stw	RA(0),0(r3)
	stw	RB(0),4(r3)
	stw	RC(0),8(r3)
	stw	RD(0),12(r3)
	stw	RE(0),16(r3)

	REST_8GPRS(14, r1)
	REST_10GPRS(22, r1)
	addi	r1,r1,STACKFRAMESIZE
	blr
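
/*
 * Reference notes (not part of the build): a minimal, illustrative C sketch
 * of the whole-block transform the macros above implement, per FIPS 180.
 * STEPD0 computes the "choose" function for rounds 0-19, STEPD1 the parity
 * (xor) function for rounds 20-39 and 60-79, and STEPD2 the "majority"
 * function for rounds 40-59; r3 points at the five state words and r4 at
 * the 64-byte input block.  The names rol32_ref and sha1_block_ref below
 * are illustrative only, not kernel APIs.
 *
 *	typedef unsigned int u32;
 *
 *	static u32 rol32_ref(u32 x, int n)
 *	{
 *		return (x << n) | (x >> (32 - n));
 *	}
 *
 *	static void sha1_block_ref(u32 state[5], const u32 in[16])
 *	{
 *		u32 w[80], a, b, c, d, e, f, k, t;
 *		int i;
 *
 *		for (i = 0; i < 16; i++)
 *			w[i] = in[i];		// big-endian words, as lwz loads them
 *		for (i = 16; i < 80; i++)	// schedule, computed on the fly above
 *			w[i] = rol32_ref(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16], 1);
 *
 *		a = state[0]; b = state[1]; c = state[2];
 *		d = state[3]; e = state[4];
 *		for (i = 0; i < 80; i++) {
 *			if (i < 20) {
 *				f = (b & c) | (~b & d);			// STEPD0
 *				k = 0x5a827999;
 *			} else if (i < 40) {
 *				f = b ^ c ^ d;				// STEPD1
 *				k = 0x6ed9eba1;
 *			} else if (i < 60) {
 *				f = (b & c) | (b & d) | (c & d);	// STEPD2
 *				k = 0x8f1bbcdc;
 *			} else {
 *				f = b ^ c ^ d;				// STEPD1
 *				k = 0xca62c1d6;
 *			}
 *			t = rol32_ref(a, 5) + f + e + k + w[i];
 *			e = d; d = c; c = rol32_ref(b, 30); b = a; a = t;
 *		}
 *		state[0] += a; state[1] += b; state[2] += c;
 *		state[3] += d; state[4] += e;
 *	}
 */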