/*
 * SHA-1 implementation for PowerPC.
 *
 * Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
 */

#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>

/*
 * We roll the registers for T, A, B, C, D, E around on each
 * iteration; T on iteration t is A on iteration t+1, and so on.
 * We use registers 7 - 12 for this.
 */
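/*
 * For example, at t = 0 the assignment is RT = r12, RA = r11,
 * RB = r10, RC = r9, RD = r8, RE = r7; at t = 1, RA(1) is
 * ((1+4)%6)+7 = r12 = RT(0), so the value computed into T becomes
 * A for the next round with no register-to-register copies.
 */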
#define RT(t)	((((t)+5)%6)+7)
#define RA(t)	((((t)+4)%6)+7)
#define RB(t)	((((t)+3)%6)+7)
#define RC(t)	((((t)+2)%6)+7)
#define RD(t)	((((t)+1)%6)+7)
#define RE(t)	((((t)+0)%6)+7)

/* We use registers 16 - 31 for the W values */
#define W(t)	(((t)%16)+16)
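/*
 * Only the most recent 16 schedule words are live at any point, so
 * W(t) aliases W(t-16): the schedule recurrence is computed in
 * place, with the new word overwriting the register that held
 * W(t-16).
 */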

#define LOADW(t)				\
	lwz	W(t),(t)*4(r4)

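/*
 * Rounds 0-11: F = (B & C) | (~B & D), the "choose" function.
 * This variant also loads the message word needed four rounds
 * ahead, W(t+4), from the input block to overlap memory access
 * with computation.
 */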
#define STEPD0_LOAD(t)				\
	andc	r0,RD(t),RB(t);		\
	and	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;			\
	or	r6,r6,r0;			\
	add	r0,RE(t),r15;			\
	add	RT(t),RT(t),r6;		\
	add	r14,r0,W(t);			\
	lwz	W((t)+4),((t)+4)*4(r4);	\
	rotlwi	RB(t),RB(t),30;			\
	add	RT(t),RT(t),r14

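/*
 * Used for rounds 12-19: same F as STEPD0_LOAD, but W(t+4) is now
 * generated by the schedule recurrence
 * W[t] = rotl(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1)
 * instead of being loaded from memory.
 */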
#define STEPD0_UPDATE(t)			\
	and	r6,RB(t),RC(t);		\
	andc	r0,RD(t),RB(t);		\
	rotlwi	RT(t),RA(t),5;			\
	rotlwi	RB(t),RB(t),30;			\
	or	r6,r6,r0;			\
	add	r0,RE(t),r15;			\
	xor	r5,W((t)+4-3),W((t)+4-8);		\
	add	RT(t),RT(t),r6;		\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,r0,W(t);			\
	xor	W((t)+4),W((t)+4),r5;			\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

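/*
 * Rounds 20-39 and 60-79: F = B ^ C ^ D (parity).  This plain,
 * non-updating form is used only for the final four rounds
 * (76-79), where no further schedule words are needed.
 */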
#define STEPD1(t)				\
	xor	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;			\
	rotlwi	RB(t),RB(t),30;			\
	xor	r6,r6,RD(t);			\
	add	r0,RE(t),r15;			\
	add	RT(t),RT(t),r6;		\
	add	r0,r0,W(t);			\
	add	RT(t),RT(t),r0

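/* As STEPD1, but also computes the next schedule word W(t+4). */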
#define STEPD1_UPDATE(t)				\
	xor	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;			\
	rotlwi	RB(t),RB(t),30;			\
	xor	r6,r6,RD(t);			\
	add	r0,RE(t),r15;			\
	xor	r5,W((t)+4-3),W((t)+4-8);		\
	add	RT(t),RT(t),r6;		\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,r0,W(t);			\
	xor	W((t)+4),W((t)+4),r5;			\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

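/*
 * Rounds 40-59: F = (B & C) | (B & D) | (C & D), the majority
 * function, with the schedule update folded in.
 */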
#define STEPD2_UPDATE(t)			\
	and	r6,RB(t),RC(t);		\
	and	r0,RB(t),RD(t);		\
	rotlwi	RT(t),RA(t),5;			\
	or	r6,r6,r0;			\
	rotlwi	RB(t),RB(t),30;			\
	and	r0,RC(t),RD(t);		\
	xor	r5,W((t)+4-3),W((t)+4-8);	\
	or	r6,r6,r0;			\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,RE(t),r15;			\
	add	RT(t),RT(t),r6;		\
	add	r0,r0,W(t);			\
	xor	W((t)+4),W((t)+4),r5;		\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

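/*
 * The macros below unroll the round macros four and twenty at a
 * time, so all 80 rounds are fully unrolled and no loop counter
 * is needed.
 */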
#define STEP0LD4(t)				\
	STEPD0_LOAD(t);				\
	STEPD0_LOAD((t)+1);			\
	STEPD0_LOAD((t)+2);			\
	STEPD0_LOAD((t)+3)

#define STEPUP4(t, fn)				\
	STEP##fn##_UPDATE(t);			\
	STEP##fn##_UPDATE((t)+1);		\
	STEP##fn##_UPDATE((t)+2);		\
	STEP##fn##_UPDATE((t)+3)

#define STEPUP20(t, fn)				\
	STEPUP4(t, fn);				\
	STEPUP4((t)+4, fn);			\
	STEPUP4((t)+8, fn);			\
	STEPUP4((t)+12, fn);			\
	STEPUP4((t)+16, fn)

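/*
 * powerpc_sha_transform:
 *	r3 = pointer to the five 32-bit SHA-1 state words (A..E),
 *	     updated in place
 *	r4 = pointer to one 64-byte input block
 */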
_GLOBAL(powerpc_sha_transform)
	PPC_STLU r1,-STACKFRAMESIZE(r1)
	SAVE_8GPRS(14, r1)
	SAVE_10GPRS(22, r1)

	/* Load up A - E */
	lwz	RA(0),0(r3)	/* A */
	lwz	RB(0),4(r3)	/* B */
	lwz	RC(0),8(r3)	/* C */
	lwz	RD(0),12(r3)	/* D */
	lwz	RE(0),16(r3)	/* E */

	LOADW(0)
	LOADW(1)
	LOADW(2)
	LOADW(3)

	lis	r15,0x5a82	/* K0-19 */
	ori	r15,r15,0x7999
	STEP0LD4(0)
	STEP0LD4(4)
	STEP0LD4(8)
	STEPUP4(12, D0)
	STEPUP4(16, D0)

	lis	r15,0x6ed9	/* K20-39 */
	ori	r15,r15,0xeba1
	STEPUP20(20, D1)

	lis	r15,0x8f1b	/* K40-59 */
	ori	r15,r15,0xbcdc
	STEPUP20(40, D2)

	lis	r15,0xca62	/* K60-79 */
	ori	r15,r15,0xc1d6
	STEPUP4(60, D1)
	STEPUP4(64, D1)
	STEPUP4(68, D1)
	STEPUP4(72, D1)
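	/*
	 * Reload the original A..E values (interleaved with the last
	 * four rounds to hide load latency) so they can be added back
	 * into the working variables below.
	 */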
	lwz	r20,16(r3)
	STEPD1(76)
	lwz	r19,12(r3)
	STEPD1(77)
	lwz	r18,8(r3)
	STEPD1(78)
	lwz	r17,4(r3)
	STEPD1(79)

	lwz	r16,0(r3)
	add	r20,RE(80),r20
	add	RD(0),RD(80),r19
	add	RC(0),RC(80),r18
	add	RB(0),RB(80),r17
	add	RA(0),RA(80),r16
	mr	RE(0),r20
	stw	RA(0),0(r3)
	stw	RB(0),4(r3)
	stw	RC(0),8(r3)
	stw	RD(0),12(r3)
	stw	RE(0),16(r3)

	REST_8GPRS(14, r1)
	REST_10GPRS(22, r1)
	addi	r1,r1,STACKFRAMESIZE
	blr