/* xref: /openbmc/linux/arch/powerpc/crypto/md5-asm.S (revision f220d3eb) */
/*
 * Fast MD5 implementation for PPC
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 */
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/asm-compat.h>

/*
 * Register allocation.
 *
 * rHP, rWP	incoming pointer arguments: rHP addresses the four 32-bit
 *		state words (read at offsets 0..12 on entry, written back
 *		after every block), rWP addresses the input data.
 * rH0..rH3	working copies of the four state words. rH3 lives in r5,
 *		which also carries the incoming block count; the count is
 *		moved to CTR before r5 is overwritten.
 * rW00..rW15	the sixteen message words of the block being processed.
 *		r13 is skipped (reserved by the PowerPC ABIs).
 * rT0, rT1	scratch registers used by the round macros.
 *
 * r14..r26 are callee-saved, so they are stored by INITIALIZE and restored
 * by FINALIZE; r0 and r5..r12 are volatile and need no saving.
 */
#define rHP	r3
#define rWP	r4

#define rH0	r0
#define rH1	r6
#define rH2	r7
#define rH3	r5

#define rW00	r8
#define rW01	r9
#define rW02	r10
#define rW03	r11
#define rW04	r12
#define rW05	r14
#define rW06	r15
#define rW07	r16
#define rW08	r17
#define rW09	r18
#define rW10	r19
#define rW11	r20
#define rW12	r21
#define rW13	r22
#define rW14	r23
#define rW15	r24

#define rT0	r25
#define rT1	r26

/*
 * INITIALIZE - function prologue.
 *
 * Allocates a stack frame of INT_FRAME_SIZE bytes (store-with-update of r1)
 * and saves r14..r26 into it: r14..r21 via SAVE_8GPRS, r22..r25 via
 * SAVE_4GPRS and r26 via SAVE_GPR. These are exactly the callee-saved
 * registers used as rW05..rW15, rT0 and rT1.
 *
 * Must be paired with FINALIZE, which undoes it.
 */
#define INITIALIZE \
	PPC_STLU r1,-INT_FRAME_SIZE(r1); \
	SAVE_8GPRS(14, r1);		/* push registers onto stack	*/ \
	SAVE_4GPRS(22, r1);						   \
	SAVE_GPR(26, r1)

/*
 * FINALIZE - function epilogue, counterpart of INITIALIZE.
 *
 * Restores r14..r26 from the stack frame and releases the frame by adding
 * INT_FRAME_SIZE back to r1. It does not return; the caller of the macro
 * issues the blr itself.
 */
#define FINALIZE \
	REST_8GPRS(14, r1);		/* pop registers from stack	*/ \
	REST_4GPRS(22, r1);						   \
	REST_GPR(26, r1);						   \
	addi	r1,r1,INT_FRAME_SIZE;

/*
 * Input word access: LOAD_DATA(reg, off), INC_PTR, NEXT_BLOCK.
 *
 * MD5 interprets the message as little-endian 32-bit words.
 *
 * Big endian:	  each word is fetched with lwbrx, which byte-reverses
 *		  while loading. lwbrx only has an indexed form, so "off"
 *		  is ignored and rWP is advanced by 4 (INC_PTR) after every
 *		  load instead. After the 16 loads of a block rWP already
 *		  addresses the next block, hence NEXT_BLOCK is empty.
 * Little endian: words are fetched with lwz at displacement "off" from
 *		  rWP. rWP is left alone within a block (INC_PTR is empty)
 *		  and NEXT_BLOCK advances it by the 64-byte block size.
 */
#ifdef __BIG_ENDIAN__
#define LOAD_DATA(reg, off) \
	lwbrx		reg,0,rWP;	/* load data			*/
#define INC_PTR \
	addi		rWP,rWP,4;	/* increment per word		*/
#define NEXT_BLOCK			/* nothing to do		*/
#else
#define LOAD_DATA(reg, off) \
	lwz		reg,off(rWP);	/* load data			*/
#define INC_PTR				/* nothing to do		*/
#define NEXT_BLOCK \
	addi		rWP,rWP,64;	/* increment per block		*/
#endif

/*
 * R_00_15 - two consecutive steps of MD5 round 1, including the loads of
 * the two message words they consume.
 *
 * Step 1 updates "a":  a = b + rotr(a + F(b,c,d) + w0 + k0, p)
 * Step 2 updates "d":  d = a + rotr(d + F(a,b,c) + w1 + k1, q)
 * with F(x,y,z) = (x and y) or (~x and z). The instructions of both steps
 * are interleaved; comments are tagged "1:" / "2:" accordingly.
 *
 * a, b, c, d	state registers in the role they play for step 1
 * w0, w1	registers receiving the message words at off and off+4;
 *		on exit they hold word + constant, not the raw word
 * p, q		right-rotate counts, i.e. 32 minus the left-rotate amount
 * off		byte offset of w0 inside the block (unused on big endian)
 * k0h, k0l	high/low 16-bit halves of the constant for step 1, applied
 *		with addis/addi. addi sign-extends, so a low half >= 0x8000
 *		is passed as a negative number with the high half bumped by 1
 * k1h, k1l	same for step 2
 *
 * Clobbers rT0 and rT1.
 */
#define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \
	LOAD_DATA(w0, off)		/*    w0 = word at off		*/ \
	and		rT0,b,c;	/* 1: f = b and c		*/ \
	INC_PTR				/*    ptr++ (big endian only)	*/ \
	andc		rT1,d,b;	/* 1: f' = d and ~b		*/ \
	LOAD_DATA(w1, off+4)		/*    w1 = word at off+4	*/ \
	or		rT0,rT0,rT1;	/* 1: f = f or f'		*/ \
	addi		w0,w0,k0l;	/* 1: wk = w0 + k0 (low)	*/ \
	add		a,a,rT0;	/* 1: a = a + f			*/ \
	addis		w0,w0,k0h;	/* 1: wk = wk + k0 (high)	*/ \
	addis		w1,w1,k1h;	/* 2: wk = w1 + k1 (high)	*/ \
	add		a,a,w0;		/* 1: a = a + wk		*/ \
	addi		w1,w1,k1l;	/* 2: wk = wk + k1 (low)	*/ \
	rotrwi		a,a,p;		/* 1: a = a rotr p		*/ \
	add		d,d,w1;		/* 2: d = d + wk		*/ \
	add		a,a,b;		/* 1: a = a + b			*/ \
	and		rT0,a,b;	/* 2: f = a and b		*/ \
	andc		rT1,c,a;	/* 2: f' = c and ~a		*/ \
	or		rT0,rT0,rT1;	/* 2: f = f or f'		*/ \
	add		d,d,rT0;	/* 2: d = d + f			*/ \
	INC_PTR				/*    ptr++ (big endian only)	*/ \
	rotrwi		d,d,q;		/* 2: d = d rotr q		*/ \
	add		d,d,a;		/* 2: d = d + a			*/

/*
 * R_16_31 - two consecutive steps of MD5 round 2.
 *
 * Step 1 updates "a":  a = b + rotr(a + G(b,c,d) + w0 + k0, p)
 * Step 2 updates "d":  d = a + rotr(d + G(a,b,c) + w1 + k1, q)
 * with G(x,y,z) = (x and z) or (y and ~z). The instructions of both steps
 * are interleaved; comments are tagged "1:" / "2:" accordingly.
 *
 * a, b, c, d	state registers in the role they play for step 1
 * w0, w1	message-word registers. They are not reloaded here: the
 *		macro adds its constant on top of whatever the register
 *		already holds, so the constant passed in must be relative
 *		to what has been added to that register before
 * p, q		right-rotate counts, i.e. 32 minus the left-rotate amount
 * k0h, k0l	high/low 16-bit halves of the value added to w0 (addis/addi;
 *		a low half >= 0x8000 is passed negative with the high half
 *		bumped by 1 because addi sign-extends)
 * k1h, k1l	same for w1
 *
 * Clobbers rT0 and rT1; w0 and w1 are modified in place.
 */
#define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
	andc		rT0,c,d;	/* 1: f = c and ~d		*/ \
	and		rT1,b,d;	/* 1: f' = b and d		*/ \
	addi		w0,w0,k0l;	/* 1: wk = w0 + k0 (low)	*/ \
	or		rT0,rT0,rT1;	/* 1: f = f or f'		*/ \
	addis		w0,w0,k0h;	/* 1: wk = wk + k0 (high)	*/ \
	add		a,a,rT0;	/* 1: a = a + f			*/ \
	addi		w1,w1,k1l;	/* 2: wk = w1 + k1 (low)	*/ \
	add		a,a,w0;		/* 1: a = a + wk		*/ \
	addis		w1,w1,k1h;	/* 2: wk = wk + k1 (high)	*/ \
	andc		rT0,b,c;	/* 2: f = b and ~c		*/ \
	rotrwi		a,a,p;		/* 1: a = a rotr p		*/ \
	add		a,a,b;		/* 1: a = a + b			*/ \
	add		d,d,w1;		/* 2: d = d + wk		*/ \
	and		rT1,a,c;	/* 2: f' = a and c		*/ \
	or		rT0,rT0,rT1;	/* 2: f = f or f'		*/ \
	add		d,d,rT0;	/* 2: d = d + f			*/ \
	rotrwi		d,d,q;		/* 2: d = d rotr q		*/ \
	add		d,d,a;		/* 2: d = d + a			*/

/*
 * R_32_47 - two consecutive steps of MD5 round 3.
 *
 * Step 1 updates "a":  a = b + rotr(a + H(b,c,d) + w0 + k0, p)
 * Step 2 updates "d":  d = a + rotr(d + H(a,b,c) + w1 + k1, q)
 * with H(x,y,z) = x xor y xor z. The partial term (b xor c) is kept in rT0
 * across both steps so that step 2 needs only one more xor with the new a.
 * The instructions of both steps are interleaved; comments are tagged
 * "1:" / "2:" accordingly.
 *
 * a, b, c, d	state registers in the role they play for step 1
 * w0, w1	message-word registers. They are not reloaded here: the
 *		macro adds its constant on top of whatever the register
 *		already holds, so the constant passed in must be relative
 *		to what has been added to that register before
 * p, q		right-rotate counts, i.e. 32 minus the left-rotate amount
 * k0h, k0l	high/low 16-bit halves of the value added to w0 (addis/addi;
 *		a low half >= 0x8000 is passed negative with the high half
 *		bumped by 1 because addi sign-extends)
 * k1h, k1l	same for w1
 *
 * Clobbers rT0 and rT1; w0 and w1 are modified in place.
 */
#define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
	xor		rT0,b,c;	/* 1: f' = b xor c		*/ \
	addi		w0,w0,k0l;	/* 1: wk = w0 + k0 (low)	*/ \
	xor		rT1,rT0,d;	/* 1: f = f' xor d		*/ \
	addis		w0,w0,k0h;	/* 1: wk = wk + k0 (high)	*/ \
	add		a,a,rT1;	/* 1: a = a + f			*/ \
	addi		w1,w1,k1l;	/* 2: wk = w1 + k1 (low)	*/ \
	add		a,a,w0;		/* 1: a = a + wk		*/ \
	addis		w1,w1,k1h;	/* 2: wk = wk + k1 (high)	*/ \
	rotrwi		a,a,p;		/* 1: a = a rotr p		*/ \
	add		d,d,w1;		/* 2: d = d + wk		*/ \
	add		a,a,b;		/* 1: a = a + b			*/ \
	xor		rT1,rT0,a;	/* 2: f = f' xor a		*/ \
	add		d,d,rT1;	/* 2: d = d + f			*/ \
	rotrwi		d,d,q;		/* 2: d = d rotr q		*/ \
	add		d,d,a;		/* 2: d = d + a			*/

/*
 * R_48_63 - two consecutive steps of MD5 round 4.
 *
 * Step 1 updates "a":  a = b + rotr(a + I(b,c,d) + w0 + k0, p)
 * Step 2 updates "d":  d = a + rotr(d + I(a,b,c) + w1 + k1, q)
 * with I(x,y,z) = y xor (x or ~z). The instructions of both steps are
 * interleaved; comments are tagged "1:" / "2:" accordingly.
 *
 * a, b, c, d	state registers in the role they play for step 1
 * w0, w1	message-word registers. They are not reloaded here: the
 *		macro adds its constant on top of whatever the register
 *		already holds, so the constant passed in must be relative
 *		to what has been added to that register before
 * p, q		right-rotate counts, i.e. 32 minus the left-rotate amount
 * k0h, k0l	high/low 16-bit halves of the value added to w0 (addis/addi;
 *		a low half >= 0x8000 is passed negative with the high half
 *		bumped by 1 because addi sign-extends)
 * k1h, k1l	same for w1
 *
 * Clobbers rT0; w0 and w1 are modified in place.
 */
#define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
	addi		w0,w0,k0l;	/* 1: wk = w0 + k0 (low)	*/ \
	orc		rT0,b,d;	/* 1: f = b or ~d		*/ \
	addis		w0,w0,k0h;	/* 1: wk = wk + k0 (high)	*/ \
	xor		rT0,rT0,c;	/* 1: f = f xor c		*/ \
	add		a,a,w0;		/* 1: a = a + wk		*/ \
	addi		w1,w1,k1l;	/* 2: wk = w1 + k1 (low)	*/ \
	add		a,a,rT0;	/* 1: a = a + f			*/ \
	addis		w1,w1,k1h;	/* 2: wk = wk + k1 (high)	*/ \
	rotrwi		a,a,p;		/* 1: a = a rotr p		*/ \
	add		a,a,b;		/* 1: a = a + b			*/ \
	orc		rT0,a,c;	/* 2: f = a or ~c		*/ \
	add		d,d,w1;		/* 2: d = d + wk		*/ \
	xor		rT0,rT0,b;	/* 2: f = f xor b		*/ \
	add		d,d,rT0;	/* 2: d = d + f			*/ \
	rotrwi		d,d,q;		/* 2: d = d rotr q		*/ \
	add		d,d,a;		/* 2: d = d + a			*/

/*
 * ppc_md5_transform - run the MD5 compression function over whole blocks.
 *
 * In:	r3 (rHP) = pointer to the four 32-bit state words, updated in place
 *	r4 (rWP) = pointer to the input data, 64 bytes per block
 *	r5	 = number of blocks to process
 * Out:	nothing in registers; the state at r3 is rewritten after each block
 *
 * NOTE(review): the matching C prototype is presumably
 *	void ppc_md5_transform(u32 *state, const u8 *src, u32 blocks);
 * confirm against the glue code.
 *
 * Clobbers r0, r4..r12, CTR and cr0. r14..r26 are used but preserved via
 * INITIALIZE/FINALIZE. Leaf function: LR is not touched.
 *
 * The additive constants passed to R_00_15 are the MD5 step constants
 * themselves. The message-word registers are never reloaded within a
 * block, so the constants passed to the later rounds are the difference
 * between the wanted step constant and the one already folded into that
 * register (e.g. 0x0d566e0c = 0xf61e2562 - 0xe8c7b756 for the first reuse
 * of rW01).
 */
_GLOBAL(ppc_md5_transform)
	cmpwi		r5,0		/* zero blocks: return at once,	*/
	beqlr				/* CTR = 0 would wrap in bdnz	*/

	INITIALIZE

	mtctr		r5		/* block count; frees r5 = rH3	*/
	lwz		rH0,0(rHP)
	lwz		rH1,4(rHP)
	lwz		rH2,8(rHP)
	lwz		rH3,12(rHP)

ppc_md5_main:
	R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
		0xd76b, -23432, 0xe8c8, -18602)
	R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
		0x2420, 0x70db, 0xc1be, -12562)
	R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
		0xf57c, 0x0faf, 0x4788, -14806)
	R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
		0xa830, 0x4613, 0xfd47, -27391)
	R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
		0x6981, -26408, 0x8b45,  -2129)
	R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
		0xffff, 0x5bb1, 0x895d, -10306)
	R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
		0x6b90, 0x1122, 0xfd98, 0x7193)
	R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
		0xa679, 0x438e, 0x49b4, 0x0821)

	R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
		0x0d56, 0x6e0c, 0x1810, 0x6d2d)
	R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
		0x9d02, -32109, 0x124c, 0x2332)
	R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
		0x8ea7, 0x4a33, 0x0245, -18270)
	R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
		0x8eee,  -8608, 0xf258,  -5095)
	R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
		0x969d, -10697, 0x1cbe, -15288)
	R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
		0x3317, 0x3e99, 0xdbd9, 0x7c15)
	R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
		0xac4b, 0x7772, 0xd8cf, 0x331d)
	R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
		0x6a28, 0x6dd8, 0x219a, 0x3b68)

	R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
		0x29cb, 0x28e5, 0x4218,  -7788)
	R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16,  9,
		0x473f, 0x06d1, 0x3aae, 0x3036)
	R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
		0xaea1, -15134, 0x640b, -11295)
	R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16,  9,
		0x8f4c, 0x4887, 0xbc7c, -22499)
	R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
		0x7eb8, -27199, 0x00ea, 0x6050)
	R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16,  9,
		0xe01a, 0x22fe, 0x4447, 0x69c5)
	R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
		0xb7f3, 0x0253, 0x59b1, 0x4d5b)
	R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16,  9,
		0x4701, -27017, 0xc7bd, -19859)

	R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
		0x0988,  -1462, 0x4c70, -19401)
	R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
		0xadaf,  -5221, 0xfc99, 0x66f7)
	R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
		0x7e80, -16418, 0xba1e, -25587)
	R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
		0x4130, 0x380d, 0xe0c5, 0x738d)
	/*
	 * rW00, rW14, rW12 and rW10 have seen their last use as message
	 * words. Reuse them to fetch the state saved at rHP, interleaved
	 * with the remaining steps, for the final feed-forward addition.
	 */
	lwz		rW00,0(rHP)
	R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
		0xe837, -30770, 0xde8a, 0x69e8)
	lwz		rW14,4(rHP)
	R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
		0x9e79, 0x260f, 0x256d, -27941)
	lwz		rW12,8(rHP)
	R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
		0xab75, -20775, 0x4f9e, -28397)
	lwz		rW10,12(rHP)
	R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
		0x662b, 0x7c56, 0x11b2, 0x0358)

	/* state += result of this block; store it back right away */
	add		rH0,rH0,rW00
	stw		rH0,0(rHP)
	add		rH1,rH1,rW14
	stw		rH1,4(rHP)
	add		rH2,rH2,rW12
	stw		rH2,8(rHP)
	add		rH3,rH3,rW10
	stw		rH3,12(rHP)
	NEXT_BLOCK

	bdnz		ppc_md5_main	/* CTR--, loop while blocks remain */

	FINALIZE
	blr
