xref: /openbmc/linux/arch/arm/lib/memcpy.S (revision 1da177e4)
1/*
2 *  linux/arch/arm/lib/memcpy.S
3 *
4 *  Copyright (C) 1995-1999 Russell King
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 *  ASM optimised string functions
11 */
12#include <linux/linkage.h>
13#include <asm/assembler.h>
14
15		.text
16
/*
 * ENTER: build the stack frame used by the copy routine below.
 * Copies sp into ip, then pushes r0, r4-r9, fp, ip (the entry sp), lr
 * and pc with a full-descending store, and finally sets fp = ip - 4,
 * i.e. the address of the highest word just pushed (the pc slot).
 * r0 is stacked so that EXIT/EXITEQ can reload it on return.
 */
17#define ENTER	\
18		mov	ip,sp	;\
19		stmfd	sp!,{r0,r4-r9,fp,ip,lr,pc}	;\
20		sub	fp,ip,#4
21
/*
 * EXIT: unconditional return.  Reloads r0, r4-r9, fp, sp and pc from
 * the ten words below fp, which are the slots ENTER filled with
 * r0, r4-r9, fp, the entry sp and lr respectively.
 * LOADREGS is not defined in this file.
 * NOTE(review): presumably it expands to an "ldm<mode> fp, {...}" --
 * confirm against the included headers.
 */
22#define EXIT	\
23		LOADREGS(ea, fp, {r0, r4 - r9, fp, sp, pc})
24
/*
 * EXITEQ: same register reload as EXIT, but passed "eqea" instead of
 * "ea".  It is used directly after "adds r2, r2, #4" in the byte-tail
 * code, where the Z flag means no bytes are left to copy.
 * NOTE(review): the "eq" prefix presumably makes the load conditional
 * on Z being set -- confirm against the LOADREGS definition.
 */
25#define EXITEQ	\
26		LOADREGS(eqea, fp, {r0, r4 - r9, fp, sp, pc})
27
28/*
29 * Prototype: void *memcpy(void *to, const void *from, unsigned long n);
30 */
/*
 * memcpy / memmove -- both entry points share this body.
 *
 * In:   r0 = to, r1 = from, r2 = n (byte count)
 * Out:  r0 = the original 'to' (ENTER stacks r0, EXIT/EXITEQ reload it)
 * Uses: r3-r9 and ip as data registers, and the condition flags
 *
 * When from >= to (unsigned compare) the copy runs forward from the
 * start of the buffers; when from < to it runs backward from the end
 * (label 23 onwards), so an overlapping source is not overwritten
 * before it has been read.
 *
 * r2 is kept biased throughout: it holds "bytes remaining minus some
 * constant" so that the flags set by subs/adds/cmn can drive the
 * conditional loads and stores directly.  The bias in force is noted
 * at each label.  The length tests use signed conditions (blt/bge),
 * so n is effectively assumed to be below 2^31.
 *
 * ENTRY, PLD(), pull and push are not defined in this file.
 * NOTE(review): PLD() presumably emits its argument only on builds
 * where the pld prefetch instruction is usable, and pull/push are
 * presumably endian-dependent shift operators -- confirm against the
 * included headers.  The byte counts quoted below for PLD() lines
 * assume every PLD() line is assembled.
 */
31ENTRY(memcpy)
32ENTRY(memmove)
33		ENTER
		/* from < to (unsigned): copy backward from the end, at 23 */
34		cmp	r1, r0
35		bcc	23f
		/* r2 = n - 4; fewer than 4 bytes go straight to the byte tail */
36		subs	r2, r2, #4
37		blt	6f
38	PLD(	pld	[r1, #0]		)
		/* ip = to & 3: a misaligned destination is fixed up at 7 */
39		ands	ip, r0, #3
40		bne	7f
		/* ip = from & 3: a misaligned source is handled at 8 */
41		ands	ip, r1, #3
42		bne	8f
43
		/*
		 * 1: forward copy, both pointers word aligned.
		 * On entry r2 = remaining - 4.  Fewer than 12 bytes left
		 * goes to 5, fewer than 32 goes to 4; otherwise r2 becomes
		 * remaining - 32 (remaining - 96 once the PLD() bias of 64
		 * is applied, in which case fewer than 96 skips to 3).
		 */
441:		subs	r2, r2, #8
45		blt	5f
46		subs	r2, r2, #20
47		blt	4f
48	PLD(	pld	[r1, #28]		)
49	PLD(	subs	r2, r2, #64		)
50	PLD(	blt	3f			)
		/*
		 * 2: main loop.  Loads 32 bytes; if r2 stays non-negative
		 * it stores them and loads a second 32 bytes in the same
		 * pass.  The final stmia stores whichever block is still
		 * held in r3-r9/ip.  Loops while r2 >= 0.
		 */
512:	PLD(	pld	[r1, #60]		)
52	PLD(	pld	[r1, #92]		)
53		ldmia	r1!, {r3 - r9, ip}
54		subs	r2, r2, #32
55		stmgeia	r0!, {r3 - r9, ip}
56		ldmgeia	r1!, {r3 - r9, ip}
57		subges	r2, r2, #32
58		stmia	r0!, {r3 - r9, ip}
59		bge	2b
		/*
		 * 3: (PLD() builds only) 32..95 bytes remain here with
		 * r2 = remaining - 96.  Copies one or two more 32-byte
		 * blocks without prefetching and leaves
		 * r2 = remaining - 32.
		 */
603:	PLD(	ldmia	r1!, {r3 - r9, ip}	)
61	PLD(	adds	r2, r2, #32		)
62	PLD(	stmgeia	r0!, {r3 - r9, ip}	)
63	PLD(	ldmgeia	r1!, {r3 - r9, ip}	)
64	PLD(	subges	r2, r2, #32		)
65	PLD(	stmia	r0!, {r3 - r9, ip}	)
		/*
		 * 4: fewer than 32 bytes remain, r2 = remaining - 32.
		 * Copies 16 bytes if at least 16 remain, then re-biases
		 * r2 to remaining - 12 and copies 12 bytes if at least
		 * 12 remain.
		 */
664:		cmn	r2, #16
67		ldmgeia	r1!, {r3 - r6}
68		subge	r2, r2, #16
69		stmgeia	r0!, {r3 - r6}
70		adds	r2, r2, #20
71		ldmgeia	r1!, {r3 - r5}
72		subge	r2, r2, #12
73		stmgeia	r0!, {r3 - r5}
		/*
		 * 5: fewer than 12 bytes remain, r2 = remaining - 12.
		 * Re-biases to remaining - 4; fewer than 4 goes to the
		 * byte tail.  Otherwise copies one word (fewer than 8
		 * left) or two words, ending with r2 = remaining - 4.
		 */
745:		adds	r2, r2, #8
75		blt	6f
76		subs	r2, r2, #4
77		ldrlt	r3, [r1], #4
78		ldmgeia	r1!, {r4, r5}
79		subge	r2, r2, #4
80		strlt	r3, [r0], #4
81		stmgeia	r0!, {r4, r5}
82
		/*
		 * 6: forward byte tail.  On entry r2 = remaining - 4; after
		 * the adds r2 = remaining (0..3).  Returns at once if zero,
		 * otherwise copies one byte, a second if r2 >= 2 and a
		 * third if r2 > 2, then returns.
		 */
836:		adds	r2, r2, #4
84		EXITEQ
85		cmp	r2, #2
86		ldrb	r3, [r1], #1
87		ldrgeb	r4, [r1], #1
88		ldrgtb	r5, [r1], #1
89		strb	r3, [r0], #1
90		strgeb	r4, [r0], #1
91		strgtb	r5, [r0], #1
92		EXIT
93
		/*
		 * 7: forward copy, destination not word aligned.
		 * On entry ip = to & 3 (1..3) and r2 = remaining - 4.
		 * ip becomes 4 - ip, the number of bytes needed to align
		 * the destination; that many bytes are copied and taken
		 * off r2.  Fewer than 4 bytes left goes to the byte tail.
		 * If the source is now aligned too, continue at 1;
		 * otherwise fall through to 8 with ip = from & 3.
		 */
947:		rsb	ip, ip, #4
95		cmp	ip, #2
96		ldrb	r3, [r1], #1
97		ldrgeb	r4, [r1], #1
98		ldrgtb	r5, [r1], #1
99		strb	r3, [r0], #1
100		strgeb	r4, [r0], #1
101		strgtb	r5, [r0], #1
102		subs	r2, r2, ip
103		blt	6b
104		ands	ip, r1, #3
105		beq	1b
106
		/*
		 * 8: forward copy, destination aligned, source not.
		 * On entry ip = from & 3 (1..3) and r2 = remaining - 4.
		 * r1 is rounded down to a word boundary and the first
		 * source word is preloaded into r7.  From here on, each
		 * output word is assembled from two neighbouring source
		 * words using the pull/push shift pair.
		 * Dispatch: ip == 3 -> 18, ip == 2 -> 13, ip == 1 falls
		 * through (shift pair 8/24).
		 */
1078:		bic	r1, r1, #3
108		ldr	r7, [r1], #4
109		cmp	ip, #2
110		bgt	18f
111		beq	13f
		/* ip == 1: fewer than 16 bytes left uses the word loop at 11 */
112		cmp	r2, #12
113		blt	11f
114	PLD(	pld	[r1, #12]		)
		/* r2 = remaining - 16 (remaining - 48 with the PLD() bias) */
115		sub	r2, r2, #12
116	PLD(	subs	r2, r2, #32		)
117	PLD(	blt	10f			)
118	PLD(	pld	[r1, #28]		)
		/*
		 * 9/10: 16 bytes per pass.  r7 carries the last source
		 * word loaded into the next output word.
		 */
1199:	PLD(	pld	[r1, #44]		)
12010:		mov	r3, r7, pull #8
121		ldmia	r1!, {r4 - r7}
122		subs	r2, r2, #16
123		orr	r3, r3, r4, push #24
124		mov	r4, r4, pull #8
125		orr	r4, r4, r5, push #24
126		mov	r5, r5, pull #8
127		orr	r5, r5, r6, push #24
128		mov	r6, r6, pull #8
129		orr	r6, r6, r7, push #24
130		stmia	r0!, {r3 - r6}
131		bge	9b
		/*
		 * PLD() builds: keep looping at 10 (no prefetch) while at
		 * least 16 bytes remain, then drop the bias of 32.
		 */
132	PLD(	cmn	r2, #32			)
133	PLD(	bge	10b			)
134	PLD(	add	r2, r2, #32		)
		/* r2 = remaining - 4; fewer than 4 left skips the word loop */
135		adds	r2, r2, #12
136		blt	12f
		/* 11: one output word per pass while at least 4 bytes remain */
13711:		mov	r3, r7, pull #8
138		ldr	r7, [r1], #4
139		subs	r2, r2, #4
140		orr	r3, r3, r7, push #24
141		str	r3, [r0], #4
142		bge	11b
		/*
		 * 12: r1 is one word past the word held in r7; step it back
		 * to the next unread source byte, then do the byte tail.
		 */
14312:		sub	r1, r1, #3
144		b	6b
145
		/*
		 * 13: same scheme as the ip == 1 case above, for
		 * from & 3 == 2 (shift pair 16/16).
		 * On entry r2 = remaining - 4.
		 */
14613:		cmp	r2, #12
147		blt	16f
148	PLD(	pld	[r1, #12]		)
149		sub	r2, r2, #12
150	PLD(	subs	r2, r2, #32		)
151	PLD(	blt	15f			)
152	PLD(	pld	[r1, #28]		)
		/* 14/15: 16 bytes per pass */
15314:	PLD(	pld	[r1, #44]		)
15415:		mov	r3, r7, pull #16
155		ldmia	r1!, {r4 - r7}
156		subs	r2, r2, #16
157		orr	r3, r3, r4, push #16
158		mov	r4, r4, pull #16
159		orr	r4, r4, r5, push #16
160		mov	r5, r5, pull #16
161		orr	r5, r5, r6, push #16
162		mov	r6, r6, pull #16
163		orr	r6, r6, r7, push #16
164		stmia	r0!, {r3 - r6}
165		bge	14b
166	PLD(	cmn	r2, #32			)
167	PLD(	bge	15b			)
168	PLD(	add	r2, r2, #32		)
		/* r2 = remaining - 4 */
169		adds	r2, r2, #12
170		blt	17f
		/* 16: one output word per pass */
17116:		mov	r3, r7, pull #16
172		ldr	r7, [r1], #4
173		subs	r2, r2, #4
174		orr	r3, r3, r7, push #16
175		str	r3, [r0], #4
176		bge	16b
		/* 17: step r1 back to the next unread source byte */
17717:		sub	r1, r1, #2
178		b	6b
179
		/*
		 * 18: same scheme again, for from & 3 == 3
		 * (shift pair 24/8).  On entry r2 = remaining - 4.
		 */
18018:		cmp	r2, #12
181		blt	21f
182	PLD(	pld	[r1, #12]		)
183		sub	r2, r2, #12
184	PLD(	subs	r2, r2, #32		)
185	PLD(	blt	20f			)
186	PLD(	pld	[r1, #28]		)
		/* 19/20: 16 bytes per pass */
18719:	PLD(	pld	[r1, #44]		)
18820:		mov	r3, r7, pull #24
189		ldmia	r1!, {r4 - r7}
190		subs	r2, r2, #16
191		orr	r3, r3, r4, push #8
192		mov	r4, r4, pull #24
193		orr	r4, r4, r5, push #8
194		mov	r5, r5, pull #24
195		orr	r5, r5, r6, push #8
196		mov	r6, r6, pull #24
197		orr	r6, r6, r7, push #8
198		stmia	r0!, {r3 - r6}
199		bge	19b
200	PLD(	cmn	r2, #32			)
201	PLD(	bge	20b			)
202	PLD(	add	r2, r2, #32		)
		/* r2 = remaining - 4 */
203		adds	r2, r2, #12
204		blt	22f
		/* 21: one output word per pass */
20521:		mov	r3, r7, pull #24
206		ldr	r7, [r1], #4
207		subs	r2, r2, #4
208		orr	r3, r3, r7, push #8
209		str	r3, [r0], #4
210		bge	21b
		/* 22: step r1 back to the next unread source byte */
21122:		sub	r1, r1, #1
212		b	6b
213
214
		/*
		 * 23: backward copy, entered when from < to.
		 * Both pointers are moved to one past the end of their
		 * buffers and every access below pre-decrements.  The
		 * layout mirrors the forward code: r2 = n - 4, fewer than
		 * 4 bytes goes to the byte tail at 29, a misaligned
		 * destination end goes to 30 and a misaligned source end
		 * goes to 31.
		 */
21523:		add	r1, r1, r2
216		add	r0, r0, r2
217		subs	r2, r2, #4
218		blt	29f
219	PLD(	pld	[r1, #-4]		)
220		ands	ip, r0, #3
221		bne	30f
222		ands	ip, r1, #3
223		bne	31f
224
		/*
		 * 24: backward copy, both pointers word aligned.
		 * On entry r2 = remaining - 4.  Fewer than 12 left goes to
		 * 28, fewer than 32 goes to 27; otherwise r2 becomes
		 * remaining - 32 (remaining - 96 with the PLD() bias, in
		 * which case fewer than 96 skips to 26).
		 */
22524:		subs	r2, r2, #8
226		blt	28f
227		subs	r2, r2, #20
228		blt	27f
229	PLD(	pld	[r1, #-32]		)
230	PLD(	subs	r2, r2, #64		)
231	PLD(	blt	26f			)
		/*
		 * 25: main loop, 32 bytes per load/store with an optional
		 * second 32 bytes in the same pass; loops while r2 >= 0.
		 */
23225:	PLD(	pld	[r1, #-64]		)
233	PLD(	pld	[r1, #-96]		)
234		ldmdb	r1!, {r3 - r9, ip}
235		subs	r2, r2, #32
236		stmgedb	r0!, {r3 - r9, ip}
237		ldmgedb	r1!, {r3 - r9, ip}
238		subges	r2, r2, #32
239		stmdb	r0!, {r3 - r9, ip}
240		bge	25b
		/*
		 * 26: (PLD() builds only) copies the last one or two
		 * 32-byte blocks without prefetching and leaves
		 * r2 = remaining - 32.
		 */
24126:	PLD(	ldmdb	r1!, {r3 - r9, ip}	)
242	PLD(	adds	r2, r2, #32		)
243	PLD(	stmgedb	r0!, {r3 - r9, ip}	)
244	PLD(	ldmgedb	r1!, {r3 - r9, ip}	)
245	PLD(	subges	r2, r2, #32		)
246	PLD(	stmdb	r0!, {r3 - r9, ip}	)
		/*
		 * 27: fewer than 32 bytes remain, r2 = remaining - 32.
		 * Copies 16 bytes if at least 16 remain, then 12 bytes if
		 * at least 12 remain, leaving r2 = remaining - 12.
		 */
24727:		cmn	r2, #16
248		ldmgedb	r1!, {r3 - r6}
249		subge	r2, r2, #16
250		stmgedb	r0!, {r3 - r6}
251		adds	r2, r2, #20
252		ldmgedb	r1!, {r3 - r5}
253		subge	r2, r2, #12
254		stmgedb	r0!, {r3 - r5}
		/*
		 * 28: fewer than 12 bytes remain, r2 = remaining - 12.
		 * Re-biases to remaining - 4; fewer than 4 goes to the
		 * byte tail.  Otherwise copies one word (fewer than 8
		 * left) or two words, ending with r2 = remaining - 4.
		 */
25528:		adds	r2, r2, #8
256		blt	29f
257		subs	r2, r2, #4
258		ldrlt	r3, [r1, #-4]!
259		ldmgedb	r1!, {r4, r5}
260		subge	r2, r2, #4
261		strlt	r3, [r0, #-4]!
262		stmgedb	r0!, {r4, r5}
263
		/*
		 * 29: backward byte tail.  After the adds r2 = remaining
		 * (0..3).  Returns at once if zero, otherwise copies one
		 * to three bytes with pre-decrement and returns.
		 */
26429:		adds	r2, r2, #4
265		EXITEQ
266		cmp	r2, #2
267		ldrb	r3, [r1, #-1]!
268		ldrgeb	r4, [r1, #-1]!
269		ldrgtb	r5, [r1, #-1]!
270		strb	r3, [r0, #-1]!
271		strgeb	r4, [r0, #-1]!
272		strgtb	r5, [r0, #-1]!
273		EXIT
274
		/*
		 * 30: backward copy, destination end not word aligned.
		 * On entry ip = (end of to) & 3 (1..3), which is already
		 * the number of bytes to copy to bring r0 down to a word
		 * boundary, so there is no rsb here unlike label 7.
		 * Those bytes are copied and taken off r2.  Fewer than 4
		 * left goes to the byte tail; an aligned source continues
		 * at 24; otherwise fall through to 31.
		 */
27530:		cmp	ip, #2
276		ldrb	r3, [r1, #-1]!
277		ldrgeb	r4, [r1, #-1]!
278		ldrgtb	r5, [r1, #-1]!
279		strb	r3, [r0, #-1]!
280		strgeb	r4, [r0, #-1]!
281		strgtb	r5, [r0, #-1]!
282		subs	r2, r2, ip
283		blt	29b
284		ands	ip, r1, #3
285		beq	24b
286
		/*
		 * 31: backward copy, destination aligned, source not.
		 * On entry ip = (end of from) & 3 (1..3) and
		 * r2 = remaining - 4.  r1 is rounded down to a word
		 * boundary and that word is preloaded into r3; the
		 * post-index of #0 leaves r1 unchanged.
		 * Dispatch: ip == 1 -> 41, ip == 2 -> 36, ip == 3 falls
		 * through (shift pair 8/24).
		 */
28731:		bic	r1, r1, #3
288		ldr	r3, [r1], #0
289		cmp	ip, #2
290		blt	41f
291		beq	36f
		/* ip == 3: fewer than 16 bytes left uses the word loop at 34 */
292		cmp	r2, #12
293		blt	34f
294	PLD(	pld	[r1, #-16]		)
		/* r2 = remaining - 16 (remaining - 48 with the PLD() bias) */
295		sub	r2, r2, #12
296	PLD(	subs	r2, r2, #32		)
297	PLD(	blt	33f			)
298	PLD(	pld	[r1, #-32]		)
		/*
		 * 32/33: 16 bytes per pass.  r3 carries the lowest source
		 * word loaded into the next pass.
		 */
29932:	PLD(	pld	[r1, #-48]		)
30033:		mov	r7, r3, push #8
301		ldmdb	r1!, {r3, r4, r5, r6}
302		subs	r2, r2, #16
303		orr	r7, r7, r6, pull #24
304		mov	r6, r6, push #8
305		orr	r6, r6, r5, pull #24
306		mov	r5, r5, push #8
307		orr	r5, r5, r4, pull #24
308		mov	r4, r4, push #8
309		orr	r4, r4, r3, pull #24
310		stmdb	r0!, {r4, r5, r6, r7}
311		bge	32b
		/*
		 * PLD() builds: keep looping at 33 (no prefetch) while at
		 * least 16 bytes remain, then drop the bias of 32.
		 */
312	PLD(	cmn	r2, #32			)
313	PLD(	bge	33b			)
314	PLD(	add	r2, r2, #32		)
		/* r2 = remaining - 4; fewer than 4 left skips the word loop */
315		adds	r2, r2, #12
316		blt	35f
		/* 34: one output word per pass while at least 4 bytes remain */
31734:		mov	ip, r3, push #8
318		ldr	r3, [r1, #-4]!
319		subs	r2, r2, #4
320		orr	ip, ip, r3, pull #24
321		str	ip, [r0, #-4]!
322		bge	34b
		/*
		 * 35: r1 is the address of the word held in r3; move it up
		 * to just past the last unread source byte, then do the
		 * byte tail.
		 */
32335:		add	r1, r1, #3
324		b	29b
325
		/*
		 * 36: same scheme as the ip == 3 case above, for
		 * (end of from) & 3 == 2 (shift pair 16/16).
		 * On entry r2 = remaining - 4.
		 */
32636:		cmp	r2, #12
327		blt	39f
328	PLD(	pld	[r1, #-16]		)
329		sub	r2, r2, #12
330	PLD(	subs	r2, r2, #32		)
331	PLD(	blt	38f			)
332	PLD(	pld	[r1, #-32]		)
		/* 37/38: 16 bytes per pass */
33337:	PLD(	pld	[r1, #-48]		)
33438:		mov	r7, r3, push #16
335		ldmdb	r1!, {r3, r4, r5, r6}
336		subs	r2, r2, #16
337		orr	r7, r7, r6, pull #16
338		mov	r6, r6, push #16
339		orr	r6, r6, r5, pull #16
340		mov	r5, r5, push #16
341		orr	r5, r5, r4, pull #16
342		mov	r4, r4, push #16
343		orr	r4, r4, r3, pull #16
344		stmdb	r0!, {r4, r5, r6, r7}
345		bge	37b
346	PLD(	cmn	r2, #32			)
347	PLD(	bge	38b			)
348	PLD(	add	r2, r2, #32		)
		/* r2 = remaining - 4 */
349		adds	r2, r2, #12
350		blt	40f
		/* 39: one output word per pass */
35139:		mov	ip, r3, push #16
352		ldr	r3, [r1, #-4]!
353		subs	r2, r2, #4
354		orr	ip, ip, r3, pull #16
355		str	ip, [r0, #-4]!
356		bge	39b
		/* 40: move r1 up to just past the last unread source byte */
35740:		add	r1, r1, #2
358		b	29b
359
		/*
		 * 41: same scheme again, for (end of from) & 3 == 1
		 * (shift pair 24/8).  On entry r2 = remaining - 4.
		 */
36041:		cmp	r2, #12
361		blt	44f
362	PLD(	pld	[r1, #-16]		)
363		sub	r2, r2, #12
364	PLD(	subs	r2, r2, #32		)
365	PLD(	blt	43f			)
366	PLD(	pld	[r1, #-32]		)
		/* 42/43: 16 bytes per pass */
36742:	PLD(	pld	[r1, #-48]		)
36843:		mov	r7, r3, push #24
369		ldmdb	r1!, {r3, r4, r5, r6}
370		subs	r2, r2, #16
371		orr	r7, r7, r6, pull #8
372		mov	r6, r6, push #24
373		orr	r6, r6, r5, pull #8
374		mov	r5, r5, push #24
375		orr	r5, r5, r4, pull #8
376		mov	r4, r4, push #24
377		orr	r4, r4, r3, pull #8
378		stmdb	r0!, {r4, r5, r6, r7}
379		bge	42b
380	PLD(	cmn	r2, #32			)
381	PLD(	bge	43b			)
382	PLD(	add	r2, r2, #32		)
		/* r2 = remaining - 4 */
383		adds	r2, r2, #12
384		blt	45f
		/* 44: one output word per pass */
38544:		mov	ip, r3, push #24
386		ldr	r3, [r1, #-4]!
387		subs	r2, r2, #4
388		orr	ip, ip, r3, pull #8
389		str	ip, [r0, #-4]!
390		bge	44b
		/* 45: move r1 up to just past the last unread source byte */
39145:		add	r1, r1, #1
392		b	29b
393
394