/*
 *  linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
 *  r0 = src, r1 = dst, r2 = len, r3 = sum
 *  Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 */

/*
 * Symbolic names for the four argument registers used throughout
 * this routine.
 */
src	.req	r0		@ source pointer
dst	.req	r1		@ destination pointer
len	.req	r2		@ number of bytes still to copy
sum	.req	r3		@ running checksum

/*
 * Zero-length exit, reached from .Lless8 when len == 0: the incoming
 * checksum is returned unchanged in r0.  load_regs is a macro that is
 * not defined in this file; presumably it restores what save_regs
 * stored and returns to the caller - confirm in the including file.
 */
.Lzero:		mov	r0, sum
		load_regs	ea
27
28		/*
29		 * Align an unaligned destination pointer.  We know that
30		 * we have >= 8 bytes here, so we don't need to check
31		 * the length.  Note that the source pointer hasn't been
32		 * aligned yet.
33		 */
34.Ldst_unaligned:
35		tst	dst, #1
36		beq	.Ldst_16bit
37
38		load1b	ip
39		sub	len, len, #1
40		adcs	sum, sum, ip, put_byte_1	@ update checksum
41		strb	ip, [dst], #1
42		tst	dst, #2
43		moveq	pc, lr			@ dst is now 32bit aligned
44
45.Ldst_16bit:	load2b	r8, ip
46		sub	len, len, #2
47		adcs	sum, sum, r8, put_byte_0
48		strb	r8, [dst], #1
49		adcs	sum, sum, ip, put_byte_1
50		strb	ip, [dst], #1
51		mov	pc, lr			@ dst is now 32bit aligned
52
53		/*
54		 * Handle 0 to 7 bytes, with any alignment of source and
55		 * destination pointers.  Note that when we get here, C = 0
56		 */
57.Lless8:	teq	len, #0			@ check for zero count
58		beq	.Lzero
59
60		/* we must have at least one byte. */
61		tst	dst, #1			@ dst 16-bit aligned
62		beq	.Lless8_aligned
63
64		/* Align dst */
65		load1b	ip
66		sub	len, len, #1
67		adcs	sum, sum, ip, put_byte_1	@ update checksum
68		strb	ip, [dst], #1
69		tst	len, #6
70		beq	.Lless8_byteonly
71
721:		load2b	r8, ip
73		sub	len, len, #2
74		adcs	sum, sum, r8, put_byte_0
75		strb	r8, [dst], #1
76		adcs	sum, sum, ip, put_byte_1
77		strb	ip, [dst], #1
78.Lless8_aligned:
79		tst	len, #6
80		bne	1b
81.Lless8_byteonly:
82		tst	len, #1
83		beq	.Ldone
84		load1b	r8
85		adcs	sum, sum, r8, put_byte_0	@ update checksum
86		strb	r8, [dst], #1
87		b	.Ldone
88
/*
 * Function entry and the path used when both pointers are (or have
 * been made) 32-bit aligned.
 *
 *  In:  r0 = src, r1 = dst, r2 = len, r3 = sum
 *  Out: r0 = checksum
 *
 * FN_ENTRY, save_regs, load_regs, loadNl, get_byte_N, put_byte_N and
 * push are macros that are not defined in this file; presumably they
 * come from whichever file includes this one.
 *
 * The checksum is accumulated with chains of 'adcs', so the carry flag
 * has to survive from one addition to the next; the final 'adc' at
 * .Ldone folds the last carry into the result.
 */
FN_ENTRY
		mov	ip, sp
		save_regs
		sub	fp, ip, #4

		cmp	len, #8			@ Ensure that we have at least
		blo	.Lless8			@ 8 bytes to copy.

		adds	sum, sum, #0		@ C = 0
		tst	dst, #3			@ Test destination alignment
		blne	.Ldst_unaligned		@ align destination, return here

		/*
		 * Ok, the dst pointer is now 32bit aligned, and we know
		 * that we must have more than 4 bytes to copy.  Note
		 * that C contains the carry from the dst alignment above.
		 */

		tst	src, #3			@ Test source alignment
		bne	.Lsrc_not_aligned

		/* Routine for src & dst aligned */

		bics	ip, len, #15		@ ip = len rounded down to 16
		beq	2f

		/* Main loop: 16 bytes (four words) per iteration. */
1:		load4l	r4, r5, r6, r7
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b

		/* Up to three remaining whole words, chosen by len & 12. */
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r4, r5
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		tst	ip, #4
		beq	4f

3:		load1l	r4
		str	r4, [dst], #4
		adcs	sum, sum, r4

		/*
		 * Trailing 1 to 3 bytes.  They are taken from r4 after one
		 * more load1l and written out with byte stores.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		load1l	r4
		tst	len, #2
		mov	r5, r4, get_byte_0
		beq	.Lexit			@ only one byte left
		adcs	sum, sum, r4, push #16
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2

		/*
		 * Common tail, also used by the unaligned-source paths:
		 * when len is odd, r5 holds the last byte, which is
		 * stored, masked to 8 bits and added to the checksum.
		 */
.Lexit:		tst	len, #1
		strneb	r5, [dst], #1
		andne	r5, r5, #255
		adcnes	sum, sum, r5, put_byte_0

		/*
		 * If the dst pointer was not 16-bit aligned, we
		 * need to rotate the checksum here to get around
		 * the inefficient byte manipulations in the
		 * architecture independent code.
		 *
		 * The original dst is reloaded from [sp, #0]; this
		 * presumably relies on save_regs having stored r1 at
		 * the top of the stack - confirm against that macro.
		 */
.Ldone:		adc	r0, sum, #0
		ldr	sum, [sp, #0]		@ dst
		tst	sum, #1
		movne	r0, r0, ror #8
		load_regs	ea
167
/*
 * Source pointer is not 32-bit aligned (dst already is).  src is
 * rounded down to a word boundary and read a whole word at a time;
 * r4 carries the bytes left over from the previous word and is merged
 * with the next word to build each output word.
 *
 * The code dispatches on src & 3: offset 2 goes to .Lsrc2_aligned,
 * offset 3 to .Lsrc3_aligned, and offset 1 falls through below.
 *
 * load1l/load2l/load4l, push, pull and get_byte_N are macros defined
 * outside this file; push/pull are presumably endian-dependent shifts
 * - confirm in the including file.
 */
.Lsrc_not_aligned:
		adc	sum, sum, #0		@ include C from dst alignment
		and	ip, src, #3
		bic	src, src, #3
		load1l	r5
		cmp	ip, #2
		beq	.Lsrc2_aligned
		bhi	.Lsrc3_aligned
		mov	r4, r5, pull #8		@ C = 0
		bics	ip, len, #15		@ ip = len rounded down to 16
		beq	2f

		/* Main loop: 16 bytes (four words) per iteration. */
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #24
		mov	r5, r5, pull #8
		orr	r5, r5, r6, push #24
		mov	r6, r6, pull #8
		orr	r6, r6, r7, push #24
		mov	r7, r7, pull #8
		orr	r7, r7, r8, push #24
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #8		@ leftover for the next word
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b

		/* Up to three remaining whole words, chosen by len & 12. */
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #24
		mov	r5, r5, pull #8
		orr	r5, r5, r6, push #24
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #8
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #24
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #8

		/*
		 * Trailing 1 to 3 bytes, all taken from the leftover in
		 * r4; the last odd byte is finished at .Lexit.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit			@ only one byte left
		adcs	sum, sum, r4, push #16
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2
		b	.Lexit
226
/*
 * Source offset 2 within its word (src & 3 == 2), dst 32-bit aligned.
 * On entry r5 holds the first aligned source word and src has been
 * rounded down to a word boundary.  r4 carries the leftover half of
 * the previous word; each output word is r4 merged with the next
 * source word shifted by 16.
 *
 * load1b/load1l/load2l/load4l, push, pull and get_byte_N are macros
 * defined outside this file - confirm their exact behaviour in the
 * including file.
 */
.Lsrc2_aligned:	mov	r4, r5, pull #16
		adds	sum, sum, #0		@ adding zero cannot carry: C = 0
		bics	ip, len, #15		@ ip = len rounded down to 16
		beq	2f

		/* Main loop: 16 bytes (four words) per iteration. */
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #16
		mov	r5, r5, pull #16
		orr	r5, r5, r6, push #16
		mov	r6, r6, pull #16
		orr	r6, r6, r7, push #16
		mov	r7, r7, pull #16
		orr	r7, r7, r8, push #16
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #16	@ leftover for the next word
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b

		/* Up to three remaining whole words, chosen by len & 12. */
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #16
		mov	r5, r5, pull #16
		orr	r5, r5, r6, push #16
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #16
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #16
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #16

		/*
		 * Trailing 1 to 3 bytes.  The first two come from the
		 * leftover in r4; a third, if present, is fetched with
		 * load1b and finished at .Lexit.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit			@ only one byte left
		adcs	sum, sum, r4
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		tst	len, #1
		beq	.Ldone
		load1b	r5
		b	.Lexit
280
/*
 * Source offset 3 within its word (src & 3 == 3), dst 32-bit aligned.
 * On entry r5 holds the first aligned source word and src has been
 * rounded down to a word boundary.  r4 carries the leftover part of
 * the previous word; each output word is r4 merged with the next
 * source word shifted by 8.
 *
 * load1l/load2l/load4l, push, pull and get_byte_N are macros defined
 * outside this file - confirm their exact behaviour in the including
 * file.
 */
.Lsrc3_aligned:	mov	r4, r5, pull #24
		adds	sum, sum, #0		@ adding zero cannot carry: C = 0
		bics	ip, len, #15		@ ip = len rounded down to 16
		beq	2f

		/* Main loop: 16 bytes (four words) per iteration. */
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #8
		mov	r5, r5, pull #24
		orr	r5, r5, r6, push #8
		mov	r6, r6, pull #24
		orr	r6, r6, r7, push #8
		mov	r7, r7, pull #24
		orr	r7, r7, r8, push #8
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #24	@ leftover for the next word
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b

		/* Up to three remaining whole words, chosen by len & 12. */
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #8
		mov	r5, r5, pull #24
		orr	r5, r5, r6, push #8
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #24
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #8
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #24

		/*
		 * Trailing 1 to 3 bytes.  The first comes from the
		 * leftover in r4; if at least two remain, another word
		 * is loaded to supply the second byte and, via .Lexit,
		 * the third.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit			@ only one byte left
		strb	r5, [dst], #1
		adcs	sum, sum, r4
		load1l	r4
		mov	r5, r4, get_byte_0
		strb	r5, [dst], #1
		adcs	sum, sum, r4, push #24
		mov	r5, r4, get_byte_1
		b	.Lexit
334