xref: /openbmc/linux/arch/sh/lib/copy_page.S (revision b627b4ed)
/*
 * copy_page, __copy_user_page, __copy_user implementations for SuperH
 *
 * Copyright (C) 2001  Niibe Yutaka & Kaz Kojima
 * Copyright (C) 2002  Toshinobu Sugioka
 * Copyright (C) 2006  Paul Mundt
 */
8#include <linux/linkage.h>
9#include <asm/page.h>
10
/*
 * void copy_page(void *to, void *from)
 *
 * Copy PAGE_SIZE bytes from @from to @to, one 32-byte line (eight
 * longwords) per loop iteration.
 *
 * @to:   P1 address of the destination (passed in r4)
 * @from: P1 address of the source (passed in r5)
 *
 * Register roles:
 *   r0-r7   scratch; hold the eight longwords of the line in flight
 *   r8      from + PAGE_SIZE, the value r11 reaches when the copy is done
 *   r9      not used
 *   r10     destination cursor
 *   r11     source cursor
 *
 * r8, r10 and r11 are pushed on entry and popped again before returning.
 */
ENTRY(copy_page)
	mov.l	r8,@-r15
	mov.l	r10,@-r15
	mov.l	r11,@-r15
	mov.l	.Lpsz,r8
	mov	r4,r10
	mov	r5,r11
	add	r5,r8		! r8 = end of the source page
	!
.Lcopy_page_line:
	! Load eight consecutive longwords from the source.
	mov.l	@r11+,r0
	mov.l	@r11+,r1
	mov.l	@r11+,r2
	mov.l	@r11+,r3
	mov.l	@r11+,r4
	mov.l	@r11+,r5
	mov.l	@r11+,r6
	mov.l	@r11+,r7
	! Store the first longword. The SH4 variant uses movca.l (move with
	! cache block allocation) and keeps the line address in r0 for the
	! ocbwb further down.
#ifdef CONFIG_CPU_SH4
	movca.l	r0,@r10
	mov	r10,r0
#else
	mov.l	r0,@r10
#endif
	! Store the other seven longwords from the top of the line downwards;
	! the pre-decrement stores leave r10 at line + 4.
	add	#32,r10
	mov.l	r7,@-r10
	mov.l	r6,@-r10
	mov.l	r5,@-r10
	mov.l	r4,@-r10
	mov.l	r3,@-r10
	mov.l	r2,@-r10
	mov.l	r1,@-r10
#ifdef CONFIG_CPU_SH4
	ocbwb	@r0		! write the finished line back
#endif
	cmp/eq	r11,r8
	bf/s	.Lcopy_page_line	! loop until the source cursor reaches r8
	 add	#28,r10			! delay slot: r10 = start of the next line
	!
	mov.l	@r15+,r11
	mov.l	@r15+,r10
	mov.l	@r15+,r8
	rts
	 nop

	.balign 4
.Lpsz:	.long	PAGE_SIZE
73
/*
 * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n);
 * Returns the number of bytes NOT copied (0 when everything was copied).
 */
/*
 * Emit one instruction that may fault together with its __ex_table entry,
 * i.e. the pair { address of the instruction, address of its fixup }.
 * The fixup is named by a numeric local label that has to be defined
 * further down in the file:
 *
 *   EX()         uses fixup 6000
 *   EX_NO_POP()  uses fixup 6005
 *
 * __EX_ENTRY() is the shared body and is not meant to be used directly.
 */
#define __EX_ENTRY(fixup, ...)		\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, fixup	;	\
	.previous
#define EX(...)		__EX_ENTRY(6000f, __VA_ARGS__)
#define EX_NO_POP(...)	__EX_ENTRY(6005f, __VA_ARGS__)
ENTRY(__copy_user)
	/*
	 * In: r4 = to, r5 = from, r6 = n.
	 * r3 is set to to + n (end of the destination); the fault fixups
	 * compute their return value as r3 minus the current r4.
	 */
	mov	r4,r3
	mov	#11,r0
	cmp/gt	r0,r6		! T = (n > 11)
	bf/s	.L_cleanup_loop_no_pop	! n <= 11: byte loop, nothing pushed yet
	 add	r6,r3		! delay slot, executed on both paths

	! Larger copy: save r8-r11, then work out how many bytes are needed
	! to longword-align the source: r0 = (4 - from) & 3.
	mov.l	r11,@-r15
	mov.l	r10,@-r15
	mov.l	r9,@-r15
	mov.l	r8,@-r15
	neg	r5,r0
	add	#4,r0
	and	#3,r0
	tst	r0,r0
	bt	.L_src_aligned

.L_align_src_loop:
	! Byte copy until the source pointer is longword aligned
EX(	mov.b	@r5+,r1		)
	dt	r0
	add	#-1,r6
EX(	mov.b	r1,@r4		)
	bf/s	.L_align_src_loop
	 add	#1,r4

	! Pick the copy routine from the low two bits of the destination.
	! r2 = remaining length in longwords, r1 = byte offset into the table.
.L_src_aligned:
	mov	r6, r2
	shlr2	r2
	mov	#3,r1
	and	r4,r1
	shll2	r1
	mova	.L_jump_tbl,r0
	mov.l	@(r0,r1),r1
	jmp	@r1
	 nop

	.align 2
.L_jump_tbl:
	.long	.L_dest00
	.long	.L_dest01
	.long	.L_dest10
	.long	.L_dest11
133
/*
 * Fewer than 12 bytes to copy.
 *
 * This target has to stay close to the bf/s at the top of __copy_user so
 * that the branch displacement does not overflow and force a more
 * expensive branch sequence. It is the fast path for small copies; copies
 * that go through the jump table end in the default slow-path cleanup. -PFM.
 *
 * r8-r11 have not been pushed on this path, so it has its own return and
 * its own fixup (6005, the EX_NO_POP target), neither of which pops.
 */
.L_cleanup_loop_no_pop:
	tst	r6,r6
	bt	.L_small_done	! n == 0: nothing to copy

.L_small_loop:
EX_NO_POP(	mov.b	@r5+,r0		)
	dt	r6
EX_NO_POP(	mov.b	r0,@r4		)
	bf/s	.L_small_loop
	 add	#1,r4

.L_small_done:
	mov	#0,r0		! everything copied
.L_small_ret:

	! Fault fixup: r0 = r3 - r4 (end of destination minus current
	! destination pointer), then resume at the return sequence below.
.section .fixup, "ax"
6005:
	mov.l	.L_small_ret_addr,r1
	mov	r3,r0
	jmp	@r1
	 sub	r4,r0		! delay slot
	.align	2
.L_small_ret_addr:
	.long	.L_small_ret

.previous
	rts
	 nop
169
/*
 * Destination = 00: the destination is longword aligned (the source was
 * aligned before the dispatch), so whole longwords are moved.
 */
.L_dest00:
	! Fewer than 60 bytes left: skip the 32-byte line copy
	mov	#(32+32-4), r0
	cmp/gt	r6, r0		! T = (60 > len)
	bt	.L_dest00_tail

	! r0 = (32 - to) & 31 = bytes up to the next 32-byte boundary
	neg	r4,r0
	add	#0x20, r0
	and	#0x1f, r0
	tst	r0, r0
	bt	.L_dest00_lines

	sub	r0, r6
	shlr2	r0		! bytes -> longwords
.L_dest00_align:
EX(	mov.l	@r5+,r1		)
	dt	r0
EX(	mov.l	r1,@r4		)
	bf/s	.L_dest00_align
	 add	#4,r4

	! Main loop: eight longword loads followed by eight stores, 32 bytes
	! per pass. r8-r11 serve as additional data registers here.
.L_dest00_lines:
EX(	mov.l	@r5+,r0		)
EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r2		)
EX(	mov.l	@r5+,r7		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r9		)
EX(	mov.l	@r5+,r10	)
EX(	mov.l	@r5+,r11	)
#if defined(CONFIG_CPU_SH4)
EX(	movca.l	r0,@r4		)
#else
EX(	mov.l	r0,@r4		)
#endif
	add	#-32, r6
EX(	mov.l	r1,@(4,r4)	)
	mov	#32, r0
EX(	mov.l	r2,@(8,r4)	)
	cmp/gt	r6, r0		! T = (32 > len); the stores below keep T intact
EX(	mov.l	r7,@(12,r4)	)
EX(	mov.l	r8,@(16,r4)	)
EX(	mov.l	r9,@(20,r4)	)
EX(	mov.l	r10,@(24,r4)	)
EX(	mov.l	r11,@(28,r4)	)
	bf/s	.L_dest00_lines	! another full line while len >= 32
	 add	#32,r4

	! Copy the remaining whole longwords; stray bytes go to .L_cleanup
.L_dest00_tail:
	mov	r6, r0
	shlr2	r0		! r0 = whole longwords left
	tst	r0, r0
	bt	.L_cleanup
.L_dest00_tail_loop:
EX(	mov.l	@r5+,r1		)
	dt	r0
EX(	mov.l	r1,@r4		)
	bf/s	.L_dest00_tail_loop
	 add	#4,r4

	bra	.L_cleanup
	 nop
234
/*
 * Destination = 10: the destination is word aligned but not longword
 * aligned. On entry r2 holds the remaining length in longwords.
 *
 * Whole groups of eight longwords (32 bytes) are read as longwords and
 * shifted by 16 bits with xtrct, so that each pass writes one word,
 * seven longwords and one word. Leftover longwords (r2 % 8) are written
 * as two words each. Stray bytes are left to .L_cleanup.
 */
.L_dest10:
	mov	r2,r7
	shlr2	r7
	shlr	r7		! r7 = r2 / 8 = number of 32-byte passes
	tst	r7,r7
	mov	#7,r0
	bt/s	.L_dest10_tail	! no full 32-byte group
	 and	r0,r2		! delay slot: r2 = longwords left over
.L_dest10_lines:
	dt	r7		! T is consumed by the bf/s closing the pass
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.l	@r5+,r0		)
EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r9		)
EX(	mov.l	@r5+,r10	)
EX(	mov.w	r0,@r4		)
	add	#2,r4
	xtrct	r1,r0
	xtrct	r8,r1
	xtrct	r9,r8
	xtrct	r10,r9

EX(	mov.l	r0,@r4		)
EX(	mov.l	r1,@(4,r4)	)
EX(	mov.l	r8,@(8,r4)	)
EX(	mov.l	r9,@(12,r4)	)

EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r0		)
	xtrct	r1,r10
	xtrct	r8,r1
	xtrct	r0,r8
	shlr16	r0
EX(	mov.l	r10,@(16,r4)	)
EX(	mov.l	r1,@(20,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.w	r0,@(28,r4)	)
	bf/s	.L_dest10_lines
	 add	#30,r4
#else
	/*
	 * Big endian: the group is processed from its top down, using
	 * displacement addressing; r5 is advanced once at the end.
	 */
EX(	mov.l	@(28,r5),r0	)
EX(	mov.l	@(24,r5),r8	)
EX(	mov.l	@(20,r5),r9	)
EX(	mov.l	@(16,r5),r10	)
EX(	mov.w	r0,@(30,r4)	)
	add	#-2,r4
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(28,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.l	r9,@(20,r4)	)

EX(	mov.l	@(12,r5),r0	)
EX(	mov.l	@(8,r5),r8	)
	xtrct	r0,r10
EX(	mov.l	@(4,r5),r9	)
	/*
	 * This destination store needs an exception table entry like every
	 * other access in this routine: it is the lowest address written so
	 * far in this pass and may be the first store to reach a page.
	 */
EX(	mov.l	r10,@(16,r4)	)
EX(	mov.l	@r5,r10		)
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(12,r4)	)
EX(	mov.l	r8,@(8,r4)	)
	swap.w	r10,r0
EX(	mov.l	r9,@(4,r4)	)
EX(	mov.w	r0,@(2,r4)	)

	add	#32,r5
	bf/s	.L_dest10_lines
	 add	#34,r4
#endif
	tst	r2,r2
	bt	.L_cleanup

.L_dest10_tail:
	! Read longword, write two words per iteration
EX(	mov.l	@r5+,r0		)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.w	r0,@r4		)
	shlr16	r0
EX(	mov.w	r0,@(2,r4)	)
#else
EX(	mov.w	r0,@(2,r4)	)
	shlr16	r0
EX(	mov.w	r0,@r4		)
#endif
	bf/s	.L_dest10_tail
	 add	#4,r4

	bra	.L_cleanup
	 nop
331
/*
 * Destination = 01 or 11: the destination address is odd.
 *
 * Each pass reads one longword and writes it as byte, word, byte.
 * r2 holds the number of longwords to move. When the loop ends,
 * execution falls through into .L_cleanup for the stray bytes.
 */
.L_dest01:
.L_dest11:
EX(	mov.l	@r5+,r0		)
	dt	r2		! T is consumed by the bf/s closing the pass
#if defined(CONFIG_CPU_LITTLE_ENDIAN)
EX(	mov.b	r0,@r4		)
	shlr8	r0
	add	#1,r4
EX(	mov.w	r0,@r4		)
	shlr16	r0
EX(	mov.b	r0,@(2,r4)	)
	bf/s	.L_dest11
	 add	#3,r4
#else
	! Big endian: lowest byte goes last in memory, so it is stored first
	! at offset 3; swap.w then brings the top byte down into r7.
EX(	mov.b	r0,@(3,r4)	)
	shlr8	r0
	swap.w	r0,r7
EX(	mov.b	r7,@r4		)
	add	#1,r4
EX(	mov.w	r0,@r4		)
	bf/s	.L_dest11
	 add	#3,r4
#endif
358
/*
 * Copy the last (len & 3) bytes one at a time, then leave through the
 * common exit, which restores r8-r11.
 */
.L_cleanup:
	mov	r6,r0
	and	#3,r0
	tst	r0,r0
	bt	.L_exit		! no stray bytes left
	mov	r0,r6

.L_cleanup_loop:
EX(	mov.b	@r5+,r0		)
	dt	r6
EX(	mov.b	r0,@r4		)
	bf/s	.L_cleanup_loop
	 add	#1,r4

.L_exit:
	mov	#0,r0		! everything copied

.L_copy_ret:

	! Fault fixup (label 6000): r0 = r3 - r4, i.e. end of destination
	! minus current destination pointer, then resume at the pops below.
.section .fixup, "ax"
6000:
	mov.l	.L_copy_ret_addr,r1
	mov	r3,r0
	jmp	@r1
	 sub	r4,r0		! delay slot
	.align	2
.L_copy_ret_addr:
	.long	.L_copy_ret

.previous
	mov.l	@r15+,r8
	mov.l	@r15+,r9
	mov.l	@r15+,r10
	rts
	 mov.l	@r15+,r11	! delay slot: last pop
395