xref: /openbmc/linux/arch/sh/lib/copy_page.S (revision 12eb4683)
1/*
2 * copy_page, __copy_user_page, __copy_user implementation of SuperH
3 *
4 * Copyright (C) 2001  Niibe Yutaka & Kaz Kojima
5 * Copyright (C) 2002  Toshinobu Sugioka
6 * Copyright (C) 2006  Paul Mundt
7 */
8#include <linux/linkage.h>
9#include <asm/page.h>
10
11/*
12 * copy_page
13 * @to: P1 address
14 * @from: P1 address
15 *
16 * void copy_page(void *to, void *from)
17 */
18
19/*
20 * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch
21 * r8 --- from + PAGE_SIZE
22 * r9 --- not used
23 * r10 --- to
24 * r11 --- from
25 */
/*
 * In:  r4 = to, r5 = from
 *
 * Copies PAGE_SIZE bytes, 32 bytes per loop pass: eight longword loads
 * through the post-incremented source cursor, then eight longword stores
 * at fixed offsets from the destination cursor.
 * r8, r10 and r11 are pushed on entry and popped before returning.
 */
ENTRY(copy_page)
	mov.l	r8,@-r15
	mov.l	r10,@-r15
	mov.l	r11,@-r15

	! Build the end marker.  PAGE_SIZE is loaded as (PAGE_SIZE >> 10) and
	! shifted back up by 8 + 2 bits, so its low 10 bits are dropped here.
	mov	#(PAGE_SIZE >> 10),r8
	shll8	r8
	shll2	r8
	add	r5,r8			! r8  = from + PAGE_SIZE
	mov	r5,r11			! r11 = source cursor
	mov	r4,r10			! r10 = destination cursor

.L_copy_page_line:
	! Load one 32-byte line into r0-r7
	mov.l	@r11+,r0
	mov.l	@r11+,r1
	mov.l	@r11+,r2
	mov.l	@r11+,r3
	mov.l	@r11+,r4
	mov.l	@r11+,r5
	mov.l	@r11+,r6
	mov.l	@r11+,r7
	! First longword of the line: SH-4 builds use movca.l for this store
#if defined(CONFIG_CPU_SH4)
	movca.l	r0,@r10
#else
	mov.l	r0,@r10
#endif
	! Remaining seven longwords of the line
	mov.l	r1,@(4,r10)
	mov.l	r2,@(8,r10)
	mov.l	r3,@(12,r10)
	mov.l	r4,@(16,r10)
	mov.l	r5,@(20,r10)
	mov.l	r6,@(24,r10)
	mov.l	r7,@(28,r10)
	cmp/eq	r11,r8			! T = source cursor reached the end marker
	bf/s	.L_copy_page_line
	 add	#32,r10			! (delay slot) advance to the next line

	mov.l	@r15+,r11
	mov.l	@r15+,r10
	mov.l	@r15+,r8
	rts
	 nop
68
69/*
70 * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n);
71 * Return the number of bytes NOT copied
72 */
/*
 * Exception-table wrappers used by __copy_user.
 *
 * __COPY_USER_EX() emits the wrapped instruction at local label 9999 and
 * records a (9999b, fixup) pair in the __ex_table section.
 *
 *   EX()         pairs the instruction with fixup label 6000
 *   EX_NO_POP()  pairs the instruction with fixup label 6005
 */
#define __COPY_USER_EX(fixup, ...)	\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, fixup	;	\
	.previous
#define EX(...)		__COPY_USER_EX(6000f, __VA_ARGS__)
#define EX_NO_POP(...)	__COPY_USER_EX(6005f, __VA_ARGS__)
ENTRY(__copy_user)
	! In: r4 = to, r5 = from, r6 = len.
	! r3 is set to to + len; the fixup handlers compute the return value
	! (bytes not copied) as r3 - r4.
	mov	#11,r0
	mov	r4,r3
	cmp/gt	r0,r6		! T = (len > 11)
	bf/s	.L_cleanup_loop_no_pop	! len <= 11: byte loop, nothing pushed yet
	 add	r6,r3		! (delay slot, runs on both paths) r3 = to + len

	! Push r11, r10, r9, r8 while computing r0 = (4 - from) & 3, the number
	! of bytes to copy before the source is longword aligned.
	mov.l	r11,@-r15
	neg	r5,r0
	mov.l	r10,@-r15
	add	#4,r0
	mov.l	r9,@-r15
	and	#3,r0
	mov.l	r8,@-r15
	tst	r0,r0
	bt	.L_pick_dest_routine	! source is already aligned

.L_align_src_byte:
	! One byte per pass until r0 reaches zero; len (r6) shrinks with it
EX(	mov.b	@r5+,r1		)
	dt	r0
	add	#-1,r6
EX(	mov.b	r1,@r4		)
	bf/s	.L_align_src_byte
	 add	#1,r4

	! Pick the copy routine from the low two bits of the destination
.L_pick_dest_routine:
	mov	r6,r2
	shlr2	r2		! r2 = remaining length in longwords
	mov	#3,r1
	and	r4,r1
	shll2	r1		! r1 = (to & 3) * 4 = byte offset into the table
	mova	.L_jump_tbl,r0
	mov.l	@(r0,r1),r1
	jmp	@r1
	 nop

	.align 2
.L_jump_tbl:
	.long	.L_dest00
	.long	.L_dest01
	.long	.L_dest10
	.long	.L_dest11
128
/*
 * Small-copy path, taken when there are fewer than 12 bytes to copy.
 * It is entered before r8-r11 are pushed, so it returns with a plain rts
 * and its faults are routed to fixup 6005 (via EX_NO_POP) rather than 6000.
 *
 * Keep this branch target close to the entry point so that the bf/s which
 * reaches it stays in range and a more expensive branch is not inserted.
 * This is the fast path for small copies; the copies dispatched through
 * the jump table finish in the default slow-path cleanup. -PFM.
 */
.L_cleanup_loop_no_pop:
	tst	r6,r6		! zero length: nothing to copy
	bt	.L_small_done

.L_small_byte:
EX_NO_POP(	mov.b	@r5+,r0		)
	dt	r6
EX_NO_POP(	mov.b	r0,@r4		)
	bf/s	.L_small_byte
	 add	#1,r4

.L_small_done:
	mov	#0,r0		! normal return
.L_small_ret:

! Exception handler: r0 = r3 - r4, then resume at .L_small_ret
.section .fixup, "ax"
6005:
	mov.l	.L_small_ret_addr,r1
	mov	r3,r0
	jmp	@r1
	 sub	r4,r0
	.align	2
.L_small_ret_addr:
	.long	.L_small_ret

.previous
	rts
	 nop
164
! Destination = 00
!
! Reached through .L_jump_tbl when (to & 3) == 0.
! r6 = remaining length in bytes, r4 = destination, r5 = source.

.L_dest00:
	! Fewer than 60 bytes left: skip the 32-byte loop.  With at least 60,
	! up to 28 bytes may go to alignment below and 32 still remain for
	! the first pass of the unrolled loop.
	mov	#(32+32-4), r0
	cmp/gt	r6, r0		! T = (60 > len)
	bt	.L_dest00_tail

	! r0 = bytes needed to bring the destination to a 32 byte boundary
	neg	r4,r0
	add	#0x20, r0
	and	#0x1f, r0
	tst	r0, r0
	bt	.L_dest00_line

	sub	r0, r6
	shlr2	r0		! bytes -> longwords
.L_dest00_to_boundary:
EX(	mov.l	@r5+,r1		)
	dt	r0
EX(	mov.l	r1,@r4		)
	bf/s	.L_dest00_to_boundary
	 add	#4,r4

.L_dest00_line:
	! Load 32 bytes into r0-r2 and r7-r11, then store them as one line
EX(	mov.l	@r5+,r0		)
EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r2		)
EX(	mov.l	@r5+,r7		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r9		)
EX(	mov.l	@r5+,r10	)
EX(	mov.l	@r5+,r11	)
#ifdef CONFIG_CPU_SH4
EX(	movca.l	r0,@r4		)
#else
EX(	mov.l	r0,@r4		)
#endif
	add	#-32, r6
EX(	mov.l	r1,@(4,r4)	)
	mov	#32, r0
EX(	mov.l	r2,@(8,r4)	)
	cmp/gt	r6, r0		! T = (32 > len), tested by the bf/s below
EX(	mov.l	r7,@(12,r4)	)
EX(	mov.l	r8,@(16,r4)	)
EX(	mov.l	r9,@(20,r4)	)
EX(	mov.l	r10,@(24,r4)	)
EX(	mov.l	r11,@(28,r4)	)
	bf/s	.L_dest00_line
	 add	#32,r4

.L_dest00_tail:
	mov	r6, r0
	shlr2	r0		! r0 = whole longwords still to copy
	tst	r0, r0
	bt	.L_cleanup
.L_dest00_word:
EX(	mov.l	@r5+,r1		)
	dt	r0
EX(	mov.l	r1,@r4		)
	bf/s	.L_dest00_word
	 add	#4,r4

	bra	.L_cleanup
	 nop
229
! Destination = 10
!
! Reached through .L_jump_tbl when (to & 3) == 2: the source is longword
! aligned, the destination only word aligned.
!
! In:  r2 = remaining length in longwords, r4 = destination, r5 = source
! Use: r7 = number of 32-byte passes (r2 >> 3), r2 = leftover longwords
!
! Each 32-byte pass reads eight longwords and writes them as a 16-bit
! word, seven longwords and a final 16-bit word.  Neighbouring source
! longwords are spliced with xtrct, which per the SH instruction set
! leaves (Rm << 16) | (Rn >> 16) in Rn.
!
! Every access is wrapped in EX() so that a fault is routed to fixup 6000.

.L_dest10:
	mov	r2,r7
	shlr2	r7
	shlr	r7		! r7 = longwords / 8
	tst	r7,r7		! T = no full 32-byte pass to do
	mov	#7,r0
	bt/s	.L_dest10_word
	 and	r0,r2		! (delay slot, runs on both paths) r2 &= 7
.L_dest10_line:
	dt	r7		! T = this is the last pass; tested by bf/s below
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.l	@r5+,r0		)
EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r9		)
EX(	mov.l	@r5+,r10	)
EX(	mov.w	r0,@r4		)
	add	#2,r4		! destination is longword aligned from here on
	xtrct	r1,r0
	xtrct	r8,r1
	xtrct	r9,r8
	xtrct	r10,r9

EX(	mov.l	r0,@r4		)
EX(	mov.l	r1,@(4,r4)	)
EX(	mov.l	r8,@(8,r4)	)
EX(	mov.l	r9,@(12,r4)	)

EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r0		)
	xtrct	r1,r10
	xtrct	r8,r1
	xtrct	r0,r8
	shlr16	r0
EX(	mov.l	r10,@(16,r4)	)
EX(	mov.l	r1,@(20,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.w	r0,@(28,r4)	)
	bf/s	.L_dest10_line
	 add	#30,r4
#else
	! Big endian: work from the end of the 32-byte line towards its start
EX(	mov.l	@(28,r5),r0	)
EX(	mov.l	@(24,r5),r8	)
EX(	mov.l	@(20,r5),r9	)
EX(	mov.l	@(16,r5),r10	)
EX(	mov.w	r0,@(30,r4)	)
	add	#-2,r4		! r4 is longword aligned until the add #34 below
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(28,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.l	r9,@(20,r4)	)

EX(	mov.l	@(12,r5),r0	)
EX(	mov.l	@(8,r5),r8	)
	xtrct	r0,r10
EX(	mov.l	@(4,r5),r9	)
	! This destination store needs an exception table entry like every
	! other store in the pass; a fault here would otherwise have no fixup.
EX(	mov.l	r10,@(16,r4)	)
EX(	mov.l	@r5,r10		)
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(12,r4)	)
EX(	mov.l	r8,@(8,r4)	)
	swap.w	r10,r0
EX(	mov.l	r9,@(4,r4)	)
EX(	mov.w	r0,@(2,r4)	)

	add	#32,r5
	bf/s	.L_dest10_line
	 add	#34,r4
#endif
	tst	r2,r2		! any leftover longwords?
	bt	.L_cleanup

.L_dest10_word:
	! Read longword, write two words per iteration
EX(	mov.l	@r5+,r0		)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.w	r0,@r4		)
	shlr16	r0
EX(	mov.w 	r0,@(2,r4)	)
#else
EX(	mov.w	r0,@(2,r4)	)
	shlr16	r0
EX(	mov.w	r0,@r4		)
#endif
	bf/s	.L_dest10_word
	 add	#4,r4

	bra	.L_cleanup
	 nop
326
! Destination = 01 or 11
!
! Reached through .L_jump_tbl when the destination address is odd.
! In: r2 = remaining length in longwords, r4 = destination, r5 = source.
! Both labels name the same loop; when r2 reaches zero, execution falls
! through to the code that follows this block.

.L_dest01:
.L_dest11:
	! Per pass: read one longword, write it as byte, word, byte
EX(	mov.l	@r5+,r0		)
	dt	r2		! T = last longword; tested by the bf/s below
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.b	r0,@r4		)
	shlr8	r0
	add	#1,r4		! r4 is word aligned from here
EX(	mov.w	r0,@r4		)
	shlr16	r0
EX(	mov.b	r0,@(2,r4)	)
	bf/s	.L_dest11
	 add	#3,r4
#else
EX(	mov.b	r0,@(3,r4)	)
	shlr8	r0
	swap.w	r0,r7
EX(	mov.b	r7,@r4		)
	add	#1,r4		! r4 is word aligned from here
EX(	mov.w	r0,@r4		)
	bf/s	.L_dest11
	 add	#3,r4
#endif
353
! Cleanup last few bytes: copy the (r6 & 3) trailing bytes one at a time,
! then return 0.  Fixup 6000 also resumes here, with r0 = r3 - r4, so that
! r8-r11 are popped on the fault path as well.
.L_cleanup:
	mov	r6,r0
	tst	#3,r0		! T = no trailing bytes
	bt	.L_exit
	and	#3,r0
	mov	r0,r6		! r6 = trailing byte count

.L_cleanup_loop:
EX(	mov.b	@r5+,r0		)
	dt	r6
EX(	mov.b	r0,@r4		)
	bf/s	.L_cleanup_loop
	 add	#1,r4

.L_exit:
	mov	#0,r0		! normal return

.L_copy_user_ret:

! Exception handler: r0 = r3 - r4, then resume at .L_copy_user_ret
.section .fixup, "ax"
6000:
	mov.l	.L_copy_user_ret_addr,r1
	mov	r3,r0
	jmp	@r1
	 sub	r4,r0
	.align	2
.L_copy_user_ret_addr:
	.long	.L_copy_user_ret

.previous
	! Pop in the reverse of the push order used at entry
	mov.l	@r15+,r8
	mov.l	@r15+,r9
	mov.l	@r15+,r10
	rts
	 mov.l	@r15+,r11	! (delay slot) last pop
390