xref: /openbmc/linux/arch/sh/lib/copy_page.S (revision 05cf4fe738242183f1237f1b3a28b4479348c0a1)
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * copy_page and __copy_user implementations for SuperH
4 *
5 * Copyright (C) 2001  Niibe Yutaka & Kaz Kojima
6 * Copyright (C) 2002  Toshinobu Sugioka
7 * Copyright (C) 2006  Paul Mundt
8 */
9#include <linux/linkage.h>
10#include <asm/page.h>
11
12/*
13 * copy_page
14 * @to: P1 address
15 * @from: P1 address
16 *
17 * void copy_page(void *to, void *from)
18 */
19
20/*
21 * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch
22 * r8 --- from + PAGE_SIZE
23 * r9 --- not used
24 * r10 --- to
25 * r11 --- from
26 */
/*
 * copy_page body: copies PAGE_SIZE bytes from the address in r5 (from)
 * to the address in r4 (to), 32 bytes per loop iteration.
 *
 * r8, r10 and r11 are pushed on entry and popped before returning;
 * r0-r7 are overwritten.  An instruction indented by one extra space
 * sits in the delay slot of the branch/return just above it.
 */
27ENTRY(copy_page)
28	mov.l	r8,@-r15
29	mov.l	r10,@-r15
30	mov.l	r11,@-r15
	/* Working copies: r10 = to, r11 = from, r8 = from (end pointer base). */
31	mov	r4,r10
32	mov	r5,r11
33	mov	r5,r8
	/*
	 * r8 = from + PAGE_SIZE.  The constant is loaded pre-shifted right by
	 * 10 and shifted back with shll8 + shll2 (8 + 2 = 10 bits), presumably
	 * because the full value does not fit a mov immediate -- confirm
	 * against the ISA manual.
	 */
34	mov	#(PAGE_SIZE >> 10), r0
35	shll8	r0
36	shll2	r0
37	add	r0,r8
38	!
	/* Load eight longwords (32 bytes) from the source, post-incrementing r11. */
391:	mov.l	@r11+,r0
40	mov.l	@r11+,r1
41	mov.l	@r11+,r2
42	mov.l	@r11+,r3
43	mov.l	@r11+,r4
44	mov.l	@r11+,r5
45	mov.l	@r11+,r6
46	mov.l	@r11+,r7
	/*
	 * Store the first longword at the group base.  SH-4 builds use
	 * movca.l for this store (see the SH-4 manual for its cache
	 * semantics); other CPUs use a plain mov.l.
	 */
47#if defined(CONFIG_CPU_SH4)
48	movca.l	r0,@r10
49#else
50	mov.l	r0,@r10
51#endif
	/*
	 * Move r10 to the end of the 32-byte group and store r7..r1 downwards
	 * with pre-decrement; this leaves r10 at group base + 4.
	 */
52	add	#32,r10
53	mov.l	r7,@-r10
54	mov.l	r6,@-r10
55	mov.l	r5,@-r10
56	mov.l	r4,@-r10
57	mov.l	r3,@-r10
58	mov.l	r2,@-r10
59	mov.l	r1,@-r10
	/*
	 * Loop until the source pointer reaches from + PAGE_SIZE.  The delay
	 * slot adds 28 so r10 ends at group base + 32, the next group.
	 */
60	cmp/eq	r11,r8
61	bf/s	1b
62	 add	#28,r10
63	!
	/* Restore the saved registers in reverse push order and return. */
64	mov.l	@r15+,r11
65	mov.l	@r15+,r10
66	mov.l	@r15+,r8
67	rts
68	 nop
69
70/*
71 * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n);
72 * Return the number of bytes NOT copied
73 */
/*
 * EX(insn): emit insn under local label 9999 and add an __ex_table entry
 * pairing that instruction's address with fixup label 6000 (forward
 * reference).  Used for accesses made after the callee-saved registers
 * have been pushed; the 6000 fixup resumes at code that pops them.
 */
74#define EX(...)			\
75	9999: __VA_ARGS__ ;		\
76	.section __ex_table, "a";	\
77	.long 9999b, 6000f	;	\
78	.previous
/*
 * EX_NO_POP(insn): same as EX(), but the __ex_table entry points at fixup
 * label 6005, whose resume point returns without popping any registers.
 * Used on the small-copy path, which pushes nothing.
 */
79#define EX_NO_POP(...)			\
80	9999: __VA_ARGS__ ;		\
81	.section __ex_table, "a";	\
82	.long 9999b, 6005f	;	\
83	.previous
/*
 * __copy_user entry: r4 = to, r5 = from, r6 = n (byte count).
 *
 * r3 is set to to + n (end of destination) and kept for the whole routine;
 * the fixup handlers compute the return value as r3 - r4.
 *
 * Copies of 11 bytes or fewer branch to .L_cleanup_loop_no_pop before any
 * register is pushed.  Larger copies push r11, r10, r9, r8, byte-copy until
 * the source is longword aligned, then dispatch on the low two bits of the
 * destination through .L_jump_tbl with r2 = remaining longword count.
 */
84ENTRY(__copy_user)
85	! Check if small number of bytes
86	mov	#11,r0
87	mov	r4,r3
88	cmp/gt	r0,r6		! r6 (len) > r0 (11)
89	bf/s	.L_cleanup_loop_no_pop
90	 add	r6,r3		! last destination address
91
	/*
	 * The register pushes are interleaved with the computation of
	 * r0 = (4 - from) & 3, the number of bytes needed to longword-align
	 * the source.  If that is zero, skip the byte loop.
	 */
92	! Calculate bytes needed to align to src
93	mov.l	r11,@-r15
94	neg	r5,r0
95	mov.l	r10,@-r15
96	add	#4,r0
97	mov.l	r9,@-r15
98	and	#3,r0
99	mov.l	r8,@-r15
100	tst	r0,r0
101	bt	2f
102
	/*
	 * Byte loop: r0 counts alignment bytes down (dt sets T when it reaches
	 * zero), r6 is decremented per byte, and r4 advances in the delay slot.
	 */
1031:
104	! Copy bytes to long word align src
105EX(	mov.b	@r5+,r1		)
106	dt	r0
107	add	#-1,r6
108EX(	mov.b	r1,@r4		)
109	bf/s	1b
110	 add	#1,r4
111
	/*
	 * r2 = r6 >> 2 (whole longwords left); r1 = (r4 & 3) << 2 is the byte
	 * offset of the table entry for this destination alignment.
	 */
112	! Jump to appropriate routine depending on dest
1132:	mov	#3,r1
114	mov	r6, r2
115	and	r4,r1
116	shlr2	r2
117	shll2	r1
118	mova	.L_jump_tbl,r0
119	mov.l	@(r0,r1),r1
120	jmp	@r1
121	 nop
122
	/* Handlers indexed by destination address bits 1:0 (00, 01, 10, 11). */
123	.align 2
124.L_jump_tbl:
125	.long	.L_dest00
126	.long	.L_dest01
127	.long	.L_dest10
128	.long	.L_dest11
129
130/*
131 * Come here if there are fewer than 12 bytes to copy
132 *
133 * Keep the branch target close, so the bf/s callee doesn't overflow
134 * and result in a more expensive branch being inserted. This is the
135 * fast-path for small copies, the jump via the jump table will hit the
136 * default slow-path cleanup. -PFM.
137 */
/*
 * Small-copy path, reached from the entry check with nothing pushed on the
 * stack.  Copies r6 bytes one at a time and returns 0.  A faulting access
 * is routed by EX_NO_POP to fixup 6005, which returns r3 - r4 instead.
 */
138.L_cleanup_loop_no_pop:
139	tst	r6,r6		! Check explicitly for zero
140	bt	1f
141
	/* dt counts r6 down; r4 advances in the delay slot after each store. */
1422:
143EX_NO_POP(	mov.b	@r5+,r0		)
144	dt	r6
145EX_NO_POP(	mov.b	r0,@r4		)
146	bf/s	2b
147	 add	#1,r4
148
1491:	mov	#0,r0		! normal return
	/* 5000 is the resume point for fixup 6005; the rts below follows it. */
1505000:
151
	/*
	 * Fixup, placed in the .fixup section: r0 = r3 - r4 (end of destination
	 * minus current destination pointer, computed in the jmp delay slot),
	 * then jump to the 5000 label above via the literal at 8000.
	 */
152# Exception handler:
153.section .fixup, "ax"
1546005:
155	mov.l	8000f,r1
156	mov	r3,r0
157	jmp	@r1
158	 sub	r4,r0
159	.align	2
1608000:	.long	5000b
161
	/* Back in the previous section: plain return, no registers to restore. */
162.previous
163	rts
164	 nop
165
166! Destination = 00
167
/*
 * Destination longword aligned (address bits 1:0 == 00); the source was
 * aligned by the entry code.  r6 = bytes remaining.
 *
 * With fewer than 60 bytes left, copy longword by longword.  Otherwise
 * align the destination to 32 bytes and copy 32-byte groups while at least
 * 32 bytes remain, then finish with the longword loop.  Both ways end at
 * .L_cleanup for the last r6 & 3 bytes.  This path overwrites r2 and r7-r11.
 */
168.L_dest00:
169	! Skip the large copy for small transfers
170	mov	#(32+32-4), r0
171	cmp/gt	r6, r0		! r0 (60) > r6 (len)
172	bt	1f
173
	/*
	 * r0 = (32 - to) & 31 = bytes up to the next 32-byte boundary.  At most
	 * 28 here, so with r6 >= 60 at least 32 bytes remain for the group
	 * loop.
	 */
174	! Align dest to a 32 byte boundary
175	neg	r4,r0
176	add	#0x20, r0
177	and	#0x1f, r0
178	tst	r0, r0
179	bt	2f
180
	/* Account for the alignment bytes in r6, then copy r0 / 4 longwords. */
181	sub	r0, r6
182	shlr2	r0
1833:
184EX(	mov.l	@r5+,r1		)
185	dt	r0
186EX(	mov.l	r1,@r4		)
187	bf/s	3b
188	 add	#4,r4
189
	/*
	 * 32-byte group loop: load eight longwords into r0-r2 and r7-r11
	 * (r3-r6 hold live state), then store them at offsets 0..28 from r4.
	 */
1902:
191EX(	mov.l	@r5+,r0		)
192EX(	mov.l	@r5+,r1		)
193EX(	mov.l	@r5+,r2		)
194EX(	mov.l	@r5+,r7		)
195EX(	mov.l	@r5+,r8		)
196EX(	mov.l	@r5+,r9		)
197EX(	mov.l	@r5+,r10	)
198EX(	mov.l	@r5+,r11	)
	/* First store of the group: movca.l on SH-4, plain mov.l otherwise. */
199#ifdef CONFIG_CPU_SH4
200EX(	movca.l	r0,@r4		)
201#else
202EX(	mov.l	r0,@r4		)
203#endif
	/*
	 * r6 -= 32, and the loop-exit test (32 > r6) is done early; its T
	 * result is consumed by the bf/s after the remaining stores, which are
	 * expected to leave T unchanged -- confirm against the ISA manual.
	 */
204	add	#-32, r6
205EX(	mov.l	r1,@(4,r4)	)
206	mov	#32, r0
207EX(	mov.l	r2,@(8,r4)	)
208	cmp/gt	r6, r0		! r0 (32) > r6 (len)
209EX(	mov.l	r7,@(12,r4)	)
210EX(	mov.l	r8,@(16,r4)	)
211EX(	mov.l	r9,@(20,r4)	)
212EX(	mov.l	r10,@(24,r4)	)
213EX(	mov.l	r11,@(28,r4)	)
214	bf/s	2b
215	 add	#32,r4
216
	/*
	 * Longword tail: r0 = r6 >> 2 whole longwords left.  r6 itself is not
	 * updated here; .L_cleanup only looks at r6 & 3.
	 */
2171:	mov	r6, r0
218	shlr2	r0
219	tst	r0, r0
220	bt	.L_cleanup
2211:
222EX(	mov.l	@r5+,r1		)
223	dt	r0
224EX(	mov.l	r1,@r4		)
225	bf/s	1b
226	 add	#4,r4
227
228	bra	.L_cleanup
229	 nop
230
231! Destination = 10
232
/*
 * Destination halfword aligned but not longword aligned (address bits
 * 1:0 == 10); the source was longword aligned by the entry code.
 * On entry r2 = number of whole longwords to copy.
 *
 * r7 = r2 >> 3 counts 32-byte groups and r2 is reduced to r2 & 7 leftover
 * longwords.  Each group iteration reads eight source longwords and writes
 * one halfword, seven longwords and one halfword, so every longword store
 * lands on a longword boundary.  Leftover longwords are written as two
 * halfwords each.  r6 is not updated on this path; .L_cleanup only uses
 * r6 & 3.
 *
 * Every load and store goes through EX() so that a faulting access is
 * redirected to the 6000 fixup.  This includes the big-endian store of r10
 * at offset 16.
 */
233.L_dest10:
234	mov	r2,r7
235	shlr2	r7
236	shlr	r7
237	tst	r7,r7
238	mov	#7,r0
239	bt/s	1f
240	 and	r0,r2
	/*
	 * Group loop.  dt r7 at the top sets T for the bf/s at the bottom; the
	 * instructions in between are expected to leave T unchanged -- confirm
	 * against the ISA manual.
	 */
2412:
242	dt	r7
243#ifdef CONFIG_CPU_LITTLE_ENDIAN
	/*
	 * Little endian: read five longwords, store the low halfword of the
	 * first, then use xtrct to splice adjacent registers into
	 * destination-aligned longwords (see the ISA manual for xtrct).
	 */
244EX(	mov.l	@r5+,r0		)
245EX(	mov.l	@r5+,r1		)
246EX(	mov.l	@r5+,r8		)
247EX(	mov.l	@r5+,r9		)
248EX(	mov.l	@r5+,r10	)
249EX(	mov.w	r0,@r4		)
250	add	#2,r4
251	xtrct	r1,r0
252	xtrct	r8,r1
253	xtrct	r9,r8
254	xtrct	r10,r9
255
256EX(	mov.l	r0,@r4		)
257EX(	mov.l	r1,@(4,r4)	)
258EX(	mov.l	r8,@(8,r4)	)
259EX(	mov.l	r9,@(12,r4)	)
260
	/*
	 * Remaining three source longwords; the high halfword of the last one
	 * is written as the group's trailing halfword at offset 28.
	 */
261EX(	mov.l	@r5+,r1		)
262EX(	mov.l	@r5+,r8		)
263EX(	mov.l	@r5+,r0		)
264	xtrct	r1,r10
265	xtrct	r8,r1
266	xtrct	r0,r8
267	shlr16	r0
268EX(	mov.l	r10,@(16,r4)	)
269EX(	mov.l	r1,@(20,r4)	)
270EX(	mov.l	r8,@(24,r4)	)
271EX(	mov.w	r0,@(28,r4)	)
272	bf/s	2b
273	 add	#30,r4
274#else
	/*
	 * Big endian: the group is processed from its top down.  Source
	 * longwords are read at fixed offsets from r5 (advanced by 32 at the
	 * end), and r4 is moved back by 2 so the longword stores are aligned.
	 * NOTE(review): because stores run high-to-low and r4 is biased by -2
	 * here, the r3 - r4 value computed by the fixup looks approximate for
	 * faults inside this group -- confirm whether callers depend on it.
	 */
275EX(	mov.l	@(28,r5),r0	)
276EX(	mov.l	@(24,r5),r8	)
277EX(	mov.l	@(20,r5),r9	)
278EX(	mov.l	@(16,r5),r10	)
279EX(	mov.w	r0,@(30,r4)	)
280	add	#-2,r4
281	xtrct	r8,r0
282	xtrct	r9,r8
283	xtrct	r10,r9
284EX(	mov.l	r0,@(28,r4)	)
285EX(	mov.l	r8,@(24,r4)	)
286EX(	mov.l	r9,@(20,r4)	)
287
288EX(	mov.l	@(12,r5),r0	)
289EX(	mov.l	@(8,r5),r8	)
290	xtrct	r0,r10
291EX(	mov.l	@(4,r5),r9	)
	/* Destination store: wrapped in EX() like every other store here. */
292EX(	mov.l	r10,@(16,r4)	)
293EX(	mov.l	@r5,r10		)
294	xtrct	r8,r0
295	xtrct	r9,r8
296	xtrct	r10,r9
297EX(	mov.l	r0,@(12,r4)	)
298EX(	mov.l	r8,@(8,r4)	)
299	swap.w	r10,r0
300EX(	mov.l	r9,@(4,r4)	)
301EX(	mov.w	r0,@(2,r4)	)
302
	/* Net advance per group: r5 += 32, r4 += -2 + 34 = 32. */
303	add	#32,r5
304	bf/s	2b
305	 add	#34,r4
306#endif
	/* All groups done: continue only if leftover longwords remain. */
307	tst	r2,r2
308	bt	.L_cleanup
309
	/*
	 * Leftover loop: r2 longwords, each written as two halfwords in the
	 * order required by the configured endianness.
	 */
3101:	! Read longword, write two words per iteration
311EX(	mov.l	@r5+,r0		)
312	dt	r2
313#ifdef CONFIG_CPU_LITTLE_ENDIAN
314EX(	mov.w	r0,@r4		)
315	shlr16	r0
316EX(	mov.w 	r0,@(2,r4)	)
317#else
318EX(	mov.w	r0,@(2,r4)	)
319	shlr16	r0
320EX(	mov.w	r0,@r4		)
321#endif
322	bf/s	1b
323	 add	#4,r4
324
325	bra	.L_cleanup
326	 nop
327
328! Destination = 01 or 11
329
/*
 * Destination at an odd address (address bits 1:0 == 01 or 11); both jump
 * table entries land here.  On entry r2 = number of whole longwords to
 * copy.
 *
 * Each iteration reads one aligned source longword and writes it as
 * byte + halfword + byte, so the halfword store falls on an even address.
 * dt r2 sets T for the bf/s at the end of the iteration; r4 advances by
 * 1 + 3 = 4.  r6 is not updated; after the last longword execution falls
 * through to the .L_cleanup code that follows.
 */
330.L_dest01:
331.L_dest11:
332	! Read longword, write byte, word, byte per iteration
333EX(	mov.l	@r5+,r0		)
334	dt	r2
335#ifdef CONFIG_CPU_LITTLE_ENDIAN
	/* Little endian: low byte first, then the middle 16 bits, then the top byte. */
336EX(	mov.b	r0,@r4		)
337	shlr8	r0
338	add	#1,r4
339EX(	mov.w	r0,@r4		)
340	shlr16	r0
341EX(	mov.b	r0,@(2,r4)	)
342	bf/s	.L_dest01
343	 add	#3,r4
344#else
	/*
	 * Big endian: the low byte goes to offset 3; after shlr8, swap.w puts
	 * the original top byte in the low byte of r7 for offset 0, and the
	 * low 16 bits of r0 hold the middle two bytes for offsets 1-2.
	 */
345EX(	mov.b	r0,@(3,r4)	)
346	shlr8	r0
347	swap.w	r0,r7
348EX(	mov.b	r7,@r4		)
349	add	#1,r4
350EX(	mov.w	r0,@r4		)
351	bf/s	.L_dest01
352	 add	#3,r4
353#endif
354
355! Cleanup last few bytes
/*
 * Common tail for the paths that pushed r8-r11.  Copies the final r6 & 3
 * bytes (if any), sets the return value to 0, restores the saved registers
 * and returns.  A fault in any EX()-wrapped access arrives at fixup 6000,
 * which sets r0 = r3 - r4 and rejoins at 5000 so the same register
 * restore runs.
 */
356.L_cleanup:
357	mov	r6,r0
358	and	#3,r0
359	tst	r0,r0
360	bt	.L_exit
361	mov	r0,r6
362
	/* Byte loop: dt counts r6 down; r4 advances in the delay slot. */
363.L_cleanup_loop:
364EX(	mov.b	@r5+,r0		)
365	dt	r6
366EX(	mov.b	r0,@r4		)
367	bf/s	.L_cleanup_loop
368	 add	#1,r4
369
370.L_exit:
371	mov	#0,r0		! normal return
372
	/* 5000 is the resume point for fixup 6000; the pops below follow it. */
3735000:
374
	/*
	 * Fixup, placed in the .fixup section: r0 = r3 - r4 (end of destination
	 * minus current destination pointer, computed in the jmp delay slot),
	 * then jump to the 5000 label above via the literal at 8000.
	 */
375# Exception handler:
376.section .fixup, "ax"
3776000:
378	mov.l	8000f,r1
379	mov	r3,r0
380	jmp	@r1
381	 sub	r4,r0
382	.align	2
3838000:	.long	5000b
384
	/*
	 * Back in the previous section: pop r8, r9, r10, r11 (reverse of the
	 * push order at entry); the last pop sits in the rts delay slot.
	 */
385.previous
386	mov.l	@r15+,r8
387	mov.l	@r15+,r9
388	mov.l	@r15+,r10
389	rts
390	 mov.l	@r15+,r11
391