xref: /openbmc/linux/arch/sparc/lib/copy_user.S (revision 8cb5d748)
1/* copy_user.S: Sparc optimized copy_from_user and copy_to_user code.
2 *
3 *  Copyright(C) 1995 Linus Torvalds
4 *  Copyright(C) 1996 David S. Miller
5 *  Copyright(C) 1996 Eddie C. Dost
6 *  Copyright(C) 1996,1998 Jakub Jelinek
7 *
8 * derived from:
9 *	e-mail between David and Eddie.
10 *
11 * Returns 0 if successful, otherwise count of bytes not copied yet
12 */
13
14#include <asm/ptrace.h>
15#include <asm/asmmacro.h>
16#include <asm/page.h>
17#include <asm/thread_info.h>
18#include <asm/export.h>
19
/* ALLOC and EXECINSTR expand to the `#alloc' and `#execinstr' section
 * flags used by the .section directives inside the EX*() and EXT() macros
 * below.
 * NOTE(review): presumably these exist because a literal `#' inside a
 * function-like macro body would be treated by cpp as the stringify
 * operator - confirm.
 */
20/* Work around cpp -rob */
21#define ALLOC #alloc
22#define EXECINSTR #execinstr
/* EX(x,y,a,b): emit the instruction `x,y' at local label 98 together with
 * an __ex_table entry (98b, 99b).  The stub at label 99 in .fixup branches
 * to fixupretl with `a, b, %g3' in the branch delay slot, so the value
 * computed from a and b ends up in %g3, which fixupretl moves to %o0.
 * Assembly continues in .text afterwards.
 * NOTE(review): the code that consumes __ex_table is not in this file;
 * presumably a fault at 98 resumes execution at 99 - confirm.
 */
23#define EX(x,y,a,b) 				\
2498: 	x,y;					\
25	.section .fixup,ALLOC,EXECINSTR;	\
26	.align	4;				\
2799:	ba fixupretl;				\
28	 a, b, %g3;				\
29	.section __ex_table,ALLOC;		\
30	.align	4;				\
31	.word	98b, 99b;			\
32	.text;					\
33	.align	4
34
/* EX2(x,y,c,d,e,a,b): like EX(), but the stub at label 99 first executes
 * the extra instruction `c, d, e' and only then branches to fixupretl with
 * `a, b, %g3' in the delay slot.  This lets the fixup compute the return
 * value in two steps.
 */
35#define EX2(x,y,c,d,e,a,b) 			\
3698: 	x,y;					\
37	.section .fixup,ALLOC,EXECINSTR;	\
38	.align	4;				\
3999:	c, d, e;				\
40	ba fixupretl;				\
41	 a, b, %g3;				\
42	.section __ex_table,ALLOC;		\
43	.align	4;				\
44	.word	98b, 99b;			\
45	.text;					\
46	.align	4
47
/* EXO2(x,y): emit the instruction `x, y' at local label 98 with an
 * __ex_table entry (98b, 97f).  No per-site stub is generated: label 97 is
 * the shared stub in the .fixup section at the end of this file, which
 * copies %o2 into %g3 and falls into fixupretl.  The value returned is
 * therefore whatever %o2 holds when the instruction is reached.
 */
48#define EXO2(x,y) 				\
4998: 	x, y;					\
50	.section __ex_table,ALLOC;		\
51	.align	4;				\
52	.word	98b, 97f;			\
53	.text;					\
54	.align	4
55
/* EXT(start,end,handler): emit a four-word __ex_table record
 * `start, 0, end, handler' and switch back to .text.  It is used after each
 * unrolled copy block, with start/end being the labels that bracket the
 * block and handler one of the numbered fixups (50..55) at the end of the
 * file.
 * NOTE(review): presumably the zero second word marks a range entry for
 * the exception-table consumer, which is not in this file - confirm.
 */
56#define EXT(start,end,handler)			\
57	.section __ex_table,ALLOC;		\
58	.align	4;				\
59	.word	start, 0, end, handler;		\
60	.text;					\
61	.align	4
62
63/* Please do not change following macros unless you change logic used
64 * in .fixup at the end of this file as well
65 */
66
67/* Both these macros have to start with exactly the same insn */
/* MOVE_BIGCHUNK: copy 32 bytes from [%src + offset] to [%dst + offset].
 * Four ldd loads fill the register pairs t0/t1, t2/t3, t4/t5 and t6/t7;
 * eight word-sized st stores then write them out, so only the source side
 * is accessed with doubleword instructions.
 * Expands to exactly 12 instructions (4 ldd + 8 st); fixup 50 decodes the
 * faulting instruction index using that count.
 */
68#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
69	ldd	[%src + (offset) + 0x00], %t0; \
70	ldd	[%src + (offset) + 0x08], %t2; \
71	ldd	[%src + (offset) + 0x10], %t4; \
72	ldd	[%src + (offset) + 0x18], %t6; \
73	st	%t0, [%dst + (offset) + 0x00]; \
74	st	%t1, [%dst + (offset) + 0x04]; \
75	st	%t2, [%dst + (offset) + 0x08]; \
76	st	%t3, [%dst + (offset) + 0x0c]; \
77	st	%t4, [%dst + (offset) + 0x10]; \
78	st	%t5, [%dst + (offset) + 0x14]; \
79	st	%t6, [%dst + (offset) + 0x18]; \
80	st	%t7, [%dst + (offset) + 0x1c];
81
/* MOVE_BIGALIGNCHUNK: copy 32 bytes from [%src + offset] to
 * [%dst + offset] using doubleword accesses on both sides: four ldd loads
 * followed by four std stores of the same register pairs.
 * Expands to exactly 8 instructions; fixup 52 decodes the faulting
 * instruction index using that count.  Its first instruction is the same
 * ldd as in MOVE_BIGCHUNK, which __copy_user relies on when it branches to
 * ldd_std + 4.
 */
82#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
83	ldd	[%src + (offset) + 0x00], %t0; \
84	ldd	[%src + (offset) + 0x08], %t2; \
85	ldd	[%src + (offset) + 0x10], %t4; \
86	ldd	[%src + (offset) + 0x18], %t6; \
87	std	%t0, [%dst + (offset) + 0x00]; \
88	std	%t2, [%dst + (offset) + 0x08]; \
89	std	%t4, [%dst + (offset) + 0x10]; \
90	std	%t6, [%dst + (offset) + 0x18];
91
/* MOVE_LASTCHUNK: copy the 16 bytes that end `offset' bytes before %src /
 * %dst, i.e. the range [ptr - offset - 0x10, ptr - offset).  Two ldd loads
 * fill t0/t1 and t2/t3, four st stores write them out.
 * Expands to exactly 6 instructions (24 bytes of code per 16 bytes
 * copied); the computed jump into copy_user_table and fixup 51 both depend
 * on that ratio.
 */
92#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
93	ldd	[%src - (offset) - 0x10], %t0; \
94	ldd	[%src - (offset) - 0x08], %t2; \
95	st	%t0, [%dst - (offset) - 0x10]; \
96	st	%t1, [%dst - (offset) - 0x0c]; \
97	st	%t2, [%dst - (offset) - 0x08]; \
98	st	%t3, [%dst - (offset) - 0x04];
99
/* MOVE_HALFCHUNK: copy 8 bytes from [%src + offset] to [%dst + offset] as
 * four halfwords: four lduh loads into t0..t3 followed by four sth stores.
 * Expands to exactly 8 instructions; fixup 53 decodes the faulting
 * instruction index using that count.
 */
100#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
101	lduh	[%src + (offset) + 0x00], %t0; \
102	lduh	[%src + (offset) + 0x02], %t1; \
103	lduh	[%src + (offset) + 0x04], %t2; \
104	lduh	[%src + (offset) + 0x06], %t3; \
105	sth	%t0, [%dst + (offset) + 0x00]; \
106	sth	%t1, [%dst + (offset) + 0x02]; \
107	sth	%t2, [%dst + (offset) + 0x04]; \
108	sth	%t3, [%dst + (offset) + 0x06];
109
/* MOVE_SHORTCHUNK: copy the 2 bytes at [ptr - offset - 2] and
 * [ptr - offset - 1] from %src to %dst with two ldub loads and two stb
 * stores.  With a negative `offset' (as byte_chunk passes) the addresses
 * are at or after the pointer; with a non-negative one (as the table
 * before short_table_end passes) they are before it.
 * Expands to exactly 4 instructions (16 bytes of code per 2 bytes copied);
 * the computed jump in short_end and fixups 54/55 depend on that.
 */
110#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
111	ldub	[%src - (offset) - 0x02], %t0; \
112	ldub	[%src - (offset) - 0x01], %t1; \
113	stb	%t0, [%dst - (offset) - 0x02]; \
114	stb	%t1, [%dst - (offset) - 0x01];
115
/* Start of the copy routine's code in .text.  __copy_user_begin labels the
 * first address; the exported entry point __copy_user is defined further
 * down, after the dword_align helper that it branches back to.
 * NOTE(review): presumably __copy_user_begin pairs with __copy_user_end so
 * other code can test whether an address lies inside this routine -
 * confirm against the users of these symbols.
 */
116	.text
117	.align	4
118
119	.globl  __copy_user_begin
120__copy_user_begin:
121
122	.globl	__copy_user
123	EXPORT_SYMBOL(__copy_user)
/* dword_align: reached from __copy_user when src (%o1) is not 4-byte
 * aligned; on that path dst and src share the same low two address bits.
 * Copies 1, 2 or 3 leading bytes so that src becomes 4-byte aligned,
 * updating %o0 (dst), %o1 (src) and %o2 (len), then rejoins __copy_user at
 * its local label 3.  Every access is wrapped in EXO2, so a fault returns
 * the current %o2.
 */
124dword_align:
125	andcc	%o1, 1, %g0
	/* src even -> only a halfword is needed (label 4).  The delay slot
	 * sets the condition codes from (src & 2) for the bne below; the
	 * loads, stores, add and sub in between do not change them.
	 */
126	be	4f
127	 andcc	%o1, 2, %g0
128
	/* src odd: copy one byte */
129	EXO2(ldub [%o1], %g2)
130	add	%o1, 1, %o1
131	EXO2(stb %g2, [%o0])
132	sub	%o2, 1, %o2
	/* original src had bit 1 set: that single byte aligned it, done */
133	bne	3f
134	 add	%o0, 1, %o0
135
	/* otherwise a halfword is still needed to reach 4-byte alignment */
136	EXO2(lduh [%o1], %g2)
137	add	%o1, 2, %o1
138	EXO2(sth %g2, [%o0])
139	sub	%o2, 2, %o2
140	b	3f
141	 add	%o0, 2, %o0
1424:
	/* src was even but not 4-byte aligned: copy one halfword */
143	EXO2(lduh [%o1], %g2)
144	add	%o1, 2, %o1
145	EXO2(sth %g2, [%o0])
146	sub	%o2, 2, %o2
147	b	3f
148	 add	%o0, 2, %o0
149
/* __copy_user: entry point.  In: %o0 = dst, %o1 = src, %o2 = len.
 * Out: %o0 = 0 on success; on a fault the fixup code returns the value it
 * computed in %g3 (count of bytes not copied, per the file header).
 *
 * This block holds the path taken when dst and src have the same low two
 * address bits and len > 15:
 *   - align src to 8 bytes (dword_align, then an optional word copy),
 *   - copy 128-byte blocks (label 5, or ldd_std when dst is 8-byte aligned),
 *   - copy the remaining 16-byte chunks via a computed jump into
 *     copy_user_table,
 *   - copy the last 0..15 bytes (copy_user_table_end / copy_user_last7).
 * From label 3 on, %g1 holds the remaining length and %g7 the byte count
 * of the phase in progress.
 */
150__copy_user:	/* %o0=dst %o1=src %o2=len */
151	xor	%o0, %o1, %o4
1521:
	/* %o5 = (dst ^ src) & 3; non-zero means the pointers cannot both be
	 * word aligned at the same time
	 */
153	andcc	%o4, 3, %o5
1542:
	/* delay slot compares len with 15; cannot_optimize and the bleu
	 * below both consume that comparison
	 */
155	bne	cannot_optimize
156	 cmp	%o2, 15
157
	/* len <= 15: short copy; delay slot tests src & 3 for its first
	 * branch and for the bne below
	 */
158	bleu	short_aligned_end
159	 andcc	%o1, 3, %g0
160
	/* src not word aligned: dword_align fixes that and comes back to
	 * label 3, which is this branch's own delay slot instruction
	 */
161	bne	dword_align
1623:
163	 andcc	%o1, 4, %g0
164
	/* src already 8-byte aligned -> skip the single word copy.
	 * %g1 = remaining length from here on.
	 */
165	be	2f
166	 mov	%o2, %g1
167
	/* copy one word so that src becomes 8-byte aligned */
168	EXO2(ld [%o1], %o4)
169	sub	%g1, 4, %g1
170	EXO2(st %o4, [%o0])
171	add	%o1, 4, %o1
172	add	%o0, 4, %o0
1732:
	/* %g7 = number of bytes contained in whole 128-byte blocks */
174	andcc	%g1, 0xffffff80, %g7
	/* no full block -> go to the 16-byte chunk stage; delay slot tests
	 * dst & 4
	 */
175	be	3f
176	 andcc	%o0, 4, %g0
177
	/* dst 8-byte aligned as well: use the ldd/std loop.  The delay slot
	 * is the first ldd of MOVE_BIGCHUNK below, identical to the first
	 * instruction of ldd_std, hence the target ldd_std + 4.
	 */
178	be	ldd_std + 4
1795:
	/* 128 bytes per iteration: ldd loads, word stores */
180	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
181	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
182	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
183	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
18480:
	/* faults between 5 and 80 are handled by fixup 50 */
185	EXT(5b, 80b, 50f)
186	subcc	%g7, 128, %g7
187	add	%o1, 128, %o1
188	bne	5b
189	 add	%o0, 128, %o0
1903:
	/* %g7 = bytes to copy as 16-byte chunks (0x10 .. 0x70) */
191	andcc	%g1, 0x70, %g7
	/* none: skip the table.  The delay slot tests bit 3 of the length;
	 * the result is consumed by the `be copy_user_last7' at
	 * copy_user_table_end on both paths.
	 */
192	be	copy_user_table_end
193	 andcc	%g1, 8, %g0
194
	/* Computed jump: each MOVE_LASTCHUNK is 24 bytes of code for 16
	 * bytes of data, so enter the table 1.5 * %g7 bytes before its end.
	 * src and dst are advanced by %g7 first because the table entries
	 * address backwards from the pointers.
	 */
195	sethi	%hi(copy_user_table_end), %o5
196	srl	%g7, 1, %o4
197	add	%g7, %o4, %o4
198	add	%o1, %g7, %o1
199	sub	%o5, %o4, %o5
200	jmpl	%o5 + %lo(copy_user_table_end), %g0
201	 add	%o0, %g7, %o0
202
203copy_user_table:
204	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
205	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
206	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
207	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
208	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
209	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
210	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
211copy_user_table_end:
	/* faults inside the table are handled by fixup 51 */
212	EXT(copy_user_table, copy_user_table_end, 51f)
	/* bit 3 of the length clear -> no 8-byte piece; delay slot tests
	 * bit 2 for copy_user_last7
	 */
213	be	copy_user_last7
214	 andcc	%g1, 4, %g0
215
	/* copy 8 bytes: one ldd, two word stores.  The fixups return
	 * (%g1 & 0xf), or that value minus 4 if the second store faults.
	 */
216	EX(ldd	[%o1], %g2, and %g1, 0xf)
217	add	%o0, 8, %o0
218	add	%o1, 8, %o1
219	EX(st	%g2, [%o0 - 0x08], and %g1, 0xf)
220	EX2(st	%g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4)
221copy_user_last7:
	/* Copy the last 0..7 bytes of the length in %g1.  Entered with the
	 * condition codes reflecting (%g1 & 4); each delay slot tests the
	 * next lower bit.
	 */
222	be	1f
223	 andcc	%g1, 2, %g0
224
	/* word */
225	EX(ld	[%o1], %g2, and %g1, 7)
226	add	%o1, 4, %o1
227	EX(st	%g2, [%o0], and %g1, 7)
228	add	%o0, 4, %o0
2291:
230	be	1f
231	 andcc	%g1, 1, %g0
232
	/* halfword */
233	EX(lduh	[%o1], %g2, and %g1, 3)
234	add	%o1, 2, %o1
235	EX(sth	%g2, [%o0], and %g1, 3)
236	add	%o0, 2, %o0
2371:
238	be	1f
239	 nop
240
	/* final byte; a fault here returns 1 */
241	EX(ldub	[%o1], %g2, add %g0, 1)
242	EX(stb	%g2, [%o0], add %g0, 1)
2431:
	/* success: return 0 */
244	retl
245 	 clr	%o0
246
/* ldd_std: 128-byte block loop used when dst is 8-byte aligned too, so
 * doubleword stores can be used.  __copy_user enters at ldd_std + 4 after
 * executing the identical first ldd in its branch delay slot; the loop
 * itself branches back to ldd_std.  %g7 counts the bytes left in whole
 * 128-byte blocks, %g1 is the remaining length as in __copy_user.
 * After the loop the 16-byte chunk stage is the same code as at label 3
 * of __copy_user.
 */
247ldd_std:
248	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
249	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
250	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
251	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
25281:
	/* faults between ldd_std and 81 are handled by fixup 52 */
253	EXT(ldd_std, 81b, 52f)
254	subcc	%g7, 128, %g7
255	add	%o1, 128, %o1
256	bne	ldd_std
257	 add	%o0, 128, %o0
258
	/* %g7 = bytes to copy as 16-byte chunks; none -> straight to the
	 * tail, with the delay slot testing bit 3 of the length for it
	 */
259	andcc	%g1, 0x70, %g7
260	be	copy_user_table_end
261	 andcc	%g1, 8, %g0
262
	/* computed jump into copy_user_table: 1.5 bytes of code per byte to
	 * copy, pointers advanced by %g7 beforehand
	 */
263	sethi	%hi(copy_user_table_end), %o5
264	srl	%g7, 1, %o4
265	add	%g7, %o4, %o4
266	add	%o1, %g7, %o1
267	sub	%o5, %o4, %o5
268	jmpl	%o5 + %lo(copy_user_table_end), %g0
269	 add	%o0, %g7, %o0
270
/* cannot_optimize: reached from __copy_user when (dst ^ src) & 3 != 0, so
 * word copies are not possible.  On entry %o5 = (dst ^ src) & 3 and the
 * condition codes hold the result of `cmp %o2, 15'.
 *   - len <= 15              -> short_end
 *   - (dst ^ src) & 3 != 2   -> byte_chunk (byte copies only)
 *   - (dst ^ src) & 3 == 2   -> halfword loop below (dst and src have the
 *                               same parity)
 * %o3 holds the number of bytes to move in 16-byte iterations.
 */
271cannot_optimize:
272	bleu	short_end
273	 cmp	%o5, 2
274
	/* delay slot: %o3 = len rounded down to a multiple of 16 */
275	bne	byte_chunk
276	 and	%o2, 0xfffffff0, %o3
277
	/* halfword path: make src even first */
278	andcc	%o1, 1, %g0
279	be	10f
280	 nop
281
	/* src odd: copy one byte, then recompute %o3 from the new length;
	 * if fewer than 16 bytes remain go to short_end
	 */
282	EXO2(ldub [%o1], %g2)
283	add	%o1, 1, %o1
284	EXO2(stb %g2, [%o0])
285	sub	%o2, 1, %o2
286	andcc	%o2, 0xfffffff0, %o3
287	be	short_end
288	 add	%o0, 1, %o0
28910:
	/* 16 bytes per iteration as eight halfwords */
290	MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
291	MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
29282:
	/* faults between 10 and 82 are handled by fixup 53 */
293	EXT(10b, 82b, 53f)
294	subcc	%o3, 0x10, %o3
295	add	%o1, 0x10, %o1
296	bne	10b
297	 add	%o0, 0x10, %o0
	/* continue at label 2 inside short_end with %o3 = len & 0xe, i.e.
	 * the same value short_end itself would compute
	 */
298	b	2f
299	 and	%o2, 0xe, %o3
300
/* byte_chunk: byte-wise loop, entered from cannot_optimize with %o3 = len
 * rounded down to a multiple of 16.  Each iteration copies 16 bytes with
 * eight MOVE_SHORTCHUNKs; the negative offsets -0x02 .. -0x10 make them
 * address [ptr + 0] .. [ptr + 15].  When %o3 reaches zero execution falls
 * through into short_end.
 */
301byte_chunk:
302	MOVE_SHORTCHUNK(o1, o0, -0x02, g2, g3)
303	MOVE_SHORTCHUNK(o1, o0, -0x04, g2, g3)
304	MOVE_SHORTCHUNK(o1, o0, -0x06, g2, g3)
305	MOVE_SHORTCHUNK(o1, o0, -0x08, g2, g3)
306	MOVE_SHORTCHUNK(o1, o0, -0x0a, g2, g3)
307	MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
308	MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
309	MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
31083:
	/* faults between byte_chunk and 83 are handled by fixup 54 */
311	EXT(byte_chunk, 83b, 54f)
312	subcc	%o3, 0x10, %o3
313	add	%o1, 0x10, %o1
314	bne	byte_chunk
315	 add	%o0, 0x10, %o0
316
/* short_end: copy the last (len & 15) bytes one byte at a time.
 * %o3 = len & 0xe is the even part, copied through a computed jump into
 * the table of MOVE_SHORTCHUNKs at label 84; the odd byte, if any, is
 * copied after short_table_end.  Returns 0 in %o0 on success.
 * Label 2 is also entered from the halfword loop in cannot_optimize with
 * %o3 already set.
 */
317short_end:
318	and	%o2, 0xe, %o3
3192:
	/* Each MOVE_SHORTCHUNK is 16 bytes of code for 2 bytes of data, so
	 * enter the table (%o3 << 3) bytes before its end.  dst and src are
	 * advanced by %o3 first because the table entries address backwards
	 * from the pointers.  The delay slot tests the low bit of len for
	 * the `be 1f' after the table.
	 */
320	sethi	%hi(short_table_end), %o5
321	sll	%o3, 3, %o4
322	add	%o0, %o3, %o0
323	sub	%o5, %o4, %o5
324	add	%o1, %o3, %o1
325	jmpl	%o5 + %lo(short_table_end), %g0
326	 andcc	%o2, 1, %g0
32784:
328	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
329	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
330	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
331	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
332	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
333	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
334	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
335short_table_end:
	/* faults between 84 and short_table_end are handled by fixup 55 */
336	EXT(84b, short_table_end, 55f)
	/* len even -> done */
337	be	1f
338	 nop
	/* final odd byte; a fault here returns 1 */
339	EX(ldub	[%o1], %g2, add %g0, 1)
340	EX(stb	%g2, [%o0], add %g0, 1)
3411:
	/* success: return 0 */
342	retl
343 	 clr	%o0
344
/* short_aligned_end: reached from __copy_user when len <= 15 and dst/src
 * share their low two address bits.  On entry the condition codes reflect
 * (src & 3).  If src is not word aligned the byte-wise short_end is used;
 * otherwise an optional 8-byte piece is copied with word accesses and the
 * remaining 0..7 bytes are handed to copy_user_last7 with %g1 = %o2.
 */
345short_aligned_end:
	/* delay slot tests bit 3 of len */
346	bne	short_end
347	 andcc	%o2, 8, %g0
348
	/* no 8-byte piece; delay slot tests bit 2 of len, which
	 * copy_user_last7 consumes
	 */
349	be	1f
350	 andcc	%o2, 4, %g0
351
	/* copy 8 bytes as two words; %o2 is not modified here.  A fault on
	 * the second store returns %o2 - 4, the other three return %o2.
	 */
352	EXO2(ld	[%o1 + 0x00], %g2)
353	EXO2(ld	[%o1 + 0x04], %g3)
354	add	%o1, 8, %o1
355	EXO2(st	%g2, [%o0 + 0x00])
356	EX(st	%g3, [%o0 + 0x04], sub %o2, 4)
357	add	%o0, 8, %o0
3581:
359	b	copy_user_last7
360	 mov	%o2, %g1
361
/* Shared fixup stubs.
 * 97:        target of every EXO2() exception-table entry; sets the return
 *            value to the current %o2 and falls into fixupretl.
 * fixupretl: common exit of all fixups; returns to the caller of
 *            __copy_user with %o0 = %g3.
 */
362	.section .fixup,#alloc,#execinstr
363	.align	4
36497:
365	mov	%o2, %g3
366fixupretl:
367	retl
368	 mov	%g3, %o0
369
370/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */
/* Fixup 50: handler for the EXT range 5..80 in __copy_user, i.e. the four
 * MOVE_BIGCHUNK expansions of 12 instructions / 32 bytes each.
 * The compare/branch ladder subtracts 12 from %g2 and 32 from %g7 once
 * for every chunk that precedes the faulting one, leaving %g2 as the
 * instruction index inside the faulting chunk.  Indexes 0..3 are the ldd
 * loads (nothing stored yet); for the stores, (index - 4) * 4 bytes of
 * that chunk are counted as done.
 * Label 60 is shared with fixup 52: %g3 = (%g1 & 0x7f) + %g7 - %g2, and
 * %o0 ends up advanced by 32 for each chunk that was skipped.
 */
37150:
372/* This magic counts how many bytes are left when crash in MOVE_BIGCHUNK
373 * happens. This is derived from the amount ldd reads, st stores, etc.
374 * x = g2 % 12;
375 * g3 = g1 + g7 - ((g2 / 12) * 32 + (x < 4) ? 0 : (x - 4) * 4);
376 * o0 += (g2 / 12) * 32;
377 */
378	cmp	%g2, 12
	/* add %g7 now, subtract the reduced %g7 at label 60 */
379	add	%o0, %g7, %o0
380	bcs	1f
381	 cmp	%g2, 24
382	bcs	2f
383	 cmp	%g2, 36
384	bcs	3f
385	 nop
386	sub	%g2, 12, %g2
387	sub	%g7, 32, %g7
3883:	sub	%g2, 12, %g2
389	sub	%g7, 32, %g7
3902:	sub	%g2, 12, %g2
391	sub	%g7, 32, %g7
	/* fault in one of the four loads: nothing of this chunk was stored
	 * (annulled delay slot clears %g2 only when the branch is taken)
	 */
3921:	cmp	%g2, 4
393	bcs,a	60f
394	 clr	%g2
	/* fault in a store: %g2 = bytes of this chunk already stored */
395	sub	%g2, 4, %g2
396	sll	%g2, 2, %g2
39760:	and	%g1, 0x7f, %g3
398	sub	%o0, %g7, %o0
399	add	%g3, %g7, %g3
400	ba	fixupretl
401	 sub	%g3, %g2, %g3
/* Fixup 51: handler for the EXT range copy_user_table ..
 * copy_user_table_end, i.e. seven MOVE_LASTCHUNK expansions of 6
 * instructions / 16 bytes each (42 instructions in total).
 * %g2 is turned into i = 41 - index, the distance from the last
 * instruction of the table.  The loop at label 1 adds 16 to %g1 for every
 * whole chunk in i and leaves j = i % 6 in %g2.
 */
40251:
403/* i = 41 - g2; j = i % 6;
404 * g3 = (g1 & 15) + (i / 6) * 16 + (j < 4) ? (j + 1) * 4 : 16;
405 * o0 -= (i / 6) * 16 + 16;
406 */
407	neg	%g2
408	and	%g1, 0xf, %g1
409	add	%g2, 41, %g2
410	add	%o0, %g1, %o0
	/* i / 6 by repeated subtraction; once %g2 < 6 the annulled delay
	 * slot compares the remainder j with 4 for label 2
	 */
4111:	cmp	%g2, 6
412	bcs,a	2f
413	 cmp	%g2, 4
414	add	%g1, 16, %g1
415	b	1b
416	 sub	%g2, 6, %g2
	/* j >= 4 (fault in a load): the whole 16-byte chunk is outstanding;
	 * otherwise (j + 1) * 4 bytes are
	 */
4172:	bcc,a	2f
418	 mov	16, %g2
419	inc	%g2
420	sll	%g2, 2, %g2
4212:	add	%g1, %g2, %g3
422	ba	fixupretl
423	 sub	%o0, %g3, %o0
/* Fixup 52: handler for the EXT range ldd_std .. 81, i.e. the four
 * MOVE_BIGALIGNCHUNK expansions of 8 instructions / 32 bytes each.
 * %g4 = (index rounded down to a multiple of 8) * 4 = 32 bytes for every
 * chunk preceding the faulting one; it is subtracted from %g7.
 * Bit 2 of the index tells loads (0..3) from std stores (4..7); for a
 * store, %g2 = (index & 3) * 8 bytes of the chunk are counted as done,
 * for a load %g2 is cleared.  The result is finished at label 60 inside
 * fixup 50.
 */
42452:
425/* g3 = g1 + g7 - (g2 / 8) * 32 + (g2 & 4) ? (g2 & 3) * 8 : 0;
426   o0 += (g2 / 8) * 32 */
427	andn	%g2, 7, %g4
428	add	%o0, %g7, %o0
429	andcc	%g2, 4, %g0
430	and	%g2, 3, %g2
431	sll	%g4, 2, %g4
432	sll	%g2, 3, %g2
	/* fault in a store: keep %g2; both branches execute the sub in
	 * their path (delay slot here, fall-through below)
	 */
433	bne	60b
434	 sub	%g7, %g4, %g7
	/* fault in a load: nothing of this chunk was stored */
435	ba	60b
436	 clr	%g2
/* Fixup 53: handler for the EXT range 10 .. 82 in cannot_optimize, i.e.
 * the two MOVE_HALFCHUNK expansions of 8 instructions / 8 bytes each.
 * Bit 3 of the index is 8 when the fault is in the second chunk (the
 * first 8 bytes are done); bit 2 tells lduh loads from sth stores, and for
 * a store (index & 3) * 2 further bytes are counted as done.
 * %o3 is the loop's remaining 16-byte-block byte count and %o2 the length.
 */
43753:
438/* g3 = o3 + (o2 & 15) - (g2 & 8) - (g2 & 4) ? (g2 & 3) * 2 : 0;
439   o0 += (g2 & 8) */
440	and	%g2, 3, %g4
441	andcc	%g2, 4, %g0
442	and	%g2, 8, %g2
443	sll	%g4, 1, %g4
	/* fault in a load: only the (index & 8) part counts as copied */
444	be	1f
445	 add	%o0, %g2, %o0
446	add	%g2, %g4, %g2
4471:	and	%o2, 0xf, %g3
448	add	%g3, %o3, %g3
449	ba	fixupretl
450	 sub	%g3, %g2, %g3
/* Fixup 54: handler for the EXT range byte_chunk .. 83, i.e. the eight
 * MOVE_SHORTCHUNK expansions of 4 instructions / 2 bytes each.
 * %o4 = (index / 4) * 2 is the number of bytes in the chunks preceding
 * the faulting one.  %o5 = (index & 1) & (index >> 1) is 1 only for the
 * last instruction of a chunk (second stb), where one more byte has
 * already been stored.
 * %o3 is the loop's remaining 16-byte-block byte count and %o2 the length.
 */
45154:
452/* g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - (g2 & 2) ? (g2 & 1) : 0;
453   o0 += (g2 / 4) * 2 */
454	srl	%g2, 2, %o4
455	and	%g2, 1, %o5
456	srl	%g2, 1, %g2
457	add	%o4, %o4, %o4
458	and	%o5, %g2, %o5
459	and	%o2, 0xf, %o2
460	add	%o0, %o4, %o0
461	sub	%o3, %o5, %o3
462	sub	%o2, %o4, %o2
463	ba	fixupretl
464	 add	%o2, %o3, %g3
/* Fixup 55: handler for the EXT range 84 .. short_table_end in short_end,
 * i.e. seven MOVE_SHORTCHUNK expansions of 4 instructions / 2 bytes each
 * (28 instructions in total).
 * %g2 is turned into i = 27 - index, the distance from the last
 * instruction of the table.  %o5 = (i / 4) * 2 is the number of bytes in
 * the chunks after the faulting one; %g2 becomes 1 when (i & 3) != 0 and
 * 0 otherwise.  %o2 is reduced to the odd trailing byte (len & 1).
 */
46555:
466/* i = 27 - g2;
467   g3 = (o2 & 1) + i / 4 * 2 + !(i & 3);
468   o0 -= i / 4 * 2 + 1 */
469	neg	%g2
470	and	%o2, 1, %o2
471	add	%g2, 27, %g2
472	srl	%g2, 2, %o5
473	andcc	%g2, 3, %g0
474	mov	1, %g2
475	add	%o5, %o5, %o5
	/* annulled delay slot: %g2 is cleared only when (i & 3) == 0 */
476	be,a	1f
477	 clr	%g2
4781:	add	%g2, %o5, %g3
479	sub	%o0, %g3, %o0
480	ba	fixupretl
481	 add	%g3, %o2, %g3
482
/* __copy_user_end: global label placed after the last fixup; note that it
 * is emitted while the .fixup section is current, not .text.
 * NOTE(review): presumably the counterpart of __copy_user_begin for
 * address-range checks done elsewhere - confirm against its users.
 */
483	.globl  __copy_user_end
484__copy_user_end:
485