/*
 * copy_page, __copy_user_page, __copy_user implementations for SuperH
 *
 * Copyright (C) 2001  Niibe Yutaka & Kaz Kojima
 * Copyright (C) 2002  Toshinobu Sugioka
 * Copyright (C) 2006  Paul Mundt
 */
#include <linux/linkage.h>
#include <asm/page.h>

/*
 * copy_page
 * @to: P1 address
 * @from: P1 address
 *
 * void copy_page(void *to, void *from)
 */

/*
 * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch
 * r8 --- from + PAGE_SIZE
 * r9 --- not used
 * r10 --- to
 * r11 --- from
 */
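
/*
 * Rough C equivalent of the loop below, for illustration only (the
 * real code additionally manages the SH-4 operand cache with
 * movca.l/ocbwb):
 *
 *	void copy_page(void *to, void *from)
 *	{
 *		unsigned long *dst = to, *src = from;
 *		unsigned long *end = src + PAGE_SIZE / sizeof(*src);
 *
 *		while (src != end) {
 *			unsigned long a = src[0], b = src[1], c = src[2],
 *				      d = src[3], e = src[4], f = src[5],
 *				      g = src[6], h = src[7];
 *			dst[0] = a; dst[1] = b; dst[2] = c; dst[3] = d;
 *			dst[4] = e; dst[5] = f; dst[6] = g; dst[7] = h;
 *			src += 8;
 *			dst += 8;
 *		}
 *	}
 */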
ENTRY(copy_page)
	mov.l	r8,@-r15
	mov.l	r10,@-r15
	mov.l	r11,@-r15
	mov	r4,r10
	mov	r5,r11
	mov	r5,r8
	mov.l	.Lpsz,r0
	add	r0,r8
	!
1:	mov.l	@r11+,r0
	mov.l	@r11+,r1
	mov.l	@r11+,r2
	mov.l	@r11+,r3
	mov.l	@r11+,r4
	mov.l	@r11+,r5
	mov.l	@r11+,r6
	mov.l	@r11+,r7
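	! On SH-4, movca.l stores r0 to @r10 while allocating the cache
	! line without first fetching it from memory; the ocbwb below then
	! writes the completed line back so the P1 page is coherent.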
#if defined(CONFIG_CPU_SH4)
	movca.l	r0,@r10
	mov	r10,r0
#else
	mov.l	r0,@r10
#endif
	add	#32,r10
	mov.l	r7,@-r10
	mov.l	r6,@-r10
	mov.l	r5,@-r10
	mov.l	r4,@-r10
	mov.l	r3,@-r10
	mov.l	r2,@-r10
	mov.l	r1,@-r10
#if defined(CONFIG_CPU_SH4)
	ocbwb	@r0
#endif
	cmp/eq	r11,r8
	bf/s	1b
	 add	#28,r10
	!
	mov.l	@r15+,r11
	mov.l	@r15+,r10
	mov.l	@r15+,r8
	rts
	 nop

	.balign 4
.Lpsz:	.long	PAGE_SIZE

/*
 * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n);
 * Return the number of bytes NOT copied
 */
#define EX(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6000f	;	\
	.previous
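
/*
 * Each EX()-wrapped access gets an __ex_table entry pairing its address
 * (label 9999) with the fixup code at label 6000 below. If the access
 * faults, the exception handler resumes at the fixup, which computes
 * the number of bytes left uncopied (r3, the saved end of the
 * destination, minus r4, the current destination pointer) and returns.
 */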
ENTRY(__copy_user)
	! Check if small number of bytes
	mov	#11,r0
	mov	r4,r3
	cmp/gt	r0,r6		! r6 (len) > r0 (11)
	bf/s	.L_cleanup_loop_no_pop
	 add	r6,r3		! last destination address

	! Calculate bytes needed to align to src
	mov.l	r11,@-r15
	neg	r5,r0
	mov.l	r10,@-r15
	add	#4,r0
	mov.l	r9,@-r15
	and	#3,r0
	mov.l	r8,@-r15
	tst	r0,r0
	bt	2f

1:
	! Copy bytes to long word align src
EX(	mov.b	@r5+,r1		)
	dt	r0
	add	#-1,r6
EX(	mov.b	r1,@r4		)
	bf/s	1b
	 add	#1,r4

	! Jump to appropriate routine depending on dest
2:	mov	#3,r1
	mov	r6, r2
	and	r4,r1
	shlr2	r2
	shll2	r1
	mova	.L_jump_tbl,r0
	mov.l	@(r0,r1),r1
	jmp	@r1
	 nop

	.align 2
.L_jump_tbl:
	.long	.L_dest00
	.long	.L_dest01
	.long	.L_dest10
	.long	.L_dest11
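	! The table is indexed by (dest & 3) * 4; r2 already holds the
	! remaining length in longwords (len >> 2) for the routines below.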

/*
 * Come here if there are fewer than 12 bytes to copy
 *
 * Keep the branch target close, so the bf/s displacement doesn't
 * overflow and result in a more expensive branch being inserted. This
 * is the fast-path for small copies; the jump via the jump table will
 * hit the default slow-path cleanup. -PFM.
 */
.L_cleanup_loop_no_pop:
	tst	r6,r6		! Check explicitly for zero
	bt	1f

2:
EX(	mov.b	@r5+,r0		)
	dt	r6
EX(	mov.b	r0,@r4		)
	bf/s	2b
	 add	#1,r4

1:	mov	#0,r0		! normal return
5000:

# Exception handler:
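# Faults land here via the __ex_table entries: r3 holds the saved end
# of the destination and r4 the current destination pointer, so r3 - r4
# is the byte count not copied; we then jump back to the common return
# path at 5000.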
.section .fixup, "ax"
6000:
	mov.l	8000f,r1
	mov	r3,r0
	jmp	@r1
	 sub	r4,r0
	.align	2
8000:	.long	5000b

.previous
	rts
	 nop

! Destination = 00

.L_dest00:
	! Skip the large copy for small transfers
	mov	#(32+32-4), r0
	cmp/gt	r6, r0		! r0 (60) > r6 (len)
	bt	1f

	! Align dest to a 32 byte boundary
	neg	r4,r0
	add	#0x20, r0
	and	#0x1f, r0
	tst	r0, r0
	bt	2f

	sub	r0, r6
	shlr2	r0
3:
EX(	mov.l	@r5+,r1		)
	dt	r0
EX(	mov.l	r1,@r4		)
	bf/s	3b
	 add	#4,r4

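	! Main loop: both pointers are now longword-aligned and the
	! destination sits on a 32-byte boundary, so copy 32 bytes per
	! iteration through eight registers (movca.l again avoids
	! fetching the destination cache line on SH-4).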
2:
EX(	mov.l	@r5+,r0		)
EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r2		)
EX(	mov.l	@r5+,r7		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r9		)
EX(	mov.l	@r5+,r10	)
EX(	mov.l	@r5+,r11	)
#ifdef CONFIG_CPU_SH4
EX(	movca.l	r0,@r4		)
#else
EX(	mov.l	r0,@r4		)
#endif
	add	#-32, r6
EX(	mov.l	r1,@(4,r4)	)
	mov	#32, r0
EX(	mov.l	r2,@(8,r4)	)
	cmp/gt	r6, r0		! r0 (32) > r6 (len)
EX(	mov.l	r7,@(12,r4)	)
EX(	mov.l	r8,@(16,r4)	)
EX(	mov.l	r9,@(20,r4)	)
EX(	mov.l	r10,@(24,r4)	)
EX(	mov.l	r11,@(28,r4)	)
	bf/s	2b
	 add	#32,r4

1:	mov	r6, r0
	shlr2	r0
	tst	r0, r0
	bt	.L_cleanup
1:
EX(	mov.l	@r5+,r1		)
	dt	r0
EX(	mov.l	r1,@r4		)
	bf/s	1b
	 add	#4,r4

	bra	.L_cleanup
	 nop

! Destination = 10

.L_dest10:
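	! The source is now longword-aligned but the destination is offset
	! by 2 bytes, so whole longwords are read and re-packed with
	! xtrct, which extracts the middle 32 bits of the Rm:Rn register
	! pair. r7 = number of 32-byte blocks, r2 = leftover longwords.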
	mov	r2,r7
	shlr2	r7
	shlr	r7
	tst	r7,r7
	mov	#7,r0
	bt/s	1f
	 and	r0,r2
2:
	dt	r7
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.l	@r5+,r0		)
EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r9		)
EX(	mov.l	@r5+,r10	)
EX(	mov.w	r0,@r4		)
	add	#2,r4
	xtrct	r1,r0
	xtrct	r8,r1
	xtrct	r9,r8
	xtrct	r10,r9

EX(	mov.l	r0,@r4		)
EX(	mov.l	r1,@(4,r4)	)
EX(	mov.l	r8,@(8,r4)	)
EX(	mov.l	r9,@(12,r4)	)

EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r0		)
	xtrct	r1,r10
	xtrct	r8,r1
	xtrct	r0,r8
	shlr16	r0
EX(	mov.l	r10,@(16,r4)	)
EX(	mov.l	r1,@(20,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.w	r0,@(28,r4)	)
	bf/s	2b
	 add	#30,r4
#else
EX(	mov.l	@(28,r5),r0	)
EX(	mov.l	@(24,r5),r8	)
EX(	mov.l	@(20,r5),r9	)
EX(	mov.l	@(16,r5),r10	)
EX(	mov.w	r0,@(30,r4)	)
	add	#-2,r4
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(28,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.l	r9,@(20,r4)	)

EX(	mov.l	@(12,r5),r0	)
EX(	mov.l	@(8,r5),r8	)
	xtrct	r0,r10
EX(	mov.l	@(4,r5),r9	)
EX(	mov.l	r10,@(16,r4)	)
EX(	mov.l	@r5,r10		)
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(12,r4)	)
EX(	mov.l	r8,@(8,r4)	)
	swap.w	r10,r0
EX(	mov.l	r9,@(4,r4)	)
EX(	mov.w	r0,@(2,r4)	)

	add	#32,r5
	bf/s	2b
	 add	#34,r4
#endif
	tst	r2,r2
	bt	.L_cleanup

1:	! Read longword, write two words per iteration
EX(	mov.l	@r5+,r0		)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.w	r0,@r4		)
	shlr16	r0
EX(	mov.w	r0,@(2,r4)	)
#else
EX(	mov.w	r0,@(2,r4)	)
	shlr16	r0
EX(	mov.w	r0,@r4		)
#endif
	bf/s	1b
	 add	#4,r4

	bra	.L_cleanup
	 nop

! Destination = 01 or 11

.L_dest01:
.L_dest11:
	! Read longword, write byte, word, byte per iteration
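	! (SuperH has no unaligned store, so each longword is split into
	! byte + word + byte stores; r2 again holds the longword count.)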
EX(	mov.l	@r5+,r0		)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.b	r0,@r4		)
	shlr8	r0
	add	#1,r4
EX(	mov.w	r0,@r4		)
	shlr16	r0
EX(	mov.b	r0,@(2,r4)	)
	bf/s	.L_dest01
	 add	#3,r4
#else
EX(	mov.b	r0,@(3,r4)	)
	shlr8	r0
	swap.w	r0,r7
EX(	mov.b	r7,@r4		)
	add	#1,r4
EX(	mov.w	r0,@r4		)
	bf/s	.L_dest01
	 add	#3,r4
#endif

! Cleanup last few bytes
.L_cleanup:
	mov	r6,r0
	and	#3,r0
	tst	r0,r0
	bt	.L_exit
	mov	r0,r6

.L_cleanup_loop:
EX(	mov.b	@r5+,r0		)
	dt	r6
EX(	mov.b	r0,@r4		)
	bf/s	.L_cleanup_loop
	 add	#1,r4

.L_exit:
	mov	#0,r0		! normal return

5000:

# Exception handler:
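# Same fixup as the small-copy path above: return r3 - r4, the number
# of bytes not copied, via the common exit at 5000.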
.section .fixup, "ax"
6000:
	mov.l	8000f,r1
	mov	r3,r0
	jmp	@r1
	 sub	r4,r0
	.align	2
8000:	.long	5000b

.previous
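	! Both the normal exit (r0 = 0) and the fixup (r0 = bytes not
	! copied) fall through here to restore the callee-saved registers
	! pushed at entry.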
	mov.l	@r15+,r8
	mov.l	@r15+,r9
	mov.l	@r15+,r10
	rts
	 mov.l	@r15+,r11