/* arch/sh/lib/copy_page.S (revision 9895f942) */
/*
 * copy_page, __copy_user_page, __copy_user implementation of SuperH
 *
 * Copyright (C) 2001  Niibe Yutaka & Kaz Kojima
 * Copyright (C) 2002  Toshinobu Sugioka
 * Copyright (C) 2006  Paul Mundt
 */
#include <linux/linkage.h>
#include <asm/page.h>

/*
 * copy_page
 * @to: P1 address
 * @from: P1 address
 *
 * void copy_page(void *to, void *from)
 */

/*
 * Register usage:
 *
 * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch
 * r8 --- from + PAGE_SIZE (loop termination address)
 * r9 --- not used
 * r10 --- to
 * r11 --- from
 */
ENTRY(copy_page)
	mov.l	r8,@-r15	! save callee-saved working registers
	mov.l	r10,@-r15
	mov.l	r11,@-r15
	mov	r4,r10		! r10 = to
	mov	r5,r11		! r11 = from
	mov	r5,r8
	mov.l	.Lpsz,r0
	add	r0,r8		! r8 = from + PAGE_SIZE
	!
	! Copy one 32-byte cache line per iteration: read 8 long words,
	! then store them.
1:	mov.l	@r11+,r0
	mov.l	@r11+,r1
	mov.l	@r11+,r2
	mov.l	@r11+,r3
	mov.l	@r11+,r4
	mov.l	@r11+,r5
	mov.l	@r11+,r6
	mov.l	@r11+,r7
#if defined(CONFIG_CPU_SH3)
	mov.l	r0,@r10
#elif defined(CONFIG_CPU_SH4)
	movca.l	r0,@r10		! SH-4: allocate the dest line without fetching it
	mov	r10,r0		! remember line base for the ocbwb below
#endif
	add	#32,r10
	mov.l	r7,@-r10	! store the remaining 7 words back-to-front
	mov.l	r6,@-r10
	mov.l	r5,@-r10
	mov.l	r4,@-r10
	mov.l	r3,@-r10
	mov.l	r2,@-r10
	mov.l	r1,@-r10
#if defined(CONFIG_CPU_SH4)
	ocbwb	@r0		! write back the completed cache line
#endif
	cmp/eq	r11,r8		! reached from + PAGE_SIZE?
	bf/s	1b
	 add	#28,r10		! delay slot: advance r10 to the next line
	!
	mov.l	@r15+,r11	! restore saved registers
	mov.l	@r15+,r10
	mov.l	@r15+,r8
	rts
	 nop

	.align 2
.Lpsz:	.long	PAGE_SIZE
/*
 * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n);
 * Return the number of bytes NOT copied
 */
/*
 * EX() wraps a single user-space access: the instruction gets a local
 * label and an __ex_table entry pointing it at the 6000: fixup code, so
 * a fault mid-copy is recovered instead of oopsing.
 */
#define EX(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6000f	;	\
	.previous
ENTRY(__copy_user)
	! r4 = to, r5 = from, r6 = len; returns bytes NOT copied in r0.
	! Check if small number of bytes
	mov	#11,r0
	mov	r4,r3
	cmp/gt	r0,r6		! r6 (len) > r0 (11)
	bf/s	.L_cleanup_loop_no_pop
	 add	r6,r3		! r3 = last destination address (used by fixup)

	! Calculate bytes needed to align to src
	! (register saves interleaved with the ALU ops to fill pipeline slots)
	mov.l	r11,@-r15
	neg	r5,r0
	mov.l	r10,@-r15
	add	#4,r0
	mov.l	r9,@-r15
	and	#3,r0		! r0 = (4 - (from & 3)) & 3
	mov.l	r8,@-r15
	tst	r0,r0
	bt	2f		! src already long-word aligned

1:
	! Copy bytes to long word align src
EX(	mov.b	@r5+,r1		)
	dt	r0
	add	#-1,r6
EX(	mov.b	r1,@r4		)
	bf/s	1b
	 add	#1,r4

	! Jump to appropriate routine depending on dest
2:	mov	#3,r1
	mov	r6, r2
	and	r4,r1		! r1 = dest alignment (0..3)
	shlr2	r2		! r2 = remaining long-word count
	shll2	r1		! scale alignment to a table offset
	mova	.L_jump_tbl,r0
	mov.l	@(r0,r1),r1
	jmp	@r1
	 nop

	.align 2
.L_jump_tbl:
	.long	.L_dest00
	.long	.L_dest01
	.long	.L_dest10
	.long	.L_dest11

/*
 * Come here if there are less than 12 bytes to copy
 *
 * Keep the branch target close, so the bf/s callee doesn't overflow
 * and result in a more expensive branch being inserted. This is the
 * fast-path for small copies, the jump via the jump table will hit the
 * default slow-path cleanup. -PFM.
 */
.L_cleanup_loop_no_pop:
	tst	r6,r6		! Check explicitly for zero
	bt	1f

2:
EX(	mov.b	@r5+,r0		)
	dt	r6
EX(	mov.b	r0,@r4		)
	bf/s	2b
	 add	#1,r4

1:	mov	#0,r0		! normal return
5000:

# Exception handler:
# r3 holds the end-of-destination address computed on entry, so
# r3 - r4 = bytes NOT copied; jump back to the 5000: return path.
.section .fixup, "ax"
6000:
	mov.l	8000f,r1
	mov	r3,r0
	jmp	@r1
	 sub	r4,r0
	.align	2
8000:	.long	5000b

.previous
	rts
	 nop

! Destination = 00
!
! Both src and dest are now long-word aligned.

.L_dest00:
	! Skip the large copy for small transfers
	mov	#(32+32-4), r0
	cmp/gt	r6, r0		! r0 (60) > r6 (len)
	bt	1f

	! Align dest to a 32 byte boundary
	neg	r4,r0
	add	#0x20, r0
	and	#0x1f, r0	! r0 = bytes to next 32-byte boundary
	tst	r0, r0
	bt	2f

	sub	r0, r6
	shlr2	r0		! convert to long-word count
3:
EX(	mov.l	@r5+,r1		)
	dt	r0
EX(	mov.l	r1,@r4		)
	bf/s	3b
	 add	#4,r4

	! Main loop: one 32-byte cache line per iteration, loads and
	! stores interleaved with the bookkeeping ALU ops.
2:
EX(	mov.l	@r5+,r0		)
EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r2		)
EX(	mov.l	@r5+,r7		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r9		)
EX(	mov.l	@r5+,r10	)
EX(	mov.l	@r5+,r11	)
#ifdef CONFIG_CPU_SH4
	! SH-4: allocate the dest line without fetching it
EX(	movca.l	r0,@r4		)
#else
EX(	mov.l	r0,@r4		)
#endif
	add	#-32, r6
EX(	mov.l	r1,@(4,r4)	)
	mov	#32, r0
EX(	mov.l	r2,@(8,r4)	)
	cmp/gt	r6, r0		! r0 (32) > r6 (len)
EX(	mov.l	r7,@(12,r4)	)
EX(	mov.l	r8,@(16,r4)	)
EX(	mov.l	r9,@(20,r4)	)
EX(	mov.l	r10,@(24,r4)	)
EX(	mov.l	r11,@(28,r4)	)
	bf/s	2b
	 add	#32,r4

	! Tail: copy the remaining long words one at a time.
1:	mov	r6, r0
	shlr2	r0
	tst	r0, r0
	bt	.L_cleanup
1:
EX(	mov.l	@r5+,r1		)
	dt	r0
EX(	mov.l	r1,@r4		)
	bf/s	1b
	 add	#4,r4

	bra	.L_cleanup
	 nop

! Destination = 10
!
! dest is 2 mod 4 aligned, src is long-word aligned: long words are
! read whole and re-split across the 2-byte offset with xtrct.
! r2 = long-word count from the dispatch code.

.L_dest10:
	mov	r2,r7
	shlr2	r7
	shlr	r7		! r7 = number of 32-byte chunks (r2 / 8)
	tst	r7,r7
	mov	#7,r0
	bt/s	1f
	 and	r0,r2		! r2 = leftover long words (r2 mod 8)
2:
	dt	r7
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.l	@r5+,r0		)
EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r9		)
EX(	mov.l	@r5+,r10	)
	! leading half-word first, then xtrct-stitched long words
EX(	mov.w	r0,@r4		)
	add	#2,r4
	xtrct	r1,r0
	xtrct	r8,r1
	xtrct	r9,r8
	xtrct	r10,r9

EX(	mov.l	r0,@r4		)
EX(	mov.l	r1,@(4,r4)	)
EX(	mov.l	r8,@(8,r4)	)
EX(	mov.l	r9,@(12,r4)	)

EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r0		)
	xtrct	r1,r10
	xtrct	r8,r1
	xtrct	r0,r8
	shlr16	r0		! trailing half-word of the chunk
EX(	mov.l	r10,@(16,r4)	)
EX(	mov.l	r1,@(20,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.w	r0,@(28,r4)	)
	bf/s	2b
	 add	#30,r4
#else
	! Big-endian: process the 32-byte chunk back-to-front.
EX(	mov.l	@(28,r5),r0	)
EX(	mov.l	@(24,r5),r8	)
EX(	mov.l	@(20,r5),r9	)
EX(	mov.l	@(16,r5),r10	)
EX(	mov.w	r0,@(30,r4)	)
	add	#-2,r4
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(28,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.l	r9,@(20,r4)	)

EX(	mov.l	@(12,r5),r0	)
EX(	mov.l	@(8,r5),r8	)
	xtrct	r0,r10
EX(	mov.l	@(4,r5),r9	)
	! NOTE(review): this user-space store is the only one in the
	! function not wrapped in EX() -- looks like a missing
	! exception-table entry; confirm against upstream before relying
	! on fault recovery through this path.
	mov.l	r10,@(16,r4)
EX(	mov.l	@r5,r10		)
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(12,r4)	)
EX(	mov.l	r8,@(8,r4)	)
	swap.w	r10,r0
EX(	mov.l	r9,@(4,r4)	)
EX(	mov.w	r0,@(2,r4)	)

	add	#32,r5
	bf/s	2b
	 add	#34,r4
#endif
	tst	r2,r2
	bt	.L_cleanup

1:	! Read longword, write two words per iteration
EX(	mov.l	@r5+,r0		)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.w	r0,@r4		)
	shlr16	r0
EX(	mov.w 	r0,@(2,r4)	)
#else
EX(	mov.w	r0,@(2,r4)	)
	shlr16	r0
EX(	mov.w	r0,@r4		)
#endif
	bf/s	1b
	 add	#4,r4

	bra	.L_cleanup
	 nop

! Destination = 01 or 11
!
! dest is odd-aligned: each source long word goes out as
! byte / word / byte.  r2 = long-word count from the dispatch code.

.L_dest01:
.L_dest11:
	! Read longword, write byte, word, byte per iteration
EX(	mov.l	@r5+,r0		)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.b	r0,@r4		)
	shlr8	r0
	add	#1,r4
EX(	mov.w	r0,@r4		)
	shlr16	r0
EX(	mov.b	r0,@(2,r4)	)
	bf/s	.L_dest01
	 add	#3,r4
#else
EX(	mov.b	r0,@(3,r4)	)
	shlr8	r0
	swap.w	r0,r7
EX(	mov.b	r7,@r4		)
	add	#1,r4
EX(	mov.w	r0,@r4		)
	bf/s	.L_dest01
	 add	#3,r4
#endif

! Cleanup last few bytes
.L_cleanup:
	mov	r6,r0
	and	#3,r0		! r0 = trailing byte count (len mod 4)
	tst	r0,r0
	bt	.L_exit
	mov	r0,r6

.L_cleanup_loop:
EX(	mov.b	@r5+,r0		)
	dt	r6
EX(	mov.b	r0,@r4		)
	bf/s	.L_cleanup_loop
	 add	#1,r4

.L_exit:
	mov	#0,r0		! normal return

5000:

# Exception handler:
# r3 holds the end-of-destination address computed on entry, so
# r3 - r4 = bytes NOT copied; return via 5000: with that count in r0.
.section .fixup, "ax"
6000:
	mov.l	8000f,r1
	mov	r3,r0
	jmp	@r1
	 sub	r4,r0
	.align	2
8000:	.long	5000b

.previous
	mov.l	@r15+,r8	! restore saved registers; return value in r0
	mov.l	@r15+,r9
	mov.l	@r15+,r10
	rts
	 mov.l	@r15+,r11