/* SPDX-License-Identifier: GPL-2.0 */
/*
 * copy_page, __copy_user implementations for SuperH
 *
 * Copyright (C) 2001  Niibe Yutaka & Kaz Kojima
 * Copyright (C) 2002  Toshinobu Sugioka
 * Copyright (C) 2006  Paul Mundt
 */
#include <linux/linkage.h>
#include <asm/page.h>

/*
 * copy_page
 * @to: P1 address
 * @from: P1 address
 *
 * void copy_page(void *to, void *from)
 */

/*
 * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch
 * r8 --- from + PAGE_SIZE
 * r9 --- not used
 * r10 --- to
 * r11 --- from
 */
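/*
 * Roughly equivalent C (a sketch only; the real loop below is an
 * 8-longword unrolled copy and uses movca.l on SH-4):
 *
 *	void copy_page(void *to, void *from)
 *	{
 *		u32 *d = to, *s = from, *end = s + PAGE_SIZE / 4;
 *
 *		while (s != end)
 *			*d++ = *s++;
 *	}
 */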
ENTRY(copy_page)
	mov.l	r8,@-r15
	mov.l	r10,@-r15
	mov.l	r11,@-r15
	mov	r4,r10
	mov	r5,r11
	mov	r5,r8
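	! mov #imm only takes an 8-bit signed immediate, so build
	! PAGE_SIZE as (PAGE_SIZE >> 10) << 10 via shll8 + shll2.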
	mov	#(PAGE_SIZE >> 10), r0
	shll8	r0
	shll2	r0
	add	r0,r8
	!
1:	mov.l	@r11+,r0
	mov.l	@r11+,r1
	mov.l	@r11+,r2
	mov.l	@r11+,r3
	mov.l	@r11+,r4
	mov.l	@r11+,r5
	mov.l	@r11+,r6
	mov.l	@r11+,r7
#if defined(CONFIG_CPU_SH4)
	movca.l	r0,@r10
#else
	mov.l	r0,@r10
#endif
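	! movca.l allocates the destination cache line without first
	! fetching it from memory, since the whole line is overwritten.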
	add	#32,r10
	mov.l	r7,@-r10
	mov.l	r6,@-r10
	mov.l	r5,@-r10
	mov.l	r4,@-r10
	mov.l	r3,@-r10
	mov.l	r2,@-r10
	mov.l	r1,@-r10
	cmp/eq	r11,r8
	bf/s	1b
	 add	#28,r10
	!
	mov.l	@r15+,r11
	mov.l	@r15+,r10
	mov.l	@r15+,r8
	rts
	 nop

/*
 * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n);
 * Return the number of bytes NOT copied
 */
#define EX(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6000f	;	\
	.previous
#define EX_NO_POP(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6005f	;	\
	.previous
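/*
 * Each EX()-wrapped access gets an __ex_table entry pairing the
 * possibly-faulting address with a fixup label: 6000 returns through
 * the exit path that pops the saved registers, while 6005 (EX_NO_POP)
 * serves the small-copy fast path, before anything has been pushed.
 */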
ENTRY(__copy_user)
	! Check if small number of bytes
	mov	#11,r0
	mov	r4,r3
	cmp/gt	r0,r6		! r6 (len) > r0 (11)
	bf/s	.L_cleanup_loop_no_pop
	 add	r6,r3		! last destination address

	! Calculate bytes needed to align to src
	mov.l	r11,@-r15
	neg	r5,r0
	mov.l	r10,@-r15
	add	#4,r0
	mov.l	r9,@-r15
	and	#3,r0
	mov.l	r8,@-r15
	tst	r0,r0
	bt	2f

1:
	! Copy bytes to long word align src
EX(	mov.b	@r5+,r1		)
	dt	r0
	add	#-1,r6
EX(	mov.b	r1,@r4		)
	bf/s	1b
	 add	#1,r4

	! Jump to appropriate routine depending on dest
2:	mov	#3,r1
	mov	r6, r2
	and	r4,r1
	shlr2	r2
	shll2	r1
	mova	.L_jump_tbl,r0
	mov.l	@(r0,r1),r1
	jmp	@r1
	 nop

	.align 2
.L_jump_tbl:
	.long	.L_dest00
	.long	.L_dest01
	.long	.L_dest10
	.long	.L_dest11
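/*
 * Rough shape of the dispatch above (a sketch; the source is already
 * longword-aligned at this point, and r2 holds len / 4):
 *
 *	switch ((unsigned long)to & 3) {
 *	case 0:  goto dest00;	// straight longword stores
 *	case 2:  goto dest10;	// word-offset, stitched with xtrct
 *	default: goto dest01;	// byte/word/byte stores (offset 1 or 3)
 *	}
 */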

/*
 * Come here if there are fewer than 12 bytes to copy
 *
 * Keep the branch target close, so the bf/s displacement doesn't
 * overflow and force a more expensive branch sequence. This is the
 * fast-path for small copies; the jump via the jump table hits the
 * default slow-path cleanup. -PFM.
 */
.L_cleanup_loop_no_pop:
	tst	r6,r6		! Check explicitly for zero
	bt	1f

2:
EX_NO_POP(	mov.b	@r5+,r0		)
	dt	r6
EX_NO_POP(	mov.b	r0,@r4		)
	bf/s	2b
	 add	#1,r4

1:	mov	#0,r0		! normal return
5000:

# Exception handler:
.section .fixup, "ax"
6005:
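	! Bytes not copied = last dest address (r3) - current dest (r4);
	! return it through the common exit at 5000.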
	mov.l	8000f,r1
	mov	r3,r0
	jmp	@r1
	 sub	r4,r0
	.align	2
8000:	.long	5000b

.previous
	rts
	 nop

! Destination = 00

.L_dest00:
	! Skip the large copy for small transfers
	mov	#(32+32-4), r0
	cmp/gt	r6, r0		! r0 (60) > r6 (len)
	bt	1f

	! Align dest to a 32 byte boundary
	neg	r4,r0
	add	#0x20, r0
	and	#0x1f, r0
	tst	r0, r0
	bt	2f

	sub	r0, r6
	shlr2	r0
3:
EX(	mov.l	@r5+,r1		)
	dt	r0
EX(	mov.l	r1,@r4		)
	bf/s	3b
	 add	#4,r4

2:
EX(	mov.l	@r5+,r0		)
EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r2		)
EX(	mov.l	@r5+,r7		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r9		)
EX(	mov.l	@r5+,r10	)
EX(	mov.l	@r5+,r11	)
#ifdef CONFIG_CPU_SH4
EX(	movca.l	r0,@r4		)
#else
EX(	mov.l	r0,@r4		)
#endif
	add	#-32, r6
EX(	mov.l	r1,@(4,r4)	)
	mov	#32, r0
EX(	mov.l	r2,@(8,r4)	)
	cmp/gt	r6, r0		! r0 (32) > r6 (len)
EX(	mov.l	r7,@(12,r4)	)
EX(	mov.l	r8,@(16,r4)	)
EX(	mov.l	r9,@(20,r4)	)
EX(	mov.l	r10,@(24,r4)	)
EX(	mov.l	r11,@(28,r4)	)
	bf/s	2b
	 add	#32,r4

1:	mov	r6, r0
	shlr2	r0
	tst	r0, r0
	bt	.L_cleanup
1:
EX(	mov.l	@r5+,r1		)
	dt	r0
EX(	mov.l	r1,@r4		)
	bf/s	1b
	 add	#4,r4

	bra	.L_cleanup
	 nop
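/*
 * The mutually-aligned path above is roughly (a sketch; __ex_table
 * fault handling omitted):
 *
 *	while (len >= 32) {
 *		memcpy(to, from, 32);	// eight unrolled longword moves
 *		to += 32; from += 32; len -= 32;
 *	}
 *	while (len >= 4) {
 *		*(u32 *)to = *(u32 *)from;
 *		to += 4; from += 4; len -= 4;
 *	}
 *	// then fall through to .L_cleanup for the last 0..3 bytes
 */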

! Destination = 10

.L_dest10:
	mov	r2,r7
	shlr2	r7
	shlr	r7
	tst	r7,r7
	mov	#7,r0
	bt/s	1f
	 and	r0,r2
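	! xtrct Rm,Rn extracts the middle 32 bits of the 64-bit pair
	! Rm:Rn, i.e. Rn = (Rm << 16) | (Rn >> 16), stitching aligned
	! source longwords into stores at the word-offset destination.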
2:
	dt	r7
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.l	@r5+,r0		)
EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r9		)
EX(	mov.l	@r5+,r10	)
EX(	mov.w	r0,@r4		)
	add	#2,r4
	xtrct	r1,r0
	xtrct	r8,r1
	xtrct	r9,r8
	xtrct	r10,r9

EX(	mov.l	r0,@r4		)
EX(	mov.l	r1,@(4,r4)	)
EX(	mov.l	r8,@(8,r4)	)
EX(	mov.l	r9,@(12,r4)	)

EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r0		)
	xtrct	r1,r10
	xtrct	r8,r1
	xtrct	r0,r8
	shlr16	r0
EX(	mov.l	r10,@(16,r4)	)
EX(	mov.l	r1,@(20,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.w	r0,@(28,r4)	)
	bf/s	2b
	 add	#30,r4
#else
EX(	mov.l	@(28,r5),r0	)
EX(	mov.l	@(24,r5),r8	)
EX(	mov.l	@(20,r5),r9	)
EX(	mov.l	@(16,r5),r10	)
EX(	mov.w	r0,@(30,r4)	)
	add	#-2,r4
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(28,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.l	r9,@(20,r4)	)

EX(	mov.l	@(12,r5),r0	)
EX(	mov.l	@(8,r5),r8	)
	xtrct	r0,r10
EX(	mov.l	@(4,r5),r9	)
EX(	mov.l	r10,@(16,r4)	)
EX(	mov.l	@r5,r10		)
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(12,r4)	)
EX(	mov.l	r8,@(8,r4)	)
	swap.w	r10,r0
EX(	mov.l	r9,@(4,r4)	)
EX(	mov.w	r0,@(2,r4)	)

	add	#32,r5
	bf/s	2b
	 add	#34,r4
#endif
	tst	r2,r2
	bt	.L_cleanup

1:	! Read longword, write two words per iteration
EX(	mov.l	@r5+,r0		)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.w	r0,@r4		)
	shlr16	r0
EX(	mov.w	r0,@(2,r4)	)
#else
EX(	mov.w	r0,@(2,r4)	)
	shlr16	r0
EX(	mov.w	r0,@r4		)
#endif
	bf/s	1b
	 add	#4,r4

	bra	.L_cleanup
	 nop

! Destination = 01 or 11

.L_dest01:
.L_dest11:
	! Read longword, write byte, word, byte per iteration
EX(	mov.l	@r5+,r0		)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.b	r0,@r4		)
	shlr8	r0
	add	#1,r4
EX(	mov.w	r0,@r4		)
	shlr16	r0
EX(	mov.b	r0,@(2,r4)	)
	bf/s	.L_dest01
	 add	#3,r4
#else
EX(	mov.b	r0,@(3,r4)	)
	shlr8	r0
	swap.w	r0,r7
EX(	mov.b	r7,@r4		)
	add	#1,r4
EX(	mov.w	r0,@r4		)
	bf/s	.L_dest01
	 add	#3,r4
#endif
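/*
 * Little-endian example of the byte/word/byte split above (a sketch):
 * each source word w is emitted so that no store is misaligned:
 *
 *	to[0] = w;			// byte
 *	*(u16 *)(to + 1) = w >> 8;	// word (even address)
 *	to[3] = w >> 24;		// byte
 */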

! Cleanup last few bytes
.L_cleanup:
	mov	r6,r0
	and	#3,r0
	tst	r0,r0
	bt	.L_exit
	mov	r0,r6

.L_cleanup_loop:
EX(	mov.b	@r5+,r0		)
	dt	r6
EX(	mov.b	r0,@r4		)
	bf/s	.L_cleanup_loop
	 add	#1,r4

.L_exit:
	mov	#0,r0		! normal return

5000:

# Exception handler:
.section .fixup, "ax"
6000:
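	! Same fixup as 6005: r3 - r4 = bytes not copied, returned via
	! 5000, whose exit path below also pops the saved registers.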
	mov.l	8000f,r1
	mov	r3,r0
	jmp	@r1
	 sub	r4,r0
	.align	2
8000:	.long	5000b

.previous
	mov.l	@r15+,r8
	mov.l	@r15+,r9
	mov.l	@r15+,r10
	rts
	 mov.l	@r15+,r11