xref: /openbmc/linux/arch/sh/lib/udivsi3_i4i-Os.S (revision 6197e5b7)
1/* SPDX-License-Identifier: GPL-2.0+ WITH GCC-exception-2.0
2 *
3 * Copyright (C) 2006 Free Software Foundation, Inc.
4 */
5
6/* Moderately Space-optimized libgcc routines for the Renesas SH /
7   STMicroelectronics ST40 CPUs.
8   Contributed by J"orn Rennecke joern.rennecke@st.com.  */
9
10/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
11   sh4-200 run times:
12   udiv small divisor: 55 cycles
13   udiv large divisor: 52 cycles
14   sdiv small divisor, positive result: 59 cycles
15   sdiv large divisor, positive result: 56 cycles
16   sdiv small divisor, negative result: 65 cycles (*)
17   sdiv large divisor, negative result: 62 cycles (*)
18   (*): r2 is restored in the rts delay slot and has a lingering latency
19        of two more cycles.  */
/* __udivsi3_i4i (alias __udivsi3_i4): unsigned 32-bit division.
   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient, assembled from the T bits produced by div1.
   r4 and r5 are pushed on entry and popped again before leaving.
   r1 receives the caller's PR because the bsr helper calls below
   overwrite PR; the exit is made with jmp @r1.  r0, r1, T and PR are
   modified.
   A divisor that fits in 16 bits falls through into sdiv_small_divisor
   (32 div1 steps); any other divisor branches to large_divisor.  */
20	.balign 4
21	.global	__udivsi3_i4i
22	.global	__udivsi3_i4
23	.set	__udivsi3_i4, __udivsi3_i4i
24	.type	__udivsi3_i4i, @function
25	.type	__sdivsi3_i4i, @function
26__udivsi3_i4i:
27	sts pr,r1	/* r1 = return address; bsr below clobbers PR */
28	mov.l r4,@-r15	/* push caller's r4 */
29	extu.w r5,r0	/* r0 = low 16 bits of the divisor, zero-extended */
30	cmp/eq r5,r0	/* T = 1 iff the divisor fits in 16 bits */
31	swap.w r4,r0	/* r0 = dividend with its 16-bit halves swapped */
32	shlr16 r4	/* r4 = upper half of the dividend; T is left untouched */
33	bf/s large_divisor	/* T clear: divisor is wider than 16 bits */
34	div0u	/* delay slot (runs on both paths): clear M, Q and T for unsigned div1 */
35	mov.l r5,@-r15	/* push caller's r5 */
36	shll16 r5	/* r5 = divisor << 16, so its low half is zero */
/* Shared small-divisor path, also entered from sdiv_check_divisor.
   Expected state here: r4 = dividend >> 16, r0 = half-swapped dividend,
   r5 = divisor << 16, r1 = address to jump to when done, r4/r5 already
   pushed (r4 first), and M/Q/T cleared by div0u.
   Because the low half of r5 is zero, each div1 leaves the previously
   shifted-in T bits in the low half of r4.  */
37sdiv_small_divisor:
38	div1 r5,r4	/* steps 1..8: this div1, the delay-slot div1, 6 in div6 */
39	bsr div6
40	div1 r5,r4	/* delay slot of bsr */
41	div1 r5,r4	/* steps 9..16, same pattern */
42	bsr div6
43	div1 r5,r4	/* delay slot of bsr */
44	xtrct r4,r0	/* r0 = (r4 << 16) | (r0 >> 16): first T bits above the low dividend half */
45	xtrct r0,r4	/* r4 = (r0 << 16) | (r4 >> 16): low dividend half above the partial remainder */
46	bsr div7	/* steps 17..23 run in div7 */
47	swap.w r4,r4	/* delay slot: partial remainder back on top, low dividend half below */
48	div1 r5,r4	/* step 24 */
49	bsr div7	/* steps 26..32 run in div7 */
50	div1 r5,r4	/* delay slot: step 25 */
51	xtrct r4,r0	/* r0 = (r4 << 16) | (r0 >> 16): newer T bits above the older ones */
52	mov.l @r15+,r5	/* pop caller's r5 */
53	swap.w r0,r0	/* put the older T bits back in the upper half */
54	mov.l @r15+,r4	/* pop caller's r4 */
55	jmp @r1	/* leave through r1: saved PR, or negate_result when set by neg_result */
56	rotcl r0	/* delay slot: shift the final T bit into r0 */
/* Helpers reached with bsr from sdiv_small_divisor: div7 performs seven
   div1 r5,r4 steps, div6 performs six.  The last step sits in the delay
   slot of rts.  The lines below use ';' to put several statements on
   one line.  */
57div7:
58	div1 r5,r4	/* extra step that makes div7 one longer than div6 */
59div6:
60	            div1 r5,r4; div1 r5,r4; div1 r5,r4	/* steps 1..3 of div6 */
61	div1 r5,r4; div1 r5,r4; rts;        div1 r5,r4	/* steps 4..5, rts, step 6 in the delay slot */
62
/* Helper reached with bsr from the large-divisor loop: performs three
   rotcl r0 / div1 r5,r4 pairs.  Each rotcl rotates r0 left through T
   (T enters bit 0, the old top bit becomes T), and the following div1
   consumes that T.  The third div1 sits in the delay slot of rts.  */
63divx3:
64	rotcl r0
65	div1 r5,r4
66	rotcl r0
67	div1 r5,r4
68	rotcl r0
69	rts
70	div1 r5,r4	/* delay slot of rts */
71
/* Divisor wider than 16 bits: only 16 div1 steps are performed.
   large_divisor is the target of the bf/s in __udivsi3_i4i and pushes
   r5 (r4 was pushed earlier); sdiv_large_divisor is the entry used by
   sdiv_check_divisor, where r4 and r5 are already pushed.
   State on entry: r4 = dividend >> 16, r0 = half-swapped dividend,
   r5 = divisor (unshifted), r1 = address to jump to when done.  */
72large_divisor:
73	mov.l r5,@-r15	/* push caller's r5 */
74sdiv_large_divisor:
75	xor r4,r0	/* clears the low half of r0: r0 = (low dividend half) << 16 */
/* Each iteration is 4 rotcl/div1 pairs (one here, three in divx3), so
   the four repetitions give 16 pairs.  rotcl moves the next dividend
   bit from the top of r0 into T for div1 and stores the previous T
   result at the bottom of r0.  */
76	.rept 4
77	rotcl r0
78	bsr divx3
79	div1 r5,r4	/* delay slot of bsr */
80	.endr
81	mov.l @r15+,r5	/* pop caller's r5 */
82	mov.l @r15+,r4	/* pop caller's r4 */
83	jmp @r1	/* leave through r1: saved PR, or negate_result when set by neg_result */
84	rotcl r0	/* delay slot: shift the final T bit into r0 */
85
/* __sdivsi3_i4i (aliases __sdivsi3_i4, __sdivsi3): signed 32-bit
   division built on the unsigned paths above.
   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient.
   The original r4 and r5 are pushed here and popped by the shared
   division code.  Negative operands are negated so the shared code
   sees non-negative values.
   Signs equal (pos_result): r1 = caller's PR, so the shared code
   returns directly.
   Signs differ (neg_result): r1 = address of negate_result, r2 holds
   the caller's PR and the previous r2 is kept in MACL until
   negate_result restores it.
   r0, r1, T and PR are modified; MACL is overwritten on the neg_result
   path.  */
86	.global	__sdivsi3_i4i
87	.global __sdivsi3_i4
88	.global __sdivsi3
89	.set	__sdivsi3_i4, __sdivsi3_i4i
90	.set	__sdivsi3, __sdivsi3_i4i
91__sdivsi3_i4i:
92	mov.l r4,@-r15	/* push caller's r4 */
93	cmp/pz r5	/* T = 1 iff divisor >= 0 */
94	mov.l r5,@-r15	/* push caller's r5 */
95	bt/s pos_divisor	/* branch uses T from cmp/pz r5 */
96	cmp/pz r4	/* delay slot (runs on both paths): T = 1 iff dividend >= 0 */
/* Divisor is negative here.  */
97	neg r5,r5	/* r5 = -divisor */
98	extu.w r5,r0	/* r0 = low 16 bits of r5, zero-extended */
99	bt/s neg_result	/* dividend >= 0 with a negative divisor: signs differ */
100	cmp/eq r5,r0	/* delay slot: T = 1 iff r5 fits in 16 bits */
101	neg r4,r4	/* dividend was negative too: r4 = -dividend, signs equal */
102pos_result:
103	swap.w r4,r0	/* r0 = dividend with its 16-bit halves swapped */
104	bra sdiv_check_divisor
105	sts pr,r1	/* delay slot: r1 = caller's return address */
106pos_divisor:
107	extu.w r5,r0	/* r0 = low 16 bits of the divisor, zero-extended */
108	bt/s pos_result	/* dividend >= 0 as well: signs equal */
109	cmp/eq r5,r0	/* delay slot: T = 1 iff the divisor fits in 16 bits */
110	neg r4,r4	/* dividend was negative: r4 = -dividend, signs differ */
111neg_result:
112	mova negate_result,r0	/* r0 = address of negate_result (PC-relative) */
113	;	/* empty statement; ';' separates statements in this file */
114	mov r0,r1	/* shared code will jmp @r1 into negate_result */
115	swap.w r4,r0	/* r0 = dividend with its 16-bit halves swapped */
116	lds r2,macl	/* keep the caller's r2 in MACL */
117	sts pr,r2	/* r2 = caller's return address, used by negate_result */
/* Both sign cases meet here with T still holding the cmp/eq result
   (divisor magnitude fits in 16 bits).  */
118sdiv_check_divisor:
119	shlr16 r4	/* r4 = upper half of the dividend; T is left untouched */
120	bf/s sdiv_large_divisor	/* T clear: divisor is wider than 16 bits */
121	div0u	/* delay slot (runs on both paths): clear M, Q and T for unsigned div1 */
122	bra sdiv_small_divisor
123	shll16 r5	/* delay slot: r5 = divisor << 16 for the small-divisor path */
/* Continuation for signed divisions whose operand signs differ.
   neg_result stores this address in r1, so the shared division code
   arrives here through its final jmp @r1 with the unsigned quotient in
   r0, the caller's return address in r2 and the caller's r2 in MACL.
   The label is 4-byte aligned because neg_result takes its address
   with mova.  */
124	.balign 4
125negate_result:
126	neg r0,r0	/* quotient = -(unsigned quotient) */
127	jmp @r2	/* return to the original caller */
128	sts macl,r2	/* delay slot: restore the caller's r2 */
129