/* xref: /openbmc/linux/arch/arm/lib/div64.S (revision f3a8b664) */
/*
 *  linux/arch/arm/lib/div64.S
 *
 *  Optimized computation of 64-bit dividend / 32-bit divisor
 *
 *  Author:	Nicolas Pitre
 *  Created:	Oct 5, 2003
 *  Copyright:	Monta Vista Software, Inc.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>
#include <asm/export.h>

/*
 * Register aliases for the two 64-bit quantities handled by __do_div64.
 *
 *	xh / xl	= high / low 32-bit word of the value held in r0-r1
 *	yh / yl	= high / low 32-bit word of the value held in r2-r3
 *
 * When __ARMEB__ is defined the high word is the lower-numbered register
 * of each pair (xh = r0, yh = r2); otherwise the low word is (xl = r0,
 * yl = r2).
 *
 * NOTE(review): presumably this mirrors how the compiler places a 64-bit
 * value in a register pair for each endianness, so that do_div() can pass
 * and receive values without moves -- confirm against asm/div64.h.
 */
#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif

/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 * 	xh-xl	= dividend (clobbered)
 * 	r4	= divisor (preserved)
 *
 * Output values:
 * 	yh-yl	= result
 * 	xh	= remainder
 *
 * Clobbered regs: xl, ip
 *
 * Paths through the routine:
 *	divisor == 0	falls through label 9 into Ldiv0_64, which calls
 *			__div0 and then returns result 0 / remainder 0.
 *	divisor == 1	label 9: result = dividend, remainder = 0.
 *	power of two	label 8: result and remainder obtained by shifting.
 *	anything else	shift-and-subtract: label 2 produces the upper
 *			result word (yh), label 4 the lower one (yl).
 *
 * All returns go through lr.  Most conditional instructions below rely on
 * flags set several instructions earlier, so the instruction order matters.
 */

ENTRY(__do_div64)
UNWIND(.fnstart)

	@ Test for easy paths first.
	@ ip = divisor - 1.  LS (C clear or Z set) holds when the subtraction
	@ borrowed (divisor == 0) or produced zero (divisor == 1).
	subs	ip, r4, #1
	bls	9f			@ divisor is 0 or 1
	@ (divisor - 1) & divisor is zero only when a single bit is set.
	tst	ip, r4
	beq	8f			@ divisor is power of 2

	@ See if we need to handle upper 32-bit result.
	@ If xh < divisor the upper result word stays 0 and xh already is
	@ the running remainder for the lower loop.  The mov does not touch
	@ the flags produced by cmp.
	cmp	xh, r4
	mov	yh, #0
	blo	3f

	@ Align divisor with upper part of dividend.
	@ The aligned divisor is stored in yl preserving the original.
	@ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5

	@ xh >= divisor here, so clz(divisor) - clz(xh) is not negative.
	clz	yl, r4
	clz	ip, xh
	sub	yl, yl, ip		@ yl = shift lining up the top set bits
	mov	ip, #1
	mov	ip, ip, lsl yl		@ ip = result bit for that shift
	mov	yl, r4, lsl yl		@ yl = aligned divisor

#else

	@ Shift yl and ip left one bit at a time while bit 31 of yl is
	@ clear and yl is still below xh.
	mov	yl, r4
	mov	ip, #1
1:	cmp	yl, #0x80000000
	cmpcc	yl, xh
	movcc	yl, yl, lsl #1
	movcc	ip, ip, lsl #1
	bcc	1b

#endif

	@ The division loop for needed upper bit positions.
	@ Break out early if dividend reaches 0.
	@ Z is the loop condition: it gets set either by the subtraction
	@ leaving xh == 0 or by the bit mask in ip shifting out to 0.
2:	cmp	xh, yl
	orrcs	yh, yh, ip
	subcss	xh, xh, yl
	movnes	ip, ip, lsr #1
	mov	yl, yl, lsr #1
	bne	2b

	@ See if we need to handle lower 32-bit result.
	@ LO can only hold when xh == 0 and xl < divisor: the lower result
	@ word is then 0 and xl is the remainder.
3:	cmp	xh, #0
	mov	yl, #0
	cmpeq	xl, r4
	movlo	xh, xl
	retlo	lr

	@ The division loop for lower bit positions.
	@ Here we shift remainder bits leftwards rather than moving the
	@ divisor for comparisons, considering the carry-out bit as well.
	mov	ip, #0x80000000
4:	movs	xl, xl, lsl #1		@ C = bit shifted out of xl
	adcs	xh, xh, xh		@ xh = 2 * xh + C, C = bit shifted out
	beq	6f
	cmpcc	xh, r4			@ on carry-out C stays set: subtract
5:	orrcs	yl, yl, ip
	subcs	xh, xh, r4
	movs	ip, ip, lsr #1
	bne	4b
	ret	lr

	@ The top part of remainder became zero.  If carry is set
	@ (the 33rd bit) this is a false positive so resume the loop.
	@ Otherwise, if lower part is also null then we are done.
6:	bcs	5b
	cmp	xl, #0
	reteq	lr

	@ We still have remainder bits in the low part.  Bring them up.
	@ xl is shifted left until its top set bit has been shifted out,
	@ and the bit mask in ip is moved right by the same count.

#if __LINUX_ARM_ARCH__ >= 5

	@ xh serves as scratch for the shift count, clz(xl) + 1.
	clz	xh, xl			@ we know xh is zero here so...
	add	xh, xh, #1
	mov	xl, xl, lsl xh
	mov	ip, ip, lsr xh

#else

7:	movs	xl, xl, lsl #1
	mov	ip, ip, lsr #1
	bcc	7b

#endif

	@ Current remainder is now 1.  It is worthless to compare with
	@ divisor at this point since divisor can not be smaller than 3 here.
	@ If possible, branch for another shift in the division loop.
	@ If no bit position left then we are done.
	movs	ip, ip, lsr #1
	mov	xh, #1
	bne	4b
	ret	lr

8:	@ Division by a power of 2: determine what that divisor order is
	@ then simply shift values around

#if __LINUX_ARM_ARCH__ >= 5

	clz	ip, r4
	rsb	ip, ip, #31		@ ip = 31 - clz(divisor) = order

#else

	@ Binary search for the single set bit: narrow yl by 16, 8 and 4
	@ bits while accumulating the order in ip.
	mov	yl, r4
	cmp	r4, #(1 << 16)
	mov	ip, #0
	movhs	yl, yl, lsr #16
	movhs	ip, #16

	cmp	yl, #(1 << 8)
	movhs	yl, yl, lsr #8
	addhs	ip, ip, #8

	cmp	yl, #(1 << 4)
	movhs	yl, yl, lsr #4
	addhs	ip, ip, #4

	@ yl is now 1, 2, 4 or 8: add 3 for 8, otherwise yl >> 1.
	cmp	yl, #(1 << 2)
	addhi	ip, ip, #3
	addls	ip, ip, yl, lsr #1

#endif

	@ result = dividend >> order.  After the rsb, ip = 32 - order and is
	@ used both to move the low bits of xh into the top of yl and to
	@ isolate the low "order" bits of xl as the remainder.
	@ The Thumb variant clobbers xh first; xh is rewritten right after.
	mov	yh, xh, lsr ip
	mov	yl, xl, lsr ip
	rsb	ip, ip, #32
 ARM(	orr	yl, yl, xh, lsl ip	)
 THUMB(	lsl	xh, xh, ip		)
 THUMB(	orr	yl, yl, xh		)
	mov	xh, xl, lsl ip
	mov	xh, xh, lsr ip
	ret	lr

	@ eq -> division by 1: obvious enough...
	@ The flags tested here still come from the subs at function entry.
9:	moveq	yl, xl
	moveq	yh, xh
	moveq	xh, #0
	reteq	lr
	@ ne -> divisor is 0: execution falls through into Ldiv0_64.
UNWIND(.fnend)

UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
	@ Division by 0:
	@ lr is stored with an 8-byte pre-decrement of sp (the saved word
	@ plus 4 bytes of padding, matching the .pad and .save annotations
	@ above), then __div0 is called.
	str	lr, [sp, #-8]!
	bl	__div0

	@ as wrong as it could be...
	mov	yl, #0
	mov	yh, #0
	mov	xh, #0
	ldr	pc, [sp], #8		@ return via saved lr, release the slot

UNWIND(.fnend)
ENDPROC(__do_div64)
EXPORT_SYMBOL(__do_div64)
