xref: /openbmc/linux/arch/x86/math-emu/div_Xsig.S (revision 03ab8e6297acd1bc0eedaa050e2a1635c576fd11)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 	.file	"div_Xsig.S"
3 /*---------------------------------------------------------------------------+
4  |  div_Xsig.S                                                               |
5  |                                                                           |
6  | Division subroutine for 96 bit quantities                                 |
7  |                                                                           |
8  | Copyright (C) 1994,1995                                                   |
9  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
10  |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
11  |                                                                           |
12  |                                                                           |
13  +---------------------------------------------------------------------------*/
14 
15 /*---------------------------------------------------------------------------+
16  | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and  |
17  | put the 96 bit result at the location d.                                  |
18  |                                                                           |
19  | The result may not be accurate to 96 bits. It is intended for use where   |
20  | a result better than 64 bits is required. The result should usually be    |
21  | good to at least 94 bits.                                                 |
22  | The returned result is actually multiplied by one half. This is done to   |
23  | prevent overflow.                                                         |
24  |                                                                           |
25  |  .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb  ->  .dddddddddddd                      |
26  |                                                                           |
27  |  void div_Xsig(Xsig *a, Xsig *b, Xsig *dest)                              |
28  |                                                                           |
29  +---------------------------------------------------------------------------*/
30 
31 #include "exception.h"
32 #include "fpu_emu.h"
33 
34 
35 #define	XsigLL(x)	(x)	/* least significant dword, offset 0 */
36 #define	XsigL(x)	4(x)	/* middle dword, offset 4 */
37 #define	XsigH(x)	8(x)	/* most significant dword, offset 8 */
38 
39 
40 #ifndef NON_REENTRANT_FPU
41 /*
42 	Local storage on the stack (seven dwords addressed below %ebp):
43 	Accumulator:	FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
44 	Quotient:	FPU_result_3:FPU_result_2:FPU_result_1  */
45 #define FPU_accum_3	-4(%ebp)	/* ms dword of the halved dividend */
46 #define FPU_accum_2	-8(%ebp)
47 #define FPU_accum_1	-12(%ebp)
48 #define FPU_accum_0	-16(%ebp)	/* ls dword: holds the bit shifted out by the halving */
49 #define FPU_result_3	-20(%ebp)	/* first (ms) 32 bits of the quotient */
50 #define FPU_result_2	-24(%ebp)	/* second 32 bits of the quotient */
51 #define FPU_result_1	-28(%ebp)	/* third 32 bits of the quotient (estimate) */
52 
53 #else
54 .data
55 /*
56 	Local storage in a static area (one zero-initialised dword per label):
57 	Accumulator:	FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
58 	Quotient:	FPU_result_3:FPU_result_2:FPU_result_1  */
59 	.align 4,0
60 FPU_accum_3:			/* ms dword of the halved dividend */
61 	.long	0
62 FPU_accum_2:
63 	.long	0
64 FPU_accum_1:
65 	.long	0
66 FPU_accum_0:			/* ls dword: holds the bit shifted out by the halving */
67 	.long	0
68 FPU_result_3:			/* first (ms) 32 bits of the quotient */
69 	.long	0
70 FPU_result_2:			/* second 32 bits of the quotient */
71 	.long	0
72 FPU_result_1:			/* third 32 bits of the quotient (estimate) */
73 	.long	0
74 #endif /* NON_REENTRANT_FPU */
75 
76 
77 .text
78 SYM_FUNC_START(div_Xsig)	/* void div_Xsig(Xsig *a, Xsig *b, Xsig *dest) */
79 	pushl	%ebp
80 	movl	%esp,%ebp	/* frame pointer; undone by 'leave' at L_exit */
81 #ifndef NON_REENTRANT_FPU
82 	subl	$28,%esp	/* 7 dwords: FPU_accum_0..3 and FPU_result_1..3 */
83 #endif /* NON_REENTRANT_FPU */
84 	/* Saved here, popped in reverse order at L_exit */
85 	pushl	%esi
86 	pushl	%edi
87 	pushl	%ebx
88 
89 	movl	PARAM1,%esi	/* pointer to num */
90 	movl	PARAM2,%ebx	/* pointer to denom */
91 
92 #ifdef PARANOID
93 	testl	$0x80000000, XsigH(%ebx)	/* Divisor: ms bit must be set */
94 	je	L_bugged
95 #endif /* PARANOID */
96 
97 
98 /*---------------------------------------------------------------------------+
99  |  Divide:   Return  arg1/arg2 to arg3.                                     |
100  |                                                                           |
101  |  The maximum returned value is (ignoring exponents)                       |
102  |               .ffffffff ffffffff                                          |
103  |               ------------------  =  1.ffffffff fffffffe                  |
104  |               .80000000 00000000                                          |
105  | and the minimum is                                                        |
106  |               .80000000 00000000                                          |
107  |               ------------------  =  .80000000 00000001   (rounded)       |
108  |               .ffffffff ffffffff                                          |
109  |                                                                           |
110  +---------------------------------------------------------------------------*/
111 
112 	/* Save the dividend, extended by one dword, in the local accumulator */
113 
114 	/* Divide by 2 to prevent overflow */
115 	clc				/* a zero is shifted into the top bit */
116 	movl	XsigH(%esi),%eax
117 	rcrl	%eax			/* CF hands the shifted-out bit to the next dword */
118 	movl	%eax,FPU_accum_3
119 	movl	XsigL(%esi),%eax
120 	rcrl	%eax
121 	movl	%eax,FPU_accum_2
122 	movl	XsigLL(%esi),%eax
123 	rcrl	%eax
124 	movl	%eax,FPU_accum_1
125 	movl	$0,%eax			/* movl leaves CF from the rcrl above intact */
126 	rcrl	%eax			/* bit shifted out of the LL dword lands in bit 31 */
127 	movl	%eax,FPU_accum_0
128 
129 	movl	FPU_accum_2,%eax	/* Get the current num */
130 	movl	FPU_accum_3,%edx	/* %edx:%eax = top 64 bits of the halved num */
131 
132 /*----------------------------------------------------------------------*/
133 /* Initialization done.
134    Do the first 32 bits. */
135 
136 	/* We will divide by a number which is too large */
137 	movl	XsigH(%ebx),%ecx
138 	addl	$1,%ecx			/* augmented denom ms dw */
139 	jnc	LFirst_div_not_1	/* no carry: %ecx did not wrap to zero */
140 
141 	/* here we need to divide by 100000000h,
142 	   i.e., no division at all.. */
143 	mov	%edx,%eax		/* quotient of %edx:%eax by 2^32 is %edx */
144 	jmp	LFirst_div_done
145 
146 LFirst_div_not_1:
147 	divl	%ecx		/* Divide the numerator by the augmented
148 				   denom ms dw */
149 
150 LFirst_div_done:
151 	movl	%eax,FPU_result_3	/* Put the result in the answer */
152 
153 	mull	XsigH(%ebx)	/* mul by the ms dw of the denom */
154 
155 	subl	%eax,FPU_accum_2	/* Subtract from the num local reg */
156 	sbbl	%edx,FPU_accum_3
157 
158 	movl	FPU_result_3,%eax	/* Get the result back */
159 	mull	XsigL(%ebx)	/* now mul the ls dw of the denom */
160 
161 	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
162 	sbbl	%edx,FPU_accum_2
163 	sbbl	$0,FPU_accum_3
164 	je	LDo_2nd_32_bits		/* ZF from the sbbl: FPU_accum_3 is now zero */
165 
166 #ifdef PARANOID
167 	jb	L_bugged_1		/* CF from the same sbbl: a borrow is a logic error */
168 #endif /* PARANOID */
169 
170 	/* need to subtract the denom once more */
171 	incl	FPU_result_3	/* Correct the answer */
172 
173 	movl	XsigL(%ebx),%eax
174 	movl	XsigH(%ebx),%edx
175 	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
176 	sbbl	%edx,FPU_accum_2
177 
178 #ifdef PARANOID
179 	sbbl	$0,FPU_accum_3
180 	jne	L_bugged_1	/* Must check for non-zero result here */
181 #endif /* PARANOID */
182 
183 /*----------------------------------------------------------------------*/
184 /* Half of the main problem is done, there is just a reduced numerator
185    to handle now.
186    Work with the second 32 bits, FPU_accum_3 not used from now on */
187 LDo_2nd_32_bits:
188 	movl	FPU_accum_2,%edx	/* get the reduced num */
189 	movl	FPU_accum_1,%eax
190 
191 	/* need to check for possible subsequent overflow */
192 	cmpl	XsigH(%ebx),%edx
193 	jb	LDo_2nd_div
194 	ja	LPrevent_2nd_overflow
195 
196 	cmpl	XsigL(%ebx),%eax	/* ms dwords equal: decide on the next dword */
197 	jb	LDo_2nd_div
198 
199 LPrevent_2nd_overflow:
200 /* The numerator is greater or equal, would cause overflow */
201 	/* prevent overflow */
202 	subl	XsigL(%ebx),%eax
203 	sbbl	XsigH(%ebx),%edx
204 	movl	%edx,FPU_accum_2
205 	movl	%eax,FPU_accum_1
206 
207 	incl	FPU_result_3	/* Reflect the subtraction in the answer */
208 
209 #ifdef PARANOID
210 	je	L_bugged_2	/* Can't bump the result to 1.0 */
211 #endif /* PARANOID */
212 
213 LDo_2nd_div:
214 	cmpl	$0,%ecx		/* augmented denom msw, still in %ecx from the first stage */
215 	jnz	LSecond_div_not_1
216 
217 	/* %ecx == 0, we are dividing by 1.0 */
218 	mov	%edx,%eax
219 	jmp	LSecond_div_done
220 
221 LSecond_div_not_1:
222 	divl	%ecx		/* Divide the numerator by the denom ms dw */
223 
224 LSecond_div_done:
225 	movl	%eax,FPU_result_2	/* Put the result in the answer */
226 
227 	mull	XsigH(%ebx)	/* mul by the ms dw of the denom */
228 
229 	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
230 	sbbl	%edx,FPU_accum_2
231 
232 #ifdef PARANOID
233 	jc	L_bugged_2
234 #endif /* PARANOID */
235 
236 	movl	FPU_result_2,%eax	/* Get the result back */
237 	mull	XsigL(%ebx)	/* now mul the ls dw of the denom */
238 
239 	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
240 	sbbl	%edx,FPU_accum_1	/* Subtract from the num local reg */
241 	sbbl	$0,FPU_accum_2
242 
243 #ifdef PARANOID
244 	jc	L_bugged_2
245 #endif /* PARANOID */
246 
247 	jz	LDo_3rd_32_bits		/* ZF still from the sbbl on FPU_accum_2 */
248 
249 #ifdef PARANOID
250 	cmpl	$1,FPU_accum_2		/* any value other than 1 here is a logic error */
251 	jne	L_bugged_2
252 #endif /* PARANOID */
253 
254 	/* need to subtract the denom once more */
255 	movl	XsigL(%ebx),%eax
256 	movl	XsigH(%ebx),%edx
257 	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
258 	sbbl	%edx,FPU_accum_1
259 	sbbl	$0,FPU_accum_2
260 
261 #ifdef PARANOID
262 	jc	L_bugged_2
263 	jne	L_bugged_2
264 #endif /* PARANOID */
265 
266 	addl	$1,FPU_result_2	/* Correct the answer */
267 	adcl	$0,FPU_result_3
268 
269 #ifdef PARANOID
270 	jc	L_bugged_2	/* carry out of FPU_result_3 is a logic error */
271 #endif /* PARANOID */
272 
273 /*----------------------------------------------------------------------*/
274 /* The division is essentially finished here, we just need to perform
275    tidying operations.
276    Deal with the 3rd 32 bits */
277 LDo_3rd_32_bits:
278 	/* We use an approximation for the third 32 bits.
279 	To take account of the 3rd 32 bits of the divisor
280 	(call them del), we subtract  del * (a/b) */
281 
282 	movl	FPU_result_3,%eax	/* a/b */
283 	mull	XsigLL(%ebx)		/* del */
284 
285 	subl	%edx,FPU_accum_1	/* only the high dword of the product is used */
286 
287 	/* A borrow indicates that the result is negative */
288 	jnb	LTest_over
289 
290 	movl	XsigH(%ebx),%edx
291 	addl	%edx,FPU_accum_1	/* add the denom ms dw back in */
292 
293 	subl	$1,FPU_result_2		/* Adjust the answer */
294 	sbbl	$0,FPU_result_3
295 
296 	/* The above addition might not have been enough, check again. */
297 	movl	FPU_accum_1,%edx	/* get the reduced num */
298 	cmpl	XsigH(%ebx),%edx	/* denom */
299 	jb	LDo_3rd_div
300 
301 	movl	XsigH(%ebx),%edx
302 	addl	%edx,FPU_accum_1
303 
304 	subl	$1,FPU_result_2		/* Adjust the answer */
305 	sbbl	$0,FPU_result_3
306 	jmp	LDo_3rd_div
307 
308 LTest_over:
309 	movl	FPU_accum_1,%edx	/* get the reduced num */
310 
311 	/* need to check for possible subsequent overflow */
312 	cmpl	XsigH(%ebx),%edx	/* denom */
313 	jb	LDo_3rd_div
314 
315 	/* prevent overflow */
316 	subl	XsigH(%ebx),%edx
317 	movl	%edx,FPU_accum_1
318 
319 	addl	$1,FPU_result_2	/* Reflect the subtraction in the answer */
320 	adcl	$0,FPU_result_3
321 
322 LDo_3rd_div:
323 	movl	FPU_accum_0,%eax
324 	movl	FPU_accum_1,%edx
325 	divl	XsigH(%ebx)		/* by the denom ms dw itself, not the augmented one */
326 
327 	movl    %eax,FPU_result_1       /* Rough estimate of third word */
328 
329 	movl	PARAM3,%esi		/* pointer to answer */
330 
331 	movl	FPU_result_1,%eax
332 	movl	%eax,XsigLL(%esi)	/* ls dword of the answer */
333 	movl	FPU_result_2,%eax
334 	movl	%eax,XsigL(%esi)
335 	movl	FPU_result_3,%eax
336 	movl	%eax,XsigH(%esi)	/* ms dword of the answer */
337 
338 L_exit:
339 	popl	%ebx
340 	popl	%edi
341 	popl	%esi
342 
343 	leave
344 	RET
345 
346 
347 #ifdef PARANOID
348 /* The logic is wrong if we got here; these paths never store to the answer */
349 L_bugged:
350 	pushl	EX_INTERNAL|0x240
351 	call	EXCEPTION
352 	pop	%ebx		/* drop the value pushed above; L_exit reloads %ebx */
353 	jmp	L_exit
354 
355 L_bugged_1:
356 	pushl	EX_INTERNAL|0x241
357 	call	EXCEPTION
358 	pop	%ebx		/* drop the value pushed above; L_exit reloads %ebx */
359 	jmp	L_exit
360 
361 L_bugged_2:
362 	pushl	EX_INTERNAL|0x242
363 	call	EXCEPTION
364 	pop	%ebx		/* drop the value pushed above; L_exit reloads %ebx */
365 	jmp	L_exit
366 #endif /* PARANOID */
367 SYM_FUNC_END(div_Xsig)
368