xref: /openbmc/u-boot/arch/sh/lib/udivsi3_i4i.S (revision 0c01c3e8)
1/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
2   2004, 2005, 2006
3   Free Software Foundation, Inc.
4
5 * SPDX-License-Identifier:	GPL-2.0+
6 */
7
8!! libgcc routines for the Renesas / SuperH SH CPUs.
9!! Contributed by Steve Chamberlain.
10!! sac@cygnus.com
11
12!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
13!! recoded in assembly by Toshiyasu Morita
14!! tm@netcom.com
15
16/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
17   ELF local label prefixes by J"orn Rennecke
18   amylaar@cygnus.com  */
19
20/* This code uses shld, and thus is not suitable for SH1 / SH2.  */
21
22/* Signed / unsigned division without use of FPU, optimized for SH4.
23   Uses a lookup table for divisors in the range -128 .. +128, and
24   div1 with case distinction for larger divisors in three more ranges.
25   The code is lumped together with the table to allow the use of mova.  */
/* Byte offsets of individual bytes inside a 32-bit word stored in memory.
   The division code uses them as displacements for mov.b stores into a
   scratch word on the stack, so the values depend on the configured
   endianness:
     L_LSB    - bits 0..7 (not referenced in the visible part of this file)
     L_LSWMSB - bits 8..15  (high byte of the low 16 bits)
     L_MSWLSB - bits 16..23 (low byte of the high 16 bits)  */
26#ifdef CONFIG_CPU_LITTLE_ENDIAN
27#define L_LSB 0
28#define L_LSWMSB 1
29#define L_MSWLSB 2
30#else
31#define L_LSB 3
32#define L_LSWMSB 2
33#define L_MSWLSB 1
34#endif
35
/* __udivsi3_i4i (also exported as __udivsi3_i4): unsigned 32-bit division.
   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient.
   r4 and r5 are pushed on the stack and popped again before returning;
   r1 is used as scratch.
   This entry sequence only classifies the divisor:
     divisor <= 128             -> udiv_le128 (inverse-table multiply)
     divisor >= 0x10000         -> udiv_ge64k
     otherwise (129 .. 0xffff)  -> falls through, starts the div1 steps and
                                   joins udiv_25 inside __sdivsi3_i4i.
   Several instructions sit in branch delay slots and the T bit is carried
   across instructions, so the statement order matters.  */
36	.balign 4
37	.global	__udivsi3_i4i
38	.global	__udivsi3_i4
39	.set	__udivsi3_i4, __udivsi3_i4i
40	.type	__udivsi3_i4i, @function
41__udivsi3_i4i:
42	mov.w c128_w, r1	/* r1 = 128 */
43	div0u			/* clear M, Q and T for the unsigned div1 steps */
44	mov r4,r0
45	shlr8 r0		/* r0 = dividend >> 8 */
46	cmp/hi r1,r5		/* T = (divisor > 128), unsigned compare */
47	extu.w r5,r1		/* r1 = low 16 bits of the divisor; T is unchanged */
48	bf udiv_le128		/* divisor <= 128: table-driven path */
49	cmp/eq r5,r1		/* T = (divisor fits in 16 bits) */
50	bf udiv_ge64k		/* divisor >= 0x10000 */
51	shlr r0			/* r0 = dividend >> 9; the bit shifted out is left in T */
52	mov r5,r1		/* keep the unshifted divisor for the stack save below */
53	shll16 r5		/* move the 16-bit divisor into the upper half */
54	mov.l r4,@-r15		/* save r4 */
55	div1 r5,r0
56	mov.l r1,@-r15		/* save the original r5 */
57	div1 r5,r0
58	div1 r5,r0
59	bra udiv_25		/* continue in the shared code of __sdivsi3_i4i */
60	div1 r5,r0		/* (delay slot) fourth div1 step */
61
/* Table-driven path for divisors up to 128: the quotient is computed by
   multiplying the dividend (r4) with a normalized inverse looked up through
   div_table_ix / div_table_inv, then shifting by the count in div_table_clz.
   div_le128   - entry from pos_result in __sdivsi3_i4i; r4 and r5 have
                 already been pushed there.
   udiv_le128  - entry from __udivsi3_i4i; pushes r4 and r5 itself.
   Both join at div_le128_2 with r1 = div_table_ix[r5].  */
62div_le128:
63	mova div_table_ix,r0
64	bra div_le128_2
65	mov.b @(r0,r5),r1	/* (delay slot) r1 = div_table_ix[r5], sign-extended */
66udiv_le128:
67	mov.l r4,@-r15		/* save r4 */
68	mova div_table_ix,r0
69	mov.b @(r0,r5),r1	/* r1 = div_table_ix[r5], sign-extended */
70	mov.l r5,@-r15		/* save r5 */
71div_le128_2:
72	mova div_table_inv,r0
73	mov.l @(r0,r1),r1	/* r1 = inverse word at byte offset r1 from div_table_inv */
74	mov r5,r0
75	tst #0xfe,r0		/* T = ((r5 & 0xfe) == 0) */
76	mova div_table_clz,r0
77	dmulu.l r1,r4		/* MACH:MACL = inverse * r4, unsigned 64-bit product */
78	mov.b @(r0,r5),r1	/* r1 = div_table_clz[r5]: shift count for shld below */
79	bt/s div_by_1		/* T set: the result is r4 itself */
80	mov r4,r0		/* (delay slot) r0 = r4 */
81	mov.l @r15+,r5		/* restore r5 */
82	sts mach,r0		/* r0 = high 32 bits of the product */
/* T is already 0 here because the bt/s above was not taken, so no clrt
   is needed before the addc.  */
83	/* clrt */
84	addc r4,r0		/* r0 = MACH + r4, carry in T (the table inverses have an implicit leading 1) */
85	mov.l @r15+,r4		/* restore r4 */
86	rotcr r0		/* rotate the carry back in from the top */
87	rts
88	shld r1,r0		/* (delay slot) shift by the table value; negative counts shift right */
89
/* Exit used when the quotient is the dividend magnitude itself.
   div_by_1_neg - result must be negated: r0 = -r4, then falls through.
   div_by_1     - r0 has already been set by the code that branched here.
   Both restore r5 and r4 from the stack and return.  */
90div_by_1_neg:
91	neg r4,r0		/* r0 = -r4 */
92div_by_1:
93	mov.l @r15+,r5		/* restore r5 */
94	rts
95	mov.l @r15+,r4		/* (delay slot) restore r4 */
96
/* Divisors of 0x10000 and above.
   div_ge64k   - entry from pos_result in __sdivsi3_i4i.  r4/r5 are already
                 on the stack and T = (r5 > r4 >> 8) was set in the delay
                 slot of the branch that jumps here.
   udiv_ge64k  - entry from __udivsi3_i4i with r0 = r4 >> 8; does the same
                 comparison and pushes r4 and the original r5 itself.
   When the divisor is greater than dividend >> 8 the code branches to
   div_r8 / udiv_r8.  Otherwise eight div1 steps are run here and the
   remaining eight at div_ge64k_end.  */
97div_ge64k:
98	bt/s div_r8
99	div0u			/* (delay slot) clear M, Q and T */
100	shll8 r5
101	bra div_ge64k_2
102	div1 r5,r0		/* (delay slot) first div1 step */
103udiv_ge64k:
104	cmp/hi r0,r5		/* T = (r5 > r4 >> 8), unsigned compare */
105	mov r5,r1		/* keep the unshifted divisor for the stack save below */
106	bt udiv_r8
107	shll8 r5
108	mov.l r4,@-r15		/* save r4 */
109	div1 r5,r0		/* first div1 step */
110	mov.l r1,@-r15		/* save the original r5 */
111div_ge64k_2:
112	div1 r5,r0
113	mov.l zero_l,r1		/* r1 = 0 */
114	.rept 4
115	div1 r5,r0
116	.endr
117	mov.l r1,@-r15		/* push a zeroed scratch word */
118	div1 r5,r0
119	mov.w m256_w,r1		/* r1 = 0xff00 sign-extended, i.e. 0xffffff00 */
120	div1 r5,r0		/* eighth div1 step */
121	mov.b r0,@(L_LSWMSB,r15)	/* store the low byte of r0 into bits 8..15 of the scratch word */
/* The xor/and/xor sequence yields r0 = (r0 & 0xffffff00) | (r4 & 0xff).  */
122	xor r4,r0
123	and r1,r0
124	bra div_ge64k_end
125	xor r4,r0		/* (delay slot) */
/* Path taken when the divisor is greater than dividend >> 8: only eight
   div1 steps are executed.
   div_r8   - entry from div_ge64k (signed routine); r4/r5 already saved.
   udiv_r8  - entry from udiv_ge64k; saves r4/r5 itself and clears T.
   On entry r0 = r4 >> 8.  r1 becomes the div1 working register and r0
   collects the bits rotated in through T.  */
126div_r8:
127	shll16 r4
128	bra div_r8_2
129	shll8 r4		/* (delay slot) together with shll16: r4 <<= 24 */
130udiv_r8:
131	mov.l r4,@-r15		/* save r4 */
132	shll16 r4
133	clrt			/* T = 0 before the first rotcl */
134	shll8 r4		/* together with shll16: r4 <<= 24 */
135	mov.l r5,@-r15		/* save r5 */
136div_r8_2:
137	rotcl r4		/* top bit of r4 goes into T for the first div1 */
138	mov r0,r1		/* r1 = r4 >> 8 as computed by the caller */
139	div1 r5,r1
140	mov r4,r0
141	rotcl r0
142	mov r5,r4		/* copy the divisor so r5 can be restored before the last step */
143	div1 r5,r1
144	.rept 5
145	rotcl r0; div1 r5,r1
146	.endr
147	rotcl r0
148	mov.l @r15+,r5		/* restore r5 */
149	div1 r4,r1		/* eighth div1 step, using the divisor copy in r4 */
150	mov.l @r15+,r4		/* restore r4 */
151	rts
152	rotcl r0		/* (delay slot) rotate the final T bit into r0 */
153
/* __sdivsi3_i4i (also exported as __sdivsi3_i4 and __sdivsi3): signed
   32-bit division.
   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient.
   r4 and r5 are pushed on the stack and popped again before returning.
   Negative operands are negated first; the code then splits into a
   positive-result half (pos_result, below) and a negative-result half
   (neg_result).  Within each half the divisor magnitude selects the path:
     <= 128           -> div_le128 / div_le128_neg
     >= 0x10000       -> div_ge64k / div_ge64k_neg
     otherwise        -> inline div1 sequence
   udiv_25 and div_ge64k_end are also jumped to from the unsigned routine
   and from div_ge64k_2.  */
154	.global	__sdivsi3_i4i
155	.global __sdivsi3_i4
156	.global	__sdivsi3
157	.set	__sdivsi3_i4, __sdivsi3_i4i
158	.set	__sdivsi3, __sdivsi3_i4i
159	.type	__sdivsi3_i4i, @function
160	/* This is link-compatible with a __sdivsi3 call,
161	   but we effectively clobber only r1.  */
162__sdivsi3_i4i:
163	mov.l r4,@-r15		/* save r4 */
164	cmp/pz r5		/* T = (divisor >= 0) */
165	mov.w c128_w, r1	/* r1 = 128 */
166	bt/s pos_divisor
167	cmp/pz r4		/* (delay slot) T = (dividend >= 0) */
168	mov.l r5,@-r15		/* save r5 */
169	neg r5,r5		/* divisor was negative: r5 = -divisor */
170	bt/s neg_result		/* dividend >= 0 with a negative divisor */
171	cmp/hi r1,r5		/* (delay slot) T = (r5 > 128), unsigned compare */
172	neg r4,r4		/* both operands negative: negate the dividend as well */
173pos_result:
174	extu.w r5,r0		/* r0 = low 16 bits of r5; T is unchanged */
175	bf div_le128		/* r5 <= 128 */
176	cmp/eq r5,r0		/* T = (r5 fits in 16 bits) */
177	mov r4,r0
178	shlr8 r0		/* r0 = r4 >> 8 */
179	bf/s div_ge64k		/* r5 >= 0x10000 */
180	cmp/hi r0,r5		/* (delay slot) T = (r5 > r4 >> 8), tested at div_ge64k */
181	div0u			/* clear M, Q and T */
182	shll16 r5		/* move the 16-bit divisor into the upper half */
183	div1 r5,r0
184	div1 r5,r0
185	div1 r5,r0
186udiv_25:
187	mov.l zero_l,r1		/* r1 = 0 */
188	div1 r5,r0
189	div1 r5,r0
190	mov.l r1,@-r15		/* push a zeroed scratch word */
191	.rept 3
192	div1 r5,r0
193	.endr
194	mov.b r0,@(L_MSWLSB,r15)	/* store the low byte of r0 into bits 16..23 of the scratch word */
/* xtrct + swap.w keep the upper half of r0 and replace its lower half
   with the low 16 bits of r4.  */
195	xtrct r4,r0
196	swap.w r0,r0
197	.rept 8
198	div1 r5,r0
199	.endr
200	mov.b r0,@(L_LSWMSB,r15)	/* store the low byte of r0 into bits 8..15 of the scratch word */
201div_ge64k_end:
202	.rept 8
203	div1 r5,r0
204	.endr
205	mov.l @r15+,r4 ! zero-extension and swap using LS unit.
206	extu.b r0,r0		/* keep only the low byte of r0 */
207	mov.l @r15+,r5		/* restore r5 */
208	or r4,r0		/* merge in the bytes collected in the scratch word */
209	mov.l @r15+,r4		/* restore r4 */
210	rts
211	rotcl r0		/* (delay slot) rotate the final T bit into r0 */
212
/* Table-driven path for divisor magnitudes up to 128 when the result has
   to be negated.  Entered from neg_result with r0 = low 16 bits of r5 and
   r4/r5 already pushed.  Same inverse-multiply scheme as div_le128_2, with
   a final neg of the quotient.  */
213div_le128_neg:
214	tst #0xfe,r0		/* T = ((r0 & 0xfe) == 0) */
215	mova div_table_ix,r0
216	mov.b @(r0,r5),r1	/* r1 = div_table_ix[r5], sign-extended */
217	mova div_table_inv,r0
218	bt/s div_by_1_neg	/* T set: the result is -r4 */
219	mov.l @(r0,r1),r1	/* (delay slot) r1 = inverse word at byte offset r1 from div_table_inv */
220	mova div_table_clz,r0
221	dmulu.l r1,r4		/* MACH:MACL = inverse * r4, unsigned 64-bit product */
222	mov.b @(r0,r5),r1	/* r1 = div_table_clz[r5]: shift count for shld below */
223	mov.l @r15+,r5		/* restore r5 */
224	sts mach,r0		/* r0 = high 32 bits of the product */
/* T is already 0 here because the bt/s above was not taken, so no clrt
   is needed before the addc.  */
225	/* clrt */
226	addc r4,r0		/* r0 = MACH + r4, carry in T (the table inverses have an implicit leading 1) */
227	mov.l @r15+,r4		/* restore r4 */
228	rotcr r0		/* rotate the carry back in from the top */
229	shld r1,r0		/* shift by the table value; negative counts shift right */
230	rts
231	neg r0,r0		/* (delay slot) negate the quotient */
232
/* Continuation of __sdivsi3_i4i.
   pos_divisor       - divisor >= 0; T holds (dividend >= 0) from the delay
                       slot of the branch that jumps here, r1 = 128.
   neg_result        - exactly one operand was negative: r4 and r5 now hold
                       magnitudes and the quotient is negated on exit.
   div_ge64k_neg_end - shared tail, also reached from div_ge64k_neg.
   div_r8_neg_end    - shared tail, also reached from div_r8_neg.
   The structure mirrors pos_result / div_ge64k_end, except that the
   quotient is assembled in r1 and returned as r0 = -r1.  */
233pos_divisor:
234	mov.l r5,@-r15		/* save r5 */
235	bt/s pos_result		/* dividend >= 0 as well: positive result */
236	cmp/hi r1,r5		/* (delay slot) T = (r5 > 128), unsigned compare */
237	neg r4,r4		/* dividend was negative: r4 = -dividend */
238neg_result:
239	extu.w r5,r0		/* r0 = low 16 bits of r5; T is unchanged */
240	bf div_le128_neg	/* r5 <= 128 */
241	cmp/eq r5,r0		/* T = (r5 fits in 16 bits) */
242	mov r4,r0
243	shlr8 r0		/* r0 = r4 >> 8 */
244	bf/s div_ge64k_neg	/* r5 >= 0x10000 */
245	cmp/hi r0,r5		/* (delay slot) T = (r5 > r4 >> 8), tested at div_ge64k_neg */
246	div0u			/* clear M, Q and T */
247	mov.l zero_l,r1		/* r1 = 0 */
248	shll16 r5		/* move the 16-bit divisor into the upper half */
249	div1 r5,r0
250	mov.l r1,@-r15		/* push a zeroed scratch word */
251	.rept 7
252	div1 r5,r0
253	.endr
254	mov.b r0,@(L_MSWLSB,r15)	/* store the low byte of r0 into bits 16..23 of the scratch word */
/* xtrct + swap.w keep the upper half of r0 and replace its lower half
   with the low 16 bits of r4.  */
255	xtrct r4,r0
256	swap.w r0,r0
257	.rept 8
258	div1 r5,r0
259	.endr
260	mov.b r0,@(L_LSWMSB,r15)	/* store the low byte of r0 into bits 8..15 of the scratch word */
261div_ge64k_neg_end:
262	.rept 8
263	div1 r5,r0
264	.endr
265	mov.l @r15+,r4 ! zero-extension and swap using LS unit.
266	extu.b r0,r1		/* r1 = low byte of r0 */
267	mov.l @r15+,r5		/* restore r5 */
268	or r4,r1		/* merge in the bytes collected in the scratch word */
269div_r8_neg_end:
270	mov.l @r15+,r4		/* restore r4 */
271	rotcl r1		/* rotate the final T bit into r1 */
272	rts
273	neg r1,r0		/* (delay slot) r0 = -r1 */
274
/* Negative-result counterpart of div_ge64k: divisor magnitude >= 0x10000.
   Entered from neg_result with r0 = r4 >> 8, r4/r5 already pushed, and
   T = (r5 > r4 >> 8) set in the delay slot of the branch that jumps here.
   Runs eight div1 steps, then continues at div_ge64k_neg_end.
   c128_w is the 16-bit constant 128 that both entry points load into r1
   with mov.w.  */
275div_ge64k_neg:
276	bt/s div_r8_neg
277	div0u			/* (delay slot) clear M, Q and T */
278	shll8 r5
279	mov.l zero_l,r1		/* r1 = 0 */
280	.rept 6
281	div1 r5,r0
282	.endr
283	mov.l r1,@-r15		/* push a zeroed scratch word */
284	div1 r5,r0
285	mov.w m256_w,r1		/* r1 = 0xff00 sign-extended, i.e. 0xffffff00 */
286	div1 r5,r0		/* eighth div1 step */
287	mov.b r0,@(L_LSWMSB,r15)	/* store the low byte of r0 into bits 8..15 of the scratch word */
/* The xor/and/xor sequence yields r0 = (r0 & 0xffffff00) | (r4 & 0xff).  */
288	xor r4,r0
289	and r1,r0
290	bra div_ge64k_neg_end
291	xor r4,r0		/* (delay slot) */
292
293c128_w:
294	.word 128
295
/* Negative-result counterpart of div_r8: reached from div_ge64k_neg when
   the divisor is greater than dividend >> 8, so only eight div1 steps are
   executed.  r0 (= r4 >> 8 on entry) is the div1 working register and r1
   collects the bits rotated in through T; div_r8_neg_end then returns
   r0 = -r1.
   m256_w is the 16-bit constant 0xff00; loaded with mov.w it is
   sign-extended to 0xffffff00.  */
296div_r8_neg:
297	clrt			/* T = 0 before the first rotcl */
298	shll16 r4
299	mov r4,r1
300	shll8 r1		/* r1 = r4 << 24 */
301	mov r5,r4		/* copy the divisor so r5 can be restored before the last step */
302	.rept 7
303	rotcl r1; div1 r5,r0
304	.endr
305	mov.l @r15+,r5		/* restore r5 */
306	rotcl r1
307	bra div_r8_neg_end
308	div1 r4,r0		/* (delay slot) eighth div1 step, using the divisor copy in r4 */
309
310m256_w:
311	.word 0xff00
/* div_table_clz: signed byte shift counts, indexed by the divisor.
   The table paths load the entry with mov.b @(r0,r5) and apply it with
   shld, so negative values are right shifts.  There are 128 entries here
   (index 0 .. 127); the entry for index 128 is the first byte of
   div_table_ix, which follows immediately.  */
312/* This table has been generated by divtab-sh4.c.  */
313	.balign 4
314div_table_clz:
315	.byte	0	/* index 0 */
316	.byte	1	/* index 1 */
317	.byte	0	/* index 2 */
318	.byte	-1	/* index 3 .. 4 */
319	.byte	-1
320	.byte	-2	/* index 5 .. 8 */
321	.byte	-2
322	.byte	-2
323	.byte	-2
324	.byte	-3	/* index 9 .. 16 */
325	.byte	-3
326	.byte	-3
327	.byte	-3
328	.byte	-3
329	.byte	-3
330	.byte	-3
331	.byte	-3
332	.byte	-4	/* index 17 .. 32 */
333	.byte	-4
334	.byte	-4
335	.byte	-4
336	.byte	-4
337	.byte	-4
338	.byte	-4
339	.byte	-4
340	.byte	-4
341	.byte	-4
342	.byte	-4
343	.byte	-4
344	.byte	-4
345	.byte	-4
346	.byte	-4
347	.byte	-4
348	.byte	-5	/* index 33 .. 64 */
349	.byte	-5
350	.byte	-5
351	.byte	-5
352	.byte	-5
353	.byte	-5
354	.byte	-5
355	.byte	-5
356	.byte	-5
357	.byte	-5
358	.byte	-5
359	.byte	-5
360	.byte	-5
361	.byte	-5
362	.byte	-5
363	.byte	-5
364	.byte	-5
365	.byte	-5
366	.byte	-5
367	.byte	-5
368	.byte	-5
369	.byte	-5
370	.byte	-5
371	.byte	-5
372	.byte	-5
373	.byte	-5
374	.byte	-5
375	.byte	-5
376	.byte	-5
377	.byte	-5
378	.byte	-5
379	.byte	-5
380	.byte	-6	/* index 65 .. 127 */
381	.byte	-6
382	.byte	-6
383	.byte	-6
384	.byte	-6
385	.byte	-6
386	.byte	-6
387	.byte	-6
388	.byte	-6
389	.byte	-6
390	.byte	-6
391	.byte	-6
392	.byte	-6
393	.byte	-6
394	.byte	-6
395	.byte	-6
396	.byte	-6
397	.byte	-6
398	.byte	-6
399	.byte	-6
400	.byte	-6
401	.byte	-6
402	.byte	-6
403	.byte	-6
404	.byte	-6
405	.byte	-6
406	.byte	-6
407	.byte	-6
408	.byte	-6
409	.byte	-6
410	.byte	-6
411	.byte	-6
412	.byte	-6
413	.byte	-6
414	.byte	-6
415	.byte	-6
416	.byte	-6
417	.byte	-6
418	.byte	-6
419	.byte	-6
420	.byte	-6
421	.byte	-6
422	.byte	-6
423	.byte	-6
424	.byte	-6
425	.byte	-6
426	.byte	-6
427	.byte	-6
428	.byte	-6
429	.byte	-6
430	.byte	-6
431	.byte	-6
432	.byte	-6
433	.byte	-6
434	.byte	-6
435	.byte	-6
436	.byte	-6
437	.byte	-6
438	.byte	-6
439	.byte	-6
440	.byte	-6
441	.byte	-6
442	.byte	-6
/* div_table_ix: 129 signed bytes, indexed by the divisor (0 .. 128).
   Each entry is a byte offset relative to div_table_inv; the table paths
   load it with mov.b @(r0,r5) and then fetch the inverse with
   mov.l @(r0,r1).  Every entry except index 0 is a multiple of 4.  */
443/* Lookup table translating positive divisor to index into table of
444   normalized inverse.  N.B. the '0' entry is also the last entry of the
445 previous table, and causes an unaligned access for division by zero.  */
446div_table_ix:
447	.byte	-6	/* index 0; doubles as div_table_clz[128] */
448	.byte	-128
449	.byte	-128
450	.byte	0
451	.byte	-128
452	.byte	-64
453	.byte	0
454	.byte	64
455	.byte	-128
456	.byte	-96
457	.byte	-64
458	.byte	-32
459	.byte	0
460	.byte	32
461	.byte	64
462	.byte	96
463	.byte	-128
464	.byte	-112
465	.byte	-96
466	.byte	-80
467	.byte	-64
468	.byte	-48
469	.byte	-32
470	.byte	-16
471	.byte	0
472	.byte	16
473	.byte	32
474	.byte	48
475	.byte	64
476	.byte	80
477	.byte	96
478	.byte	112
479	.byte	-128
480	.byte	-120
481	.byte	-112
482	.byte	-104
483	.byte	-96
484	.byte	-88
485	.byte	-80
486	.byte	-72
487	.byte	-64
488	.byte	-56
489	.byte	-48
490	.byte	-40
491	.byte	-32
492	.byte	-24
493	.byte	-16
494	.byte	-8
495	.byte	0
496	.byte	8
497	.byte	16
498	.byte	24
499	.byte	32
500	.byte	40
501	.byte	48
502	.byte	56
503	.byte	64
504	.byte	72
505	.byte	80
506	.byte	88
507	.byte	96
508	.byte	104
509	.byte	112
510	.byte	120
511	.byte	-128
512	.byte	-124
513	.byte	-120
514	.byte	-116
515	.byte	-112
516	.byte	-108
517	.byte	-104
518	.byte	-100
519	.byte	-96
520	.byte	-92
521	.byte	-88
522	.byte	-84
523	.byte	-80
524	.byte	-76
525	.byte	-72
526	.byte	-68
527	.byte	-64
528	.byte	-60
529	.byte	-56
530	.byte	-52
531	.byte	-48
532	.byte	-44
533	.byte	-40
534	.byte	-36
535	.byte	-32
536	.byte	-28
537	.byte	-24
538	.byte	-20
539	.byte	-16
540	.byte	-12
541	.byte	-8
542	.byte	-4
543	.byte	0
544	.byte	4
545	.byte	8
546	.byte	12
547	.byte	16
548	.byte	20
549	.byte	24
550	.byte	28
551	.byte	32
552	.byte	36
553	.byte	40
554	.byte	44
555	.byte	48
556	.byte	52
557	.byte	56
558	.byte	60
559	.byte	64
560	.byte	68
561	.byte	72
562	.byte	76
563	.byte	80
564	.byte	84
565	.byte	88
566	.byte	92
567	.byte	96
568	.byte	100
569	.byte	104
570	.byte	108
571	.byte	112
572	.byte	116
573	.byte	120
574	.byte	124
575	.byte	-128	/* index 128 */
/* Table of 64 inverse words.  div_table_inv labels the middle of the
   table: 32 words precede it (byte offsets -128 .. -4, starting at zero_l)
   and 32 follow it (byte offsets 0 .. 124), matching the signed byte
   offsets stored in div_table_ix.
   zero_l, the first word, is 0 and is also loaded by the div1 paths
   (mov.l zero_l,r1) as a plain zero constant.  */
576/* 1/64 .. 1/127, normalized.  There is an implicit leading 1 in bit 32.  */
577	.balign 4
578zero_l:
579	.long	0x0
580	.long	0xF81F81F9
581	.long	0xF07C1F08
582	.long	0xE9131AC0
583	.long	0xE1E1E1E2
584	.long	0xDAE6076C
585	.long	0xD41D41D5
586	.long	0xCD856891
587	.long	0xC71C71C8
588	.long	0xC0E07039
589	.long	0xBACF914D
590	.long	0xB4E81B4F
591	.long	0xAF286BCB
592	.long	0xA98EF607
593	.long	0xA41A41A5
594	.long	0x9EC8E952
595	.long	0x9999999A
596	.long	0x948B0FCE
597	.long	0x8F9C18FA
598	.long	0x8ACB90F7
599	.long	0x86186187
600	.long	0x81818182
601	.long	0x7D05F418
602	.long	0x78A4C818
603	.long	0x745D1746
604	.long	0x702E05C1
605	.long	0x6C16C16D
606	.long	0x68168169
607	.long	0x642C8591
608	.long	0x60581606
609	.long	0x5C9882BA
610	.long	0x58ED2309
611div_table_inv:
612	.long	0x55555556
613	.long	0x51D07EAF
614	.long	0x4E5E0A73
615	.long	0x4AFD6A06
616	.long	0x47AE147B
617	.long	0x446F8657
618	.long	0x41414142
619	.long	0x3E22CBCF
620	.long	0x3B13B13C
621	.long	0x38138139
622	.long	0x3521CFB3
623	.long	0x323E34A3
624	.long	0x2F684BDB
625	.long	0x2C9FB4D9
626	.long	0x29E4129F
627	.long	0x27350B89
628	.long	0x24924925
629	.long	0x21FB7813
630	.long	0x1F7047DD
631	.long	0x1CF06ADB
632	.long	0x1A7B9612
633	.long	0x18118119
634	.long	0x15B1E5F8
635	.long	0x135C8114
636	.long	0x11111112
637	.long	0xECF56BF
638	.long	0xC9714FC
639	.long	0xA6810A7
640	.long	0x8421085
641	.long	0x624DD30
642	.long	0x4104105
643	.long	0x2040811
644	/* maximum error: 0.987342 scaled: 0.921875*/
645