xref: /openbmc/u-boot/arch/sh/lib/udivsi3_i4i.S (revision b9553986)
1/* SPDX-License-Identifier: GPL-2.0+ */
2/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
3   2004, 2005, 2006
4   Free Software Foundation, Inc.
5 */
6
7!! libgcc routines for the Renesas / SuperH SH CPUs.
8!! Contributed by Steve Chamberlain.
9!! sac@cygnus.com
10
11!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
12!! recoded in assembly by Toshiyasu Morita
13!! tm@netcom.com
14
15/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
16   ELF local label prefixes by J"orn Rennecke
17   amylaar@cygnus.com  */
18
19/* This code uses shld and is therefore not suitable for SH1 / SH2.  */
20
21/* Signed / unsigned division without use of FPU, optimized for SH4.
22   Uses a lookup table for divisors in the range -128 .. +128, and
23   div1 with case distinction for larger divisors in three more ranges.
24   The code is lumped together with the table to allow the use of mova.  */
/* Byte offsets of individual bytes inside a 32-bit word in memory,
   selected by endianness:
     L_LSB    - least significant byte (bits 0..7)
     L_LSWMSB - upper byte of the low 16-bit half (bits 8..15)
     L_MSWLSB - lower byte of the high 16-bit half (bits 16..23)
   The division code uses L_LSWMSB and L_MSWLSB as mov.b displacements
   off r15 to place bytes into a scratch word on the stack.  */
25#ifdef CONFIG_CPU_LITTLE_ENDIAN
26#define L_LSB 0
27#define L_LSWMSB 1
28#define L_MSWLSB 2
29#else
30#define L_LSB 3
31#define L_LSWMSB 2
32#define L_MSWLSB 1
33#endif
34
/* __udivsi3_i4i (alias __udivsi3_i4): unsigned 32-bit division.
   In:  r4 = dividend, r5 = divisor.
   Out: r0 = quotient.
   r4 and r5 are pushed on the stack and popped again before returning;
   r1 and the T bit are used as scratch (the table path also uses
   MACH/MACL).
   This entry sequence only classifies the divisor and dispatches:
     divisor <= 128      -> udiv_le128 (table lookup and multiply)
     divisor >= 0x10000  -> udiv_ge64k
     otherwise           -> four divide steps here, then join udiv_25,
                            which sits inside the __sdivsi3_i4i code.  */
35	.balign 4
36	.global	__udivsi3_i4i
37	.global	__udivsi3_i4
38	.set	__udivsi3_i4, __udivsi3_i4i
39	.type	__udivsi3_i4i, @function
40__udivsi3_i4i:
	/* r1 = 128, the upper bound of the table-driven divisor range.  */
41	mov.w c128_w, r1
	/* Initialise the divide-step state for an unsigned div1 sequence.  */
42	div0u
	/* r0 = dividend >> 8.  */
43	mov r4,r0
44	shlr8 r0
	/* T = (divisor > 128), unsigned compare.  */
45	cmp/hi r1,r5
	/* r1 = low 16 bits of the divisor, zero-extended.  */
46	extu.w r5,r1
47	bf udiv_le128
	/* T = (divisor == its own low 16 bits), i.e. divisor < 0x10000.  */
48	cmp/eq r5,r1
49	bf udiv_ge64k
	/* 128 < divisor < 0x10000: r0 = dividend >> 9; shlr leaves the bit
	   that was shifted out in T.  */
50	shlr r0
	/* Keep the unshifted divisor in r1 so that it is the value saved on
	   the stack, then move the divisor into the upper half word.  */
51	mov r5,r1
52	shll16 r5
	/* The two saves (dividend, then original divisor) are interleaved
	   with the first divide steps.  */
53	mov.l r4,@-r15
54	div1 r5,r0
55	mov.l r1,@-r15
56	div1 r5,r0
57	div1 r5,r0
58	bra udiv_25
	/* Delay slot: fourth divide step, executed before udiv_25.  */
59	div1 r5,r0
60
/* Table-driven division for divisors up to 128.
   div_le128 is the entry used by the signed routine (bf from pos_result),
   which has already pushed r4 and r5.  udiv_le128 is the unsigned entry
   and performs the two pushes itself.  Both continue at div_le128_2 with
   r1 = div_table_ix[divisor].
   The quotient is formed as
     high32(inverse * dividend) + dividend   (33-bit sum, carry in T)
   rotated right by one through T and then shifted by the
   div_table_clz[divisor] count.  The "+ dividend" term supplies the
   implicit leading 1 of the normalized inverse.  */
61div_le128:
	/* r0 = address of div_table_ix.  */
62	mova div_table_ix,r0
63	bra div_le128_2
	/* Delay slot: r1 = sign-extended byte div_table_ix[divisor].  */
64	mov.b @(r0,r5),r1
65udiv_le128:
	/* Save the dividend.  */
66	mov.l r4,@-r15
67	mova div_table_ix,r0
	/* r1 = sign-extended byte div_table_ix[divisor].  */
68	mov.b @(r0,r5),r1
	/* Save the divisor.  */
69	mov.l r5,@-r15
70div_le128_2:
	/* r1 = 32-bit inverse found r1 bytes from div_table_inv; the offset
	   may be negative.  The index entry for divisor 0 is not a multiple
	   of four, so this load is misaligned for division by zero.  */
71	mova div_table_inv,r0
72	mov.l @(r0,r1),r1
	/* T = 1 when the divisor has no bits set in 0xfe, which in this
	   range means the divisor is 1 (or 0).  */
73	mov r5,r0
74	tst #0xfe,r0
75	mova div_table_clz,r0
	/* MACH:MACL = inverse * dividend, unsigned 64-bit product.  */
76	dmulu.l r1,r4
	/* r1 = shift count div_table_clz[divisor].  */
77	mov.b @(r0,r5),r1
78	bt/s div_by_1
	/* Delay slot: r0 = dividend, which is the result when T was set.  */
79	mov r4,r0
	/* Restore the caller's divisor.  */
80	mov.l @r15+,r5
	/* r0 = upper 32 bits of the product.  */
81	sts mach,r0
	/* T is already 0 here because the bt/s above fell through, hence no
	   clrt is needed before the addc.  */
82	/* clrt */
	/* r0 = r0 + dividend; the carry out of bit 31 goes to T.  */
83	addc r4,r0
	/* Restore the caller's dividend.  */
84	mov.l @r15+,r4
	/* Shift the 33-bit sum right by one: T enters at bit 31.  */
85	rotcr r0
86	rts
	/* Delay slot: apply the table shift count; shld shifts right for a
	   negative count.  */
87	shld r1,r0
88
/* Shared exit for the "tst #0xfe" case of the table paths.
   div_by_1_neg is reached from div_le128_neg and first sets r0 = -r4.
   div_by_1 is reached from div_le128_2 with r0 = r4 already set in the
   branch delay slot.  Both then pop the saved r5 and r4 and return.  */
89div_by_1_neg:
90	neg r4,r0
91div_by_1:
	/* Restore the caller's divisor.  */
92	mov.l @r15+,r5
93	rts
	/* Delay slot: restore the caller's dividend.  */
94	mov.l @r15+,r4
95
/* Divisors of 0x10000 and above.
   div_ge64k is the entry used by the signed routine: it arrives via bf/s
   from pos_result with r4/r5 already pushed, r0 = dividend >> 8 and
   T = (divisor > r0) from that branch's delay slot.  udiv_ge64k is the
   unsigned entry and does the compare and the pushes itself.
   When divisor > dividend >> 8 control goes to div_r8 / udiv_r8.
   Otherwise the divisor is shifted left by 8 and eight div1 steps are
   run on r0 (one before div_ge64k_2, seven after it).  The low byte of
   r0 is then stored into a zeroed scratch word on the stack, the low
   byte of r0 is replaced by the low byte of the dividend, and the
   routine continues at div_ge64k_end.  */
96div_ge64k:
97	bt/s div_r8
	/* Delay slot: initialise the divide-step state (executed on both
	   outcomes of the branch).  */
98	div0u
99	shll8 r5
100	bra div_ge64k_2
	/* Delay slot: first divide step.  */
101	div1 r5,r0
102udiv_ge64k:
	/* T = (divisor > dividend >> 8), unsigned.  */
103	cmp/hi r0,r5
	/* Keep the unshifted divisor in r1 for the stack save below.  */
104	mov r5,r1
105	bt udiv_r8
106	shll8 r5
	/* Save the dividend, run the first divide step, then save the
	   original divisor.  */
107	mov.l r4,@-r15
108	div1 r5,r0
109	mov.l r1,@-r15
110div_ge64k_2:
111	div1 r5,r0
	/* r1 = 0, loaded from the word at zero_l.  */
112	mov.l zero_l,r1
113	.rept 4
114	div1 r5,r0
115	.endr
	/* Push a zeroed scratch word; div_ge64k_end pops it again.  */
116	mov.l r1,@-r15
117	div1 r5,r0
	/* r1 = 0xff00 from m256_w, sign-extended by mov.w to 0xffffff00.  */
118	mov.w m256_w,r1
119	div1 r5,r0
	/* Store the low byte of r0 at bits 8..15 of the scratch word.  */
120	mov.b r0,@(L_LSWMSB,r15)
	/* xor / and / xor merge: r0 keeps its upper 24 bits and takes its
	   low byte from the dividend in r4.  */
121	xor r4,r0
122	and r1,r0
123	bra div_ge64k_end
	/* Delay slot: final xor of the merge.  */
124	xor r4,r0
/* Eight-step division, used when divisor > dividend >> 8.
   div_r8 is the entry from div_ge64k (signed routine, r4/r5 already
   pushed, T cleared by the div0u in the delay slot of the branch here).
   udiv_r8 is the unsigned entry: it pushes r4 and r5 and clears T.
   On entry r0 = dividend >> 8.  r4 is turned into dividend << 24 so the
   low eight dividend bits can be fed to div1 through T with rotcl.
   r1 takes the value of r0 and is the register the div1 steps operate
   on.  r0 is then reused: each rotcl r0 moves the T produced by the
   preceding div1 into bit 0 and moves the next dividend bit out of
   bit 31 into T.  Eight div1 steps are executed in total.  */
125div_r8:
126	shll16 r4
127	bra div_r8_2
	/* Delay slot: completes r4 = dividend << 24.  */
128	shll8 r4
129udiv_r8:
	/* Save the dividend.  */
130	mov.l r4,@-r15
131	shll16 r4
132	clrt
133	shll8 r4
	/* Save the divisor.  */
134	mov.l r5,@-r15
135div_r8_2:
	/* Move the top bit of r4 into T for the first divide step.  */
136	rotcl r4
	/* r1 = dividend >> 8, the value the divide steps work on.  */
137	mov r0,r1
138	div1 r5,r1
	/* From here on r0 holds the remaining dividend bits and collects
	   the result bits.  */
139	mov r4,r0
140	rotcl r0
	/* Copy the divisor to r4 so that the last div1 can still use it
	   after r5 has been restored from the stack.  */
141	mov r5,r4
142	div1 r5,r1
143	.rept 5
144	rotcl r0; div1 r5,r1
145	.endr
146	rotcl r0
	/* Restore the caller's divisor.  */
147	mov.l @r15+,r5
	/* Eighth divide step, using the divisor copy in r4.  */
148	div1 r4,r1
	/* Restore the caller's dividend.  */
149	mov.l @r15+,r4
150	rts
	/* Delay slot: shift the T from the last div1 into r0.  */
151	rotcl r0
152
/* __sdivsi3_i4i (aliases __sdivsi3_i4 and __sdivsi3): signed 32-bit
   division.
   In:  r4 = dividend, r5 = divisor.
   Out: r0 = quotient.
   The caller's r4 and r5 are pushed first and popped again on every exit
   path.  Negative operands are negated so the divide steps work on
   magnitudes.  Control then goes to pos_result when the operand signs
   agree and to neg_result when they differ; the neg_result paths negate
   the result just before returning.
   This block also contains udiv_25 and div_ge64k_end, which the unsigned
   routine branches into.  */
153	.global	__sdivsi3_i4i
154	.global __sdivsi3_i4
155	.global	__sdivsi3
156	.set	__sdivsi3_i4, __sdivsi3_i4i
157	.set	__sdivsi3, __sdivsi3_i4i
158	.type	__sdivsi3_i4i, @function
159	/* This is link-compatible with a __sdivsi3 call,
160	   but we effectively clobber only r1.  */
161__sdivsi3_i4i:
	/* Save the caller's dividend.  */
162	mov.l r4,@-r15
	/* T = (divisor >= 0).  */
163	cmp/pz r5
	/* r1 = 128, the upper bound of the table-driven divisor range.  */
164	mov.w c128_w, r1
165	bt/s pos_divisor
	/* Delay slot: T = (dividend >= 0), tested by the next bt/s on
	   either path.  */
166	cmp/pz r4
	/* Negative divisor: save the original, continue with its negation.  */
167	mov.l r5,@-r15
168	neg r5,r5
	/* Dividend >= 0 with a negative divisor: go to the negated-result
	   paths.  */
169	bt/s neg_result
	/* Delay slot: T = (negated divisor > 128), unsigned.  */
170	cmp/hi r1,r5
	/* Both operands negative: negate the dividend as well.  */
171	neg r4,r4
172pos_result:
	/* r0 = low 16 bits of the divisor.  T still holds the "> 128"
	   compare from the delay slot that led here.  */
173	extu.w r5,r0
174	bf div_le128
	/* T = (divisor == its own low 16 bits), i.e. divisor < 0x10000.  */
175	cmp/eq r5,r0
	/* r0 = dividend >> 8.  */
176	mov r4,r0
177	shlr8 r0
178	bf/s div_ge64k
	/* Delay slot: T = (divisor > dividend >> 8); div_ge64k branches on
	   this value.  */
179	cmp/hi r0,r5
	/* 128 < divisor < 0x10000: divisor into the upper half word, then
	   the first three divide steps.  */
180	div0u
181	shll16 r5
182	div1 r5,r0
183	div1 r5,r0
184	div1 r5,r0
	/* udiv_25 is also the join point for __udivsi3_i4i, which arrives
	   with the dividend and the original divisor already pushed.  */
185udiv_25:
	/* r1 = 0, loaded from the word at zero_l.  */
186	mov.l zero_l,r1
187	div1 r5,r0
188	div1 r5,r0
	/* Push a zeroed scratch word; it is popped at div_ge64k_end.  */
189	mov.l r1,@-r15
190	.rept 3
191	div1 r5,r0
192	.endr
	/* Store the low byte of r0 at bits 16..23 of the scratch word.  */
193	mov.b r0,@(L_MSWLSB,r15)
	/* xtrct followed by swap.w keeps the upper half of r0 and replaces
	   its lower half with the lower half of the dividend in r4.  */
194	xtrct r4,r0
195	swap.w r0,r0
196	.rept 8
197	div1 r5,r0
198	.endr
	/* Store the low byte of r0 at bits 8..15 of the scratch word.  */
199	mov.b r0,@(L_LSWMSB,r15)
	/* Common tail of the positive-result div1 paths; also entered from
	   div_ge64k_2.  */
200div_ge64k_end:
201	.rept 8
202	div1 r5,r0
203	.endr
	/* Pop the scratch word (the bytes stored above) into r4.  */
204	mov.l @r15+,r4 ! zero-extension and swap using LS unit.
	/* Keep only the low byte of r0.  */
205	extu.b r0,r0
	/* Restore the caller's divisor.  */
206	mov.l @r15+,r5
	/* Combine the stored bytes with the low byte.  */
207	or r4,r0
	/* Restore the caller's dividend.  */
208	mov.l @r15+,r4
209	rts
	/* Delay slot: shift r0 left by one, bringing in the T left by the
	   last div1.  */
210	rotcl r0
211
/* Table-driven division for divisors up to 128 on the negated-result
   path.  Reached by bf from neg_result with r4 and r5 holding the
   magnitudes, the caller's r4/r5 on the stack, and r0 = low 16 bits of
   the divisor.  The arithmetic matches div_le128_2; the difference is
   that the result is negated in the rts delay slot, and the divisor-1
   case leaves through div_by_1_neg.  */
212div_le128_neg:
	/* T = 1 when the divisor has no bits set in 0xfe, which in this
	   range means the divisor is 1 (or 0).  */
213	tst #0xfe,r0
214	mova div_table_ix,r0
	/* r1 = sign-extended byte div_table_ix[divisor].  */
215	mov.b @(r0,r5),r1
216	mova div_table_inv,r0
217	bt/s div_by_1_neg
	/* Delay slot: r1 = 32-bit inverse found r1 bytes from
	   div_table_inv.  Executed on both outcomes of the branch.  */
218	mov.l @(r0,r1),r1
219	mova div_table_clz,r0
	/* MACH:MACL = inverse * dividend, unsigned 64-bit product.  */
220	dmulu.l r1,r4
	/* r1 = shift count div_table_clz[divisor].  */
221	mov.b @(r0,r5),r1
	/* Restore the caller's divisor.  */
222	mov.l @r15+,r5
	/* r0 = upper 32 bits of the product.  */
223	sts mach,r0
	/* T is already 0 here because the bt/s above fell through.  */
224	/* clrt */
	/* r0 = r0 + dividend; the carry out of bit 31 goes to T.  */
225	addc r4,r0
	/* Restore the caller's dividend.  */
226	mov.l @r15+,r4
	/* Shift the 33-bit sum right by one: T enters at bit 31.  */
227	rotcr r0
	/* Apply the table shift count; shld shifts right for a negative
	   count.  */
228	shld r1,r0
229	rts
	/* Delay slot: negate the result.  */
230	neg r0,r0
231
/* pos_divisor: continuation of __sdivsi3_i4i for a divisor >= 0.  It is
   entered with T = (dividend >= 0) and r1 = 128, saves r5, and goes to
   pos_result when the dividend is non-negative.  Otherwise the dividend
   is negated and control falls into neg_result.
   neg_result: the operand signs differ.  It classifies the divisor the
   same way pos_result does, but dispatches to the *_neg variants, whose
   exits negate the value they return.  The fall-through case
   (128 < divisor < 0x10000) runs 8 + 8 + 8 div1 steps, storing bytes
   into a scratch word on the stack after the first two groups.
   div_ge64k_neg_end and div_r8_neg_end are the shared tails; they are
   also branch targets of div_ge64k_neg and div_r8_neg.  */
232pos_divisor:
	/* Save the caller's divisor.  */
233	mov.l r5,@-r15
234	bt/s pos_result
	/* Delay slot: T = (divisor > 128), unsigned; used at pos_result or
	   neg_result.  */
235	cmp/hi r1,r5
	/* Negative dividend with a non-negative divisor: work with the
	   negated dividend.  */
236	neg r4,r4
237neg_result:
	/* r0 = low 16 bits of the divisor.  T still holds the "> 128"
	   compare.  */
238	extu.w r5,r0
239	bf div_le128_neg
	/* T = (divisor == its own low 16 bits), i.e. divisor < 0x10000.  */
240	cmp/eq r5,r0
	/* r0 = dividend >> 8.  */
241	mov r4,r0
242	shlr8 r0
243	bf/s div_ge64k_neg
	/* Delay slot: T = (divisor > dividend >> 8); div_ge64k_neg branches
	   on this value.  */
244	cmp/hi r0,r5
245	div0u
	/* r1 = 0, loaded from the word at zero_l.  */
246	mov.l zero_l,r1
	/* Move the divisor into the upper half word.  */
247	shll16 r5
248	div1 r5,r0
	/* Push a zeroed scratch word; popped at div_ge64k_neg_end.  */
249	mov.l r1,@-r15
250	.rept 7
251	div1 r5,r0
252	.endr
	/* Store the low byte of r0 at bits 16..23 of the scratch word.  */
253	mov.b r0,@(L_MSWLSB,r15)
	/* xtrct followed by swap.w keeps the upper half of r0 and replaces
	   its lower half with the lower half of the dividend in r4.  */
254	xtrct r4,r0
255	swap.w r0,r0
256	.rept 8
257	div1 r5,r0
258	.endr
	/* Store the low byte of r0 at bits 8..15 of the scratch word.  */
259	mov.b r0,@(L_LSWMSB,r15)
260div_ge64k_neg_end:
261	.rept 8
262	div1 r5,r0
263	.endr
	/* Pop the scratch word (the bytes stored above) into r4.  */
264	mov.l @r15+,r4 ! zero-extension and swap using LS unit.
	/* r1 = low byte of r0, zero-extended.  */
265	extu.b r0,r1
	/* Restore the caller's divisor.  */
266	mov.l @r15+,r5
	/* Combine the stored bytes with the low byte.  */
267	or r4,r1
268div_r8_neg_end:
	/* Restore the caller's dividend.  */
269	mov.l @r15+,r4
	/* Shift r1 left by one, bringing in the T left by the last div1.  */
270	rotcl r1
271	rts
	/* Delay slot: r0 = -r1, the negated result.  */
272	neg r1,r0
273
/* Divisors of 0x10000 and above on the negated-result path.  Reached
   via bf/s from neg_result with r0 = dividend >> 8 and
   T = (divisor > r0) set in that branch's delay slot.
   When T is set control goes to div_r8_neg.  Otherwise the divisor is
   shifted left by 8 and eight div1 steps are run on r0.  The low byte of
   r0 is then stored into a zeroed scratch word on the stack, the low
   byte of r0 is replaced by the low byte of the dividend, and the
   routine continues at div_ge64k_neg_end.  */
274div_ge64k_neg:
275	bt/s div_r8_neg
	/* Delay slot: initialise the divide-step state (executed on both
	   outcomes of the branch).  */
276	div0u
277	shll8 r5
	/* r1 = 0, loaded from the word at zero_l.  */
278	mov.l zero_l,r1
279	.rept 6
280	div1 r5,r0
281	.endr
	/* Push a zeroed scratch word; popped at div_ge64k_neg_end.  */
282	mov.l r1,@-r15
283	div1 r5,r0
	/* r1 = 0xff00 from m256_w, sign-extended by mov.w to 0xffffff00.  */
284	mov.w m256_w,r1
285	div1 r5,r0
	/* Store the low byte of r0 at bits 8..15 of the scratch word.  */
286	mov.b r0,@(L_LSWMSB,r15)
	/* xor / and / xor merge: r0 keeps its upper 24 bits and takes its
	   low byte from the dividend in r4.  */
287	xor r4,r0
288	and r1,r0
289	bra div_ge64k_neg_end
	/* Delay slot: final xor of the merge.  */
290	xor r4,r0
291
/* 16-bit constant 128, loaded with mov.w at both entry points as the
   upper bound of the table-driven divisor range.  It sits between code
   fragments, after an unconditional branch; presumably this keeps it
   within reach of the PC-relative mov.w loads.  */
292c128_w:
293	.word 128
294
/* Eight-step division on the negated-result path, used when
   divisor > dividend >> 8.  Reached from div_ge64k_neg with
   r0 = dividend >> 8 and the caller's r4/r5 on the stack.
   r1 is set to dividend << 24 and is rotated with rotcl before every
   div1: that moves the next dividend bit into T and the T from the
   previous step into bit 0.  r0 is the register the div1 steps operate
   on.  Eight div1 steps are executed in total; the last rotcl and the
   negation happen at div_r8_neg_end.  */
295div_r8_neg:
296	clrt
	/* r1 = dividend << 24.  */
297	shll16 r4
298	mov r4,r1
299	shll8 r1
	/* Copy the divisor to r4 so that the last div1 can still use it
	   after r5 has been restored from the stack.  */
300	mov r5,r4
301	.rept 7
302	rotcl r1; div1 r5,r0
303	.endr
	/* Restore the caller's divisor.  */
304	mov.l @r15+,r5
305	rotcl r1
306	bra div_r8_neg_end
	/* Delay slot: eighth divide step, using the divisor copy in r4.  */
307	div1 r4,r0
308
/* 16-bit constant 0xff00.  It is loaded with mov.w in div_ge64k_2 and
   div_ge64k_neg and used as the mask of the xor / and / xor sequence
   that replaces the low byte of r0.  */
309m256_w:
310	.word 0xff00
/* div_table_clz: one signed byte per divisor, indexed by the divisor
   itself (mov.b @(r0,r5),r1 after mova div_table_clz,r0).  The value is
   the shift count that the table-driven paths hand to shld as the last
   step of forming the result; shld shifts right for negative counts.
   128 entries are stored here, for divisors 0 .. 127.  The byte for
   divisor 128 is the first byte of div_table_ix, which follows
   directly.  */
311/* This table has been generated by divtab-sh4.c.  */
312	.balign 4
313div_table_clz:
	/* Divisors 0 .. 4.  */
314	.byte	0
315	.byte	1
316	.byte	0
317	.byte	-1
318	.byte	-1
	/* Divisors 5 .. 8.  */
319	.byte	-2
320	.byte	-2
321	.byte	-2
322	.byte	-2
	/* Divisors 9 .. 16.  */
323	.byte	-3
324	.byte	-3
325	.byte	-3
326	.byte	-3
327	.byte	-3
328	.byte	-3
329	.byte	-3
330	.byte	-3
	/* Divisors 17 .. 32.  */
331	.byte	-4
332	.byte	-4
333	.byte	-4
334	.byte	-4
335	.byte	-4
336	.byte	-4
337	.byte	-4
338	.byte	-4
339	.byte	-4
340	.byte	-4
341	.byte	-4
342	.byte	-4
343	.byte	-4
344	.byte	-4
345	.byte	-4
346	.byte	-4
	/* Divisors 33 .. 64.  */
347	.byte	-5
348	.byte	-5
349	.byte	-5
350	.byte	-5
351	.byte	-5
352	.byte	-5
353	.byte	-5
354	.byte	-5
355	.byte	-5
356	.byte	-5
357	.byte	-5
358	.byte	-5
359	.byte	-5
360	.byte	-5
361	.byte	-5
362	.byte	-5
363	.byte	-5
364	.byte	-5
365	.byte	-5
366	.byte	-5
367	.byte	-5
368	.byte	-5
369	.byte	-5
370	.byte	-5
371	.byte	-5
372	.byte	-5
373	.byte	-5
374	.byte	-5
375	.byte	-5
376	.byte	-5
377	.byte	-5
378	.byte	-5
	/* Divisors 65 .. 127.  */
379	.byte	-6
380	.byte	-6
381	.byte	-6
382	.byte	-6
383	.byte	-6
384	.byte	-6
385	.byte	-6
386	.byte	-6
387	.byte	-6
388	.byte	-6
389	.byte	-6
390	.byte	-6
391	.byte	-6
392	.byte	-6
393	.byte	-6
394	.byte	-6
395	.byte	-6
396	.byte	-6
397	.byte	-6
398	.byte	-6
399	.byte	-6
400	.byte	-6
401	.byte	-6
402	.byte	-6
403	.byte	-6
404	.byte	-6
405	.byte	-6
406	.byte	-6
407	.byte	-6
408	.byte	-6
409	.byte	-6
410	.byte	-6
411	.byte	-6
412	.byte	-6
413	.byte	-6
414	.byte	-6
415	.byte	-6
416	.byte	-6
417	.byte	-6
418	.byte	-6
419	.byte	-6
420	.byte	-6
421	.byte	-6
422	.byte	-6
423	.byte	-6
424	.byte	-6
425	.byte	-6
426	.byte	-6
427	.byte	-6
428	.byte	-6
429	.byte	-6
430	.byte	-6
431	.byte	-6
432	.byte	-6
433	.byte	-6
434	.byte	-6
435	.byte	-6
436	.byte	-6
437	.byte	-6
438	.byte	-6
439	.byte	-6
440	.byte	-6
441	.byte	-6
/* div_table_ix: one signed byte per divisor 0 .. 128, indexed by the
   divisor itself.  Each value is a byte offset relative to
   div_table_inv; the code adds it to the address from
   mova div_table_inv,r0 and loads the 32-bit inverse from there.
   Offsets run from -128 (the word at zero_l) to 124 in steps of four,
   except for entry 0.  Powers of two all map to -128.  */
442/* Lookup table translating positive divisor to index into table of
443   normalized inverse.  N.B. the '0' entry is also the last entry of the
444 previous table, and causes an unaligned access for division by zero.  */
445div_table_ix:
	/* Divisor 0: doubles as the div_table_clz byte for divisor 128.  */
446	.byte	-6
	/* Divisors 1 .. 3.  */
447	.byte	-128
448	.byte	-128
449	.byte	0
	/* Divisors 4 .. 7.  */
450	.byte	-128
451	.byte	-64
452	.byte	0
453	.byte	64
	/* Divisors 8 .. 15.  */
454	.byte	-128
455	.byte	-96
456	.byte	-64
457	.byte	-32
458	.byte	0
459	.byte	32
460	.byte	64
461	.byte	96
	/* Divisors 16 .. 31.  */
462	.byte	-128
463	.byte	-112
464	.byte	-96
465	.byte	-80
466	.byte	-64
467	.byte	-48
468	.byte	-32
469	.byte	-16
470	.byte	0
471	.byte	16
472	.byte	32
473	.byte	48
474	.byte	64
475	.byte	80
476	.byte	96
477	.byte	112
	/* Divisors 32 .. 63.  */
478	.byte	-128
479	.byte	-120
480	.byte	-112
481	.byte	-104
482	.byte	-96
483	.byte	-88
484	.byte	-80
485	.byte	-72
486	.byte	-64
487	.byte	-56
488	.byte	-48
489	.byte	-40
490	.byte	-32
491	.byte	-24
492	.byte	-16
493	.byte	-8
494	.byte	0
495	.byte	8
496	.byte	16
497	.byte	24
498	.byte	32
499	.byte	40
500	.byte	48
501	.byte	56
502	.byte	64
503	.byte	72
504	.byte	80
505	.byte	88
506	.byte	96
507	.byte	104
508	.byte	112
509	.byte	120
	/* Divisors 64 .. 127.  */
510	.byte	-128
511	.byte	-124
512	.byte	-120
513	.byte	-116
514	.byte	-112
515	.byte	-108
516	.byte	-104
517	.byte	-100
518	.byte	-96
519	.byte	-92
520	.byte	-88
521	.byte	-84
522	.byte	-80
523	.byte	-76
524	.byte	-72
525	.byte	-68
526	.byte	-64
527	.byte	-60
528	.byte	-56
529	.byte	-52
530	.byte	-48
531	.byte	-44
532	.byte	-40
533	.byte	-36
534	.byte	-32
535	.byte	-28
536	.byte	-24
537	.byte	-20
538	.byte	-16
539	.byte	-12
540	.byte	-8
541	.byte	-4
542	.byte	0
543	.byte	4
544	.byte	8
545	.byte	12
546	.byte	16
547	.byte	20
548	.byte	24
549	.byte	28
550	.byte	32
551	.byte	36
552	.byte	40
553	.byte	44
554	.byte	48
555	.byte	52
556	.byte	56
557	.byte	60
558	.byte	64
559	.byte	68
560	.byte	72
561	.byte	76
562	.byte	80
563	.byte	84
564	.byte	88
565	.byte	92
566	.byte	96
567	.byte	100
568	.byte	104
569	.byte	108
570	.byte	112
571	.byte	116
572	.byte	120
573	.byte	124
	/* Divisor 128.  */
574	.byte	-128
/* Table of 64 normalized 32-bit inverses, addressed from its middle.
   The label div_table_inv marks the 33rd word, so the signed byte
   offsets from div_table_ix (-128 .. 124) reach every word.
   zero_l labels the first word.  Its value 0 serves two purposes: it is
   the stored inverse that every power-of-two divisor maps to (offset
   -128), and it is the literal 0 that the div1 paths load with
   mov.l zero_l,r1.
   The stored words omit the leading 1; the table-driven code accounts
   for it by adding the dividend to the upper half of the product.  */
575/* 1/64 .. 1/127, normalized.  There is an implicit leading 1 in bit 32.  */
576	.balign 4
577zero_l:
	/* Words at offsets -128 .. -4 from div_table_inv.  */
578	.long	0x0
579	.long	0xF81F81F9
580	.long	0xF07C1F08
581	.long	0xE9131AC0
582	.long	0xE1E1E1E2
583	.long	0xDAE6076C
584	.long	0xD41D41D5
585	.long	0xCD856891
586	.long	0xC71C71C8
587	.long	0xC0E07039
588	.long	0xBACF914D
589	.long	0xB4E81B4F
590	.long	0xAF286BCB
591	.long	0xA98EF607
592	.long	0xA41A41A5
593	.long	0x9EC8E952
594	.long	0x9999999A
595	.long	0x948B0FCE
596	.long	0x8F9C18FA
597	.long	0x8ACB90F7
598	.long	0x86186187
599	.long	0x81818182
600	.long	0x7D05F418
601	.long	0x78A4C818
602	.long	0x745D1746
603	.long	0x702E05C1
604	.long	0x6C16C16D
605	.long	0x68168169
606	.long	0x642C8591
607	.long	0x60581606
608	.long	0x5C9882BA
609	.long	0x58ED2309
610div_table_inv:
	/* Words at offsets 0 .. 124 from div_table_inv.  */
611	.long	0x55555556
612	.long	0x51D07EAF
613	.long	0x4E5E0A73
614	.long	0x4AFD6A06
615	.long	0x47AE147B
616	.long	0x446F8657
617	.long	0x41414142
618	.long	0x3E22CBCF
619	.long	0x3B13B13C
620	.long	0x38138139
621	.long	0x3521CFB3
622	.long	0x323E34A3
623	.long	0x2F684BDB
624	.long	0x2C9FB4D9
625	.long	0x29E4129F
626	.long	0x27350B89
627	.long	0x24924925
628	.long	0x21FB7813
629	.long	0x1F7047DD
630	.long	0x1CF06ADB
631	.long	0x1A7B9612
632	.long	0x18118119
633	.long	0x15B1E5F8
634	.long	0x135C8114
635	.long	0x11111112
636	.long	0xECF56BF
637	.long	0xC9714FC
638	.long	0xA6810A7
639	.long	0x8421085
640	.long	0x624DD30
641	.long	0x4104105
642	.long	0x2040811
643	/* maximum error: 0.987342 scaled: 0.921875*/
644