/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

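	/*
	 * do_addx{2,4,8}: dst = (as << {1,2,3}) + at.  Cores with the
	 * ADDX option do this in a single addx{2,4,8} instruction;
	 * otherwise fall back to an explicit shift through tmp followed
	 * by an add.
	 */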
	.macro	do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx2	\dst, \as, \at
#else
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx4	\dst, \as, \at
#else
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx8	\dst, \as, \at
#else
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#endif
	.endm

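/*
 * __mulsi3 is the compiler's 32 x 32 -> 32 bit multiply helper: it
 * returns the low 32 bits of a2 * a3 in a2 (in C terms, roughly
 * uint32_t __mulsi3(uint32_t a, uint32_t b) { return a * b; }).
 * Since the low 32 bits of the product are the same for signed and
 * unsigned operands, one routine serves both.  The implementation is
 * chosen at build time from the configured core's features, in order
 * of preference: 32-bit multiply (MUL32), 16-bit multiply (MUL16),
 * the MAC16 multiply-accumulate unit, or a pure shift-and-add loop.
 */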
ENTRY(__mulsi3)

	abi_entry_default

#if XCHAL_HAVE_MUL32
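	/* MUL32: mull computes the low 32 bits of the product directly. */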
	mull	a2, a2, a3

#elif XCHAL_HAVE_MUL16
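	/*
	 * mul16u multiplies the low 16 bits of its operands.  If neither
	 * input has any bit set above bit 15, a single mul16u already
	 * yields the exact 32-bit product; otherwise take the long
	 * sequence at .LMUL16.
	 */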
	or	a4, a2, a3
	srai	a4, a4, 16
	bnez	a4, .LMUL16
	mul16u	a2, a2, a3
	abi_ret_default
.LMUL16:
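	/*
	 * Build the product from 16-bit pieces.  With a = (ah << 16) + al
	 * and b = (bh << 16) + bl, modulo 2^32:
	 *
	 *	a * b = ((ah * bl + al * bh) << 16) + al * bl
	 *
	 * The ah * bh term is shifted out entirely, and only the low 16
	 * bits of the cross-product sum survive the shift, so taking the
	 * high halves with an arithmetic shift is harmless.
	 */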
	srai	a4, a2, 16
	srai	a5, a3, 16
	mul16u	a7, a4, a3
	mul16u	a6, a5, a2
	mul16u	a4, a2, a3
	add	a7, a7, a6
	slli	a7, a7, 16
	add	a2, a7, a4

#elif XCHAL_HAVE_MAC16
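	/*
	 * MAC16 path, using the same identity as the MUL16 case: the two
	 * cross products are summed in the accumulator, then the low
	 * product is formed and combined:
	 *	ACC  = hi16(a2) * lo16(a3)	(mul.aa.hl)
	 *	ACC += lo16(a2) * hi16(a3)	(mula.aa.lh)
	 *	a5   = ACCLO			(cross-term sum)
	 *	a4   = lo16(a2) * lo16(a3)	(umul.aa.ll, read via ACCLO)
	 *	a2   = (a5 << 16) + a4
	 */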
	mul.aa.hl a2, a3
	mula.aa.lh a2, a3
	rsr	a5, ACCLO
	umul.aa.ll a2, a3
	rsr	a4, ACCLO
	slli	a5, a5, 16
	add	a2, a4, a5

#else /* !MUL32 && !MUL16 && !MAC16 */

	/* Multiply one bit at a time, but unroll the loop 4x to better
	   exploit the addx instructions and avoid overhead.
	   Peel the first iteration to save a cycle on init.  */

	/* Avoid negative numbers.  */
	xor	a5, a2, a3	/* Top bit is 1 if one input is negative.  */
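	/* do_abs dst, src, tmp: dst = |src|, with tmp as scratch.  */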
	do_abs	a3, a3, a6
	do_abs	a2, a2, a6

	/* Swap so the second argument is smaller.  */
	sub	a7, a2, a3
	mov	a4, a3
	movgez	a4, a2, a7	/* a4 = max (a2, a3) */
	movltz	a3, a2, a7	/* a3 = min (a2, a3) */

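	/*
	 * Peeled first pass over the four low multiplier bits.  Each
	 * step tests one bit of a3 and conditionally updates the running
	 * result, roughly:
	 *
	 *	if (a3 & 1) a2  = a4;		(result starts at 0)
	 *	if (a3 & 2) a2 += a4 << 1;
	 *	if (a3 & 4) a2 += a4 << 2;
	 *	if (a3 & 8) a2 += a4 << 3;
	 *
	 * a4 holds the larger operand (from the swap above) so that a3,
	 * the multiplier, runs out of bits in as few iterations as
	 * possible.
	 */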
	movi	a2, 0
	extui	a6, a3, 0, 1
	movnez	a2, a4, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

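	/*
	 * If multiplier bits remain above bit 3, continue in the main
	 * loop; otherwise the product is complete, so apply the sign
	 * saved in a5 and return.
	 */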
	bgeui	a3, 16, .Lmult_main_loop
	neg	a3, a2
	movltz	a2, a3, a5
	abi_ret_default

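	/*
	 * Main loop: retire four multiplier bits per iteration.  Shift
	 * the handled bits out of a3, scale the multiplicand to match
	 * (a4 <<= 4), and repeat the four test-and-add steps until
	 * a3 < 16.
	 */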
	.align	4
.Lmult_main_loop:
	srli	a3, a3, 4
	slli	a4, a4, 4

	add	a7, a4, a2
	extui	a6, a3, 0, 1
	movnez	a2, a7, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop

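	/*
	 * a5 (the XOR of the original inputs) is negative iff exactly
	 * one input was negative, in which case the magnitude product
	 * must be negated.
	 */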
	neg	a3, a2
	movltz	a2, a3, a5

#endif /* !MUL32 && !MUL16 && !MAC16 */

	abi_ret_default

ENDPROC(__mulsi3)
EXPORT_SYMBOL(__mulsi3)