/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

	.macro	do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx2	\dst, \as, \at
#else
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx4	\dst, \as, \at
#else
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx8	\dst, \as, \at
#else
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#endif
	.endm

ENTRY(__mulsi3)

	abi_entry_default

#if XCHAL_HAVE_MUL32
	mull	a2, a2, a3

#elif XCHAL_HAVE_MUL16
	/* Fast path: if neither operand has bits set above bit 15,
	   a single mul16u gives the full product.  */
	or	a4, a2, a3
	srai	a4, a4, 16
	bnez	a4, .LMUL16
	mul16u	a2, a2, a3
	abi_ret_default
.LMUL16:
	/* Build the low 32 bits of the product from 16-bit multiplies:
	   a2 * a3 == a2l * a3l + ((a2h * a3l + a3h * a2l) << 16)
	   modulo 2^32.  */
	srai	a4, a2, 16
	srai	a5, a3, 16
	mul16u	a7, a4, a3
	mul16u	a6, a5, a2
	mul16u	a4, a2, a3
	add	a7, a7, a6
	slli	a7, a7, 16
	add	a2, a7, a4

#elif XCHAL_HAVE_MAC16
	/* Same decomposition via the MAC16 accumulator: sum the two
	   16x16 cross products, shift, and add the unsigned low
	   product.  Sign-extension differences lie above bit 31 of
	   the result and are discarded.  */
	mul.aa.hl a2, a3
	mula.aa.lh a2, a3
	rsr	a5, ACCLO
	umul.aa.ll a2, a3
	rsr	a4, ACCLO
	slli	a5, a5, 16
	add	a2, a4, a5

#else /* !MUL32 && !MUL16 && !MAC16 */

	/* Multiply one bit at a time, but unroll the loop 4x to better
	   exploit the addx instructions and avoid overhead.
	   Peel the first iteration to save a cycle on init.  */

	/* Avoid negative numbers.  */
	xor	a5, a2, a3	/* Top bit is 1 if one input is negative.  */
	do_abs	a3, a3, a6
	do_abs	a2, a2, a6

	/* Swap so the second argument is smaller.  */
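	/* Keeping the smaller value in a3 minimizes the number of
	   shift-add iterations below: the loop repeats only while a3
	   still has bits set above bit 3.  */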
	sub	a7, a2, a3
	mov	a4, a3
	movgez	a4, a2, a7	/* a4 = max (a2, a3) */
	movltz	a3, a2, a7	/* a3 = min (a2, a3) */

	/* Peeled first iteration: a2 = a4 * (a3 & 0xf).  */
	movi	a2, 0
	extui	a6, a3, 0, 1
	movnez	a2, a4, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop
	neg	a3, a2
	movltz	a2, a3, a5	/* Negate if the input signs differed.  */
	abi_ret_default

	.align	4
.Lmult_main_loop:
	/* Consume four more multiplier bits per pass: drop the bits
	   already handled and scale the multiplicand by 16.  */
	srli	a3, a3, 4
	slli	a4, a4, 4

	add	a7, a4, a2
	extui	a6, a3, 0, 1
	movnez	a2, a7, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop

	/* Fix the sign of the result.  */
	neg	a3, a2
	movltz	a2, a3, a5

#endif /* !MUL32 && !MUL16 && !MAC16 */

	abi_ret_default

ENDPROC(__mulsi3)