/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

	.macro	do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx2	\dst, \as, \at
#else
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx4	\dst, \as, \at
#else
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx8	\dst, \as, \at
#else
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#endif
	.endm

ENTRY(__mulsi3)

	abi_entry_default

#if XCHAL_HAVE_MUL32
	mull	a2, a2, a3

#elif XCHAL_HAVE_MUL16
	or	a4, a2, a3
	srai	a4, a4, 16
	bnez	a4, .LMUL16
	mul16u	a2, a2, a3
	abi_ret_default
.LMUL16:
	srai	a4, a2, 16
	srai	a5, a3, 16
	mul16u	a7, a4, a3
	mul16u	a6, a5, a2
	mul16u	a4, a2, a3
	add	a7, a7, a6
	slli	a7, a7, 16
	add	a2, a7, a4

#elif XCHAL_HAVE_MAC16
	mul.aa.hl a2, a3
	mula.aa.lh a2, a3
	rsr	a5, ACCLO
	umul.aa.ll a2, a3
	rsr	a4, ACCLO
	slli	a5, a5, 16
	add	a2, a4, a5

#else /* !MUL32 && !MUL16 && !MAC16 */

	/* Multiply one bit at a time, but unroll the loop 4x to better
	   exploit the addx instructions and avoid overhead.
	   Peel the first iteration to save a cycle on init.  */

	/* Avoid negative numbers.  */
	xor	a5, a2, a3	/* Top bit is 1 if one input is negative.  */
	do_abs	a3, a3, a6
	do_abs	a2, a2, a6

	/* Swap so the second argument is smaller.  */
	sub	a7, a2, a3
	mov	a4, a3
	movgez	a4, a2, a7	/* a4 = max (a2, a3) */
	movltz	a3, a2, a7	/* a3 = min (a2, a3) */

	movi	a2, 0
	extui	a6, a3, 0, 1
	movnez	a2, a4, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop
	neg	a3, a2
	movltz	a2, a3, a5
	abi_ret_default

	.align	4
.Lmult_main_loop:
	srli	a3, a3, 4
	slli	a4, a4, 4

	add	a7, a4, a2
	extui	a6, a3, 0, 1
	movnez	a2, a7, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop

	neg	a3, a2
	movltz	a2, a3, a5

#endif /* !MUL32 && !MUL16 && !MAC16 */

	abi_ret_default

ENDPROC(__mulsi3)
EXPORT_SYMBOL(__mulsi3)
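
Companion note (not part of the source file above): the following is a minimal C sketch of what the generic fallback path computes when none of MUL32/MUL16/MAC16 is available, assuming two's-complement, wrap-around 32-bit arithmetic. The function name mulsi3_model is hypothetical; the comments map its variables onto the assembly's a2..a7 usage. It mirrors the structure of the assembly (take absolute values, loop over the smaller operand, consume four multiplier bits per pass with shifted adds corresponding to addx2/addx4/addx8, then restore the sign), but it is an illustrative reference model, not the kernel implementation.

	#include <stdint.h>

	/*
	 * Reference model of the shift-and-add fallback in __mulsi3 above.
	 * Hypothetical helper for illustration only.
	 */
	static int32_t mulsi3_model(int32_t a, int32_t b)
	{
		uint32_t sign = (uint32_t)a ^ (uint32_t)b;   /* top bit set iff signs differ (a5) */
		uint32_t x = (a < 0) ? 0u - (uint32_t)a : (uint32_t)a;
		uint32_t y = (b < 0) ? 0u - (uint32_t)b : (uint32_t)b;
		uint32_t big   = (x >= y) ? x : y;           /* a4: larger |operand| */
		uint32_t small = (x >= y) ? y : x;           /* a3: smaller |operand| */
		uint32_t acc = 0;                            /* a2: accumulating product */

		do {
			/* Conditionally add big * 1, 2, 4, 8 for the low four
			 * multiplier bits, mirroring the add/addx2/addx4/addx8
			 * plus movnez sequences. */
			if (small & 1)
				acc += big;
			if (small & 2)
				acc += big << 1;
			if (small & 4)
				acc += big << 2;
			if (small & 8)
				acc += big << 3;
			small >>= 4;                         /* srli a3, a3, 4 */
			big <<= 4;                           /* slli a4, a4, 4 */
		} while (small);                             /* bgeui a3, 16: loop while bits remain */

		/* Negate the result if exactly one input was negative (neg + movltz). */
		return (int32_t)((sign & 0x80000000u) ? 0u - acc : acc);
	}

Processing four multiplier bits per pass reflects the same trade-off the assembly comment describes: it amortizes loop overhead and lets the combined shift-add forms (addx2/addx4/addx8) do most of the work.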