1/*
2 * RISC-V translation routines for the RV64M Standard Extension.
3 *
4 * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu
5 * Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
6 *                    Bastian Koppelmann, kbastian@mail.uni-paderborn.de
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2 or later, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program.  If not, see <http://www.gnu.org/licenses/>.
19 */
20
/*
 * Guard for the multiply instructions: makes the *enclosing* function
 * return false when neither the Zmmul config flag (ctx->cfg_ptr->ext_zmmul)
 * nor the M extension (has_ext(ctx, RVM)) is present.
 *
 * Only usable inside functions returning bool.  The argument is evaluated
 * twice, so it must be free of side effects.
 */
#define REQUIRE_M_OR_ZMMUL(ctx) do {                          \
    if (!(ctx)->cfg_ptr->ext_zmmul && !has_ext((ctx), RVM)) { \
        return false;                                         \
    }                                                         \
} while (0)
26
27static void gen_mulhu_i128(TCGv r2, TCGv r3, TCGv al, TCGv ah, TCGv bl, TCGv bh)
28{
29    TCGv tmpl = tcg_temp_new();
30    TCGv tmph = tcg_temp_new();
31    TCGv r0 = tcg_temp_new();
32    TCGv r1 = tcg_temp_new();
33    TCGv zero = tcg_constant_tl(0);
34
35    tcg_gen_mulu2_tl(r0, r1, al, bl);
36
37    tcg_gen_mulu2_tl(tmpl, tmph, al, bh);
38    tcg_gen_add2_tl(r1, r2, r1, zero, tmpl, tmph);
39    tcg_gen_mulu2_tl(tmpl, tmph, ah, bl);
40    tcg_gen_add2_tl(r1, tmph, r1, r2, tmpl, tmph);
41    /* Overflow detection into r3 */
42    tcg_gen_setcond_tl(TCG_COND_LTU, r3, tmph, r2);
43
44    tcg_gen_mov_tl(r2, tmph);
45
46    tcg_gen_mulu2_tl(tmpl, tmph, ah, bh);
47    tcg_gen_add2_tl(r2, r3, r2, r3, tmpl, tmph);
48}
49
50static void gen_mul_i128(TCGv rl, TCGv rh,
51                         TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
52{
53    TCGv tmpl = tcg_temp_new();
54    TCGv tmph = tcg_temp_new();
55    TCGv tmpx = tcg_temp_new();
56    TCGv zero = tcg_constant_tl(0);
57
58    tcg_gen_mulu2_tl(rl, rh, rs1l, rs2l);
59    tcg_gen_mulu2_tl(tmpl, tmph, rs1l, rs2h);
60    tcg_gen_add2_tl(rh, tmpx, rh, zero, tmpl, tmph);
61    tcg_gen_mulu2_tl(tmpl, tmph, rs1h, rs2l);
62    tcg_gen_add2_tl(rh, tmph, rh, tmpx, tmpl, tmph);
63}
64
/*
 * Translate MUL.  Returns false (via REQUIRE_M_OR_ZMMUL) when neither Zmmul
 * nor M is available; otherwise returns whatever gen_arith() returns.
 * Operands are requested without extension (EXT_NONE); tcg_gen_mul_tl is the
 * single-register generator and gen_mul_i128 the lo/hi register-pair one.
 */
static bool trans_mul(DisasContext *ctx, arg_mul *a)
{
    REQUIRE_M_OR_ZMMUL(ctx);
    return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, gen_mul_i128);
}
70
71static void gen_mulh_i128(TCGv rl, TCGv rh,
72                          TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
73{
74    TCGv t0l = tcg_temp_new();
75    TCGv t0h = tcg_temp_new();
76    TCGv t1l = tcg_temp_new();
77    TCGv t1h = tcg_temp_new();
78
79    gen_mulhu_i128(rl, rh, rs1l, rs1h, rs2l, rs2h);
80    tcg_gen_sari_tl(t0h, rs1h, 63);
81    tcg_gen_and_tl(t0l, t0h, rs2l);
82    tcg_gen_and_tl(t0h, t0h, rs2h);
83    tcg_gen_sari_tl(t1h, rs2h, 63);
84    tcg_gen_and_tl(t1l, t1h, rs1l);
85    tcg_gen_and_tl(t1h, t1h, rs1h);
86    tcg_gen_sub2_tl(t0l, t0h, rl, rh, t0l, t0h);
87    tcg_gen_sub2_tl(rl, rh, t0l, t0h, t1l, t1h);
88}
89
90static void gen_mulh(TCGv ret, TCGv s1, TCGv s2)
91{
92    TCGv discard = tcg_temp_new();
93
94    tcg_gen_muls2_tl(discard, ret, s1, s2);
95}
96
/*
 * High-half multiply for 32-bit operands held in wider registers: a plain
 * multiply followed by an arithmetic right shift by 32 leaves bits 32 and
 * up of the product in ret.
 *
 * The inputs are not extended here; the callers in this file request
 * EXT_SIGN (trans_mulh) or EXT_ZERO (trans_mulhu) operands.
 * NOTE(review): assumes TCGv is 64 bits wide on this path -- confirm.
 */
static void gen_mulh_w(TCGv ret, TCGv s1, TCGv s2)
{
    tcg_gen_mul_tl(ret, s1, s2);
    tcg_gen_sari_tl(ret, ret, 32);
}
102
/*
 * Translate MULH.  Returns false (via REQUIRE_M_OR_ZMMUL) when neither Zmmul
 * nor M is available; otherwise returns whatever gen_arith_per_ol() returns.
 * Operands are requested sign-extended (EXT_SIGN); the three generators
 * passed are gen_mulh, gen_mulh_w (32-bit variant) and gen_mulh_i128
 * (lo/hi register-pair variant).
 */
static bool trans_mulh(DisasContext *ctx, arg_mulh *a)
{
    REQUIRE_M_OR_ZMMUL(ctx);
    return gen_arith_per_ol(ctx, a, EXT_SIGN, gen_mulh, gen_mulh_w,
                            gen_mulh_i128);
}
109
110static void gen_mulhsu_i128(TCGv rl, TCGv rh,
111                            TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
112{
113
114    TCGv t0l = tcg_temp_new();
115    TCGv t0h = tcg_temp_new();
116
117    gen_mulhu_i128(rl, rh, rs1l, rs1h, rs2l, rs2h);
118    tcg_gen_sari_tl(t0h, rs1h, 63);
119    tcg_gen_and_tl(t0l, t0h, rs2l);
120    tcg_gen_and_tl(t0h, t0h, rs2h);
121    tcg_gen_sub2_tl(rl, rh, rl, rh, t0l, t0h);
122}
123
124static void gen_mulhsu(TCGv ret, TCGv arg1, TCGv arg2)
125{
126    TCGv rl = tcg_temp_new();
127    TCGv rh = tcg_temp_new();
128
129    tcg_gen_mulu2_tl(rl, rh, arg1, arg2);
130    /* fix up for one negative */
131    tcg_gen_sari_tl(rl, arg1, TARGET_LONG_BITS - 1);
132    tcg_gen_and_tl(rl, rl, arg2);
133    tcg_gen_sub_tl(ret, rh, rl);
134}
135
136static void gen_mulhsu_w(TCGv ret, TCGv arg1, TCGv arg2)
137{
138    TCGv t1 = tcg_temp_new();
139    TCGv t2 = tcg_temp_new();
140
141    tcg_gen_ext32s_tl(t1, arg1);
142    tcg_gen_ext32u_tl(t2, arg2);
143    tcg_gen_mul_tl(ret, t1, t2);
144    tcg_gen_sari_tl(ret, ret, 32);
145}
146
/*
 * Translate MULHSU.  Returns false (via REQUIRE_M_OR_ZMMUL) when neither
 * Zmmul nor M is available; otherwise returns whatever gen_arith_per_ol()
 * returns.  Operands are requested without extension (EXT_NONE) --
 * gen_mulhsu_w extends its inputs itself.  The generators passed are
 * gen_mulhsu, gen_mulhsu_w (32-bit variant) and gen_mulhsu_i128 (lo/hi
 * register-pair variant).
 */
static bool trans_mulhsu(DisasContext *ctx, arg_mulhsu *a)
{
    REQUIRE_M_OR_ZMMUL(ctx);
    return gen_arith_per_ol(ctx, a, EXT_NONE, gen_mulhsu, gen_mulhsu_w,
                            gen_mulhsu_i128);
}
153
154static void gen_mulhu(TCGv ret, TCGv s1, TCGv s2)
155{
156    TCGv discard = tcg_temp_new();
157
158    tcg_gen_mulu2_tl(discard, ret, s1, s2);
159}
160
/*
 * Translate MULHU.  Returns false (via REQUIRE_M_OR_ZMMUL) when neither
 * Zmmul nor M is available; otherwise returns whatever gen_arith_per_ol()
 * returns.  Operands are requested zero-extended (EXT_ZERO); the generators
 * passed are gen_mulhu, gen_mulh_w (32-bit variant, shared with MULH) and
 * gen_mulhu_i128 (lo/hi register-pair variant).
 */
static bool trans_mulhu(DisasContext *ctx, arg_mulhu *a)
{
    REQUIRE_M_OR_ZMMUL(ctx);
    /* gen_mulh_w works for either sign as input. */
    return gen_arith_per_ol(ctx, a, EXT_ZERO, gen_mulhu, gen_mulh_w,
                            gen_mulhu_i128);
}
168
/*
 * Register-pair signed division, delegated to the divs_i128 helper.
 * The helper's return value lands in rdl; rdh is then loaded from the
 * retxh field of CPURISCVState (presumably where the helper leaves the
 * upper half of the result -- the helper is defined elsewhere).
 */
static void gen_div_i128(TCGv rdl, TCGv rdh,
                         TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    gen_helper_divs_i128(rdl, cpu_env, rs1l, rs1h, rs2l, rs2h);
    tcg_gen_ld_tl(rdh, cpu_env, offsetof(CPURISCVState, retxh));
}
175
176static void gen_div(TCGv ret, TCGv source1, TCGv source2)
177{
178    TCGv temp1, temp2, zero, one, mone, min;
179
180    temp1 = tcg_temp_new();
181    temp2 = tcg_temp_new();
182    zero = tcg_constant_tl(0);
183    one = tcg_constant_tl(1);
184    mone = tcg_constant_tl(-1);
185    min = tcg_constant_tl(1ull << (TARGET_LONG_BITS - 1));
186
187    /*
188     * If overflow, set temp2 to 1, else source2.
189     * This produces the required result of min.
190     */
191    tcg_gen_setcond_tl(TCG_COND_EQ, temp1, source1, min);
192    tcg_gen_setcond_tl(TCG_COND_EQ, temp2, source2, mone);
193    tcg_gen_and_tl(temp1, temp1, temp2);
194    tcg_gen_movcond_tl(TCG_COND_NE, temp2, temp1, zero, one, source2);
195
196    /*
197     * If div by zero, set temp1 to -1 and temp2 to 1 to
198     * produce the required result of -1.
199     */
200    tcg_gen_movcond_tl(TCG_COND_EQ, temp1, source2, zero, mone, source1);
201    tcg_gen_movcond_tl(TCG_COND_EQ, temp2, source2, zero, one, temp2);
202
203    tcg_gen_div_tl(ret, temp1, temp2);
204}
205
/*
 * Translate DIV.  Guarded by REQUIRE_EXT(ctx, RVM) (defined elsewhere;
 * presumably rejects the instruction when M is absent -- note that, unlike
 * the multiplies, Zmmul is not accepted here).  Delegates to gen_arith()
 * with sign-extended operands (EXT_SIGN), gen_div as the single-register
 * generator and gen_div_i128 as the lo/hi register-pair one.
 */
static bool trans_div(DisasContext *ctx, arg_div *a)
{
    REQUIRE_EXT(ctx, RVM);
    return gen_arith(ctx, a, EXT_SIGN, gen_div, gen_div_i128);
}
211
/*
 * Register-pair unsigned division, delegated to the divu_i128 helper.
 * The helper's return value lands in rdl; rdh is then loaded from the
 * retxh field of CPURISCVState (presumably where the helper leaves the
 * upper half of the result -- the helper is defined elsewhere).
 */
static void gen_divu_i128(TCGv rdl, TCGv rdh,
                          TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    gen_helper_divu_i128(rdl, cpu_env, rs1l, rs1h, rs2l, rs2h);
    tcg_gen_ld_tl(rdh, cpu_env, offsetof(CPURISCVState, retxh));
}
218
219static void gen_divu(TCGv ret, TCGv source1, TCGv source2)
220{
221    TCGv temp1, temp2, zero, one, max;
222
223    temp1 = tcg_temp_new();
224    temp2 = tcg_temp_new();
225    zero = tcg_constant_tl(0);
226    one = tcg_constant_tl(1);
227    max = tcg_constant_tl(~0);
228
229    /*
230     * If div by zero, set temp1 to max and temp2 to 1 to
231     * produce the required result of max.
232     */
233    tcg_gen_movcond_tl(TCG_COND_EQ, temp1, source2, zero, max, source1);
234    tcg_gen_movcond_tl(TCG_COND_EQ, temp2, source2, zero, one, source2);
235    tcg_gen_divu_tl(ret, temp1, temp2);
236}
237
/*
 * Translate DIVU.  Guarded by REQUIRE_EXT(ctx, RVM) (defined elsewhere;
 * presumably rejects the instruction when M is absent).  Delegates to
 * gen_arith() with zero-extended operands (EXT_ZERO), gen_divu as the
 * single-register generator and gen_divu_i128 as the lo/hi register-pair
 * one.
 */
static bool trans_divu(DisasContext *ctx, arg_divu *a)
{
    REQUIRE_EXT(ctx, RVM);
    return gen_arith(ctx, a, EXT_ZERO, gen_divu, gen_divu_i128);
}
243
/*
 * Register-pair signed remainder, delegated to the rems_i128 helper.
 * The helper's return value lands in rdl; rdh is then loaded from the
 * retxh field of CPURISCVState (presumably where the helper leaves the
 * upper half of the result -- the helper is defined elsewhere).
 */
static void gen_rem_i128(TCGv rdl, TCGv rdh,
                         TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    gen_helper_rems_i128(rdl, cpu_env, rs1l, rs1h, rs2l, rs2h);
    tcg_gen_ld_tl(rdh, cpu_env, offsetof(CPURISCVState, retxh));
}
250
251static void gen_rem(TCGv ret, TCGv source1, TCGv source2)
252{
253    TCGv temp1, temp2, zero, one, mone, min;
254
255    temp1 = tcg_temp_new();
256    temp2 = tcg_temp_new();
257    zero = tcg_constant_tl(0);
258    one = tcg_constant_tl(1);
259    mone = tcg_constant_tl(-1);
260    min = tcg_constant_tl(1ull << (TARGET_LONG_BITS - 1));
261
262    /*
263     * If overflow, set temp1 to 0, else source1.
264     * This avoids a possible host trap, and produces the required result of 0.
265     */
266    tcg_gen_setcond_tl(TCG_COND_EQ, temp1, source1, min);
267    tcg_gen_setcond_tl(TCG_COND_EQ, temp2, source2, mone);
268    tcg_gen_and_tl(temp1, temp1, temp2);
269    tcg_gen_movcond_tl(TCG_COND_NE, temp1, temp1, zero, zero, source1);
270
271    /*
272     * If div by zero, set temp2 to 1, else source2.
273     * This avoids a possible host trap, but produces an incorrect result.
274     */
275    tcg_gen_movcond_tl(TCG_COND_EQ, temp2, source2, zero, one, source2);
276
277    tcg_gen_rem_tl(temp1, temp1, temp2);
278
279    /* If div by zero, the required result is the original dividend. */
280    tcg_gen_movcond_tl(TCG_COND_EQ, ret, source2, zero, source1, temp1);
281}
282
/*
 * Translate REM.  Guarded by REQUIRE_EXT(ctx, RVM) (defined elsewhere;
 * presumably rejects the instruction when M is absent).  Delegates to
 * gen_arith() with sign-extended operands (EXT_SIGN), gen_rem as the
 * single-register generator and gen_rem_i128 as the lo/hi register-pair
 * one.
 */
static bool trans_rem(DisasContext *ctx, arg_rem *a)
{
    REQUIRE_EXT(ctx, RVM);
    return gen_arith(ctx, a, EXT_SIGN, gen_rem, gen_rem_i128);
}
288
/*
 * Register-pair unsigned remainder, delegated to the remu_i128 helper.
 * The helper's return value lands in rdl; rdh is then loaded from the
 * retxh field of CPURISCVState (presumably where the helper leaves the
 * upper half of the result -- the helper is defined elsewhere).
 */
static void gen_remu_i128(TCGv rdl, TCGv rdh,
                          TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    gen_helper_remu_i128(rdl, cpu_env, rs1l, rs1h, rs2l, rs2h);
    tcg_gen_ld_tl(rdh, cpu_env, offsetof(CPURISCVState, retxh));
}
295
296static void gen_remu(TCGv ret, TCGv source1, TCGv source2)
297{
298    TCGv temp, zero, one;
299
300    temp = tcg_temp_new();
301    zero = tcg_constant_tl(0);
302    one = tcg_constant_tl(1);
303
304    /*
305     * If div by zero, set temp to 1, else source2.
306     * This avoids a possible host trap, but produces an incorrect result.
307     */
308    tcg_gen_movcond_tl(TCG_COND_EQ, temp, source2, zero, one, source2);
309
310    tcg_gen_remu_tl(temp, source1, temp);
311
312    /* If div by zero, the required result is the original dividend. */
313    tcg_gen_movcond_tl(TCG_COND_EQ, ret, source2, zero, source1, temp);
314}
315
/*
 * Translate REMU.  Guarded by REQUIRE_EXT(ctx, RVM) (defined elsewhere;
 * presumably rejects the instruction when M is absent).  Delegates to
 * gen_arith() with zero-extended operands (EXT_ZERO), gen_remu as the
 * single-register generator and gen_remu_i128 as the lo/hi register-pair
 * one.
 */
static bool trans_remu(DisasContext *ctx, arg_remu *a)
{
    REQUIRE_EXT(ctx, RVM);
    return gen_arith(ctx, a, EXT_ZERO, gen_remu, gen_remu_i128);
}
321
/*
 * Translate MULW.  Guarded by REQUIRE_64_OR_128BIT (defined elsewhere) and
 * by REQUIRE_M_OR_ZMMUL, which returns false when neither Zmmul nor M is
 * available.  ctx->ol is overridden to MXL_RV32 before delegating to
 * gen_arith() (presumably so the operation is carried out at 32-bit operand
 * length -- gen_arith is defined elsewhere).  Operands are not extended
 * (EXT_NONE) and no register-pair generator is supplied (NULL).
 */
static bool trans_mulw(DisasContext *ctx, arg_mulw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_M_OR_ZMMUL(ctx);
    ctx->ol = MXL_RV32;
    return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, NULL);
}
329
/*
 * Translate DIVW.  Guarded by REQUIRE_64_OR_128BIT and REQUIRE_EXT(ctx, RVM)
 * (both defined elsewhere).  ctx->ol is overridden to MXL_RV32 before
 * delegating to gen_arith() (presumably selecting 32-bit operand length).
 * Operands are sign-extended (EXT_SIGN), gen_div is the generator, and no
 * register-pair generator is supplied (NULL).
 */
static bool trans_divw(DisasContext *ctx, arg_divw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV32;
    return gen_arith(ctx, a, EXT_SIGN, gen_div, NULL);
}
337
/*
 * Translate DIVUW.  Guarded by REQUIRE_64_OR_128BIT and
 * REQUIRE_EXT(ctx, RVM) (both defined elsewhere).  ctx->ol is overridden to
 * MXL_RV32 before delegating to gen_arith() (presumably selecting 32-bit
 * operand length).  Operands are zero-extended (EXT_ZERO), gen_divu is the
 * generator, and no register-pair generator is supplied (NULL).
 */
static bool trans_divuw(DisasContext *ctx, arg_divuw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV32;
    return gen_arith(ctx, a, EXT_ZERO, gen_divu, NULL);
}
345
/*
 * Translate REMW.  Guarded by REQUIRE_64_OR_128BIT and REQUIRE_EXT(ctx, RVM)
 * (both defined elsewhere).  ctx->ol is overridden to MXL_RV32 before
 * delegating to gen_arith() (presumably selecting 32-bit operand length).
 * Operands are sign-extended (EXT_SIGN), gen_rem is the generator, and no
 * register-pair generator is supplied (NULL).
 */
static bool trans_remw(DisasContext *ctx, arg_remw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV32;
    return gen_arith(ctx, a, EXT_SIGN, gen_rem, NULL);
}
353
/*
 * Translate REMUW.  Guarded by REQUIRE_64_OR_128BIT and
 * REQUIRE_EXT(ctx, RVM) (both defined elsewhere).  ctx->ol is overridden to
 * MXL_RV32 before delegating to gen_arith() (presumably selecting 32-bit
 * operand length).  Operands are zero-extended (EXT_ZERO), gen_remu is the
 * generator, and no register-pair generator is supplied (NULL).
 */
static bool trans_remuw(DisasContext *ctx, arg_remuw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV32;
    return gen_arith(ctx, a, EXT_ZERO, gen_remu, NULL);
}
361
/*
 * Translate MULD.  Guarded by REQUIRE_128BIT (defined elsewhere) and by
 * REQUIRE_M_OR_ZMMUL, which returns false when neither Zmmul nor M is
 * available.  ctx->ol is overridden to MXL_RV64 before delegating to
 * gen_arith() (presumably selecting 64-bit operand length).  Operands are
 * sign-extended (EXT_SIGN), tcg_gen_mul_tl is the generator, and no
 * register-pair generator is supplied (NULL).
 */
static bool trans_muld(DisasContext *ctx, arg_muld *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_M_OR_ZMMUL(ctx);
    ctx->ol = MXL_RV64;
    return gen_arith(ctx, a, EXT_SIGN, tcg_gen_mul_tl, NULL);
}
369
/*
 * Translate DIVD.  Guarded by REQUIRE_128BIT and REQUIRE_EXT(ctx, RVM)
 * (both defined elsewhere).  ctx->ol is overridden to MXL_RV64 before
 * delegating to gen_arith() (presumably selecting 64-bit operand length).
 * Operands are sign-extended (EXT_SIGN), gen_div is the generator, and no
 * register-pair generator is supplied (NULL).
 */
static bool trans_divd(DisasContext *ctx, arg_divd *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV64;
    return gen_arith(ctx, a, EXT_SIGN, gen_div, NULL);
}
377
/*
 * Translate DIVUD.  Guarded by REQUIRE_128BIT and REQUIRE_EXT(ctx, RVM)
 * (both defined elsewhere).  ctx->ol is overridden to MXL_RV64 before
 * delegating to gen_arith() (presumably selecting 64-bit operand length).
 * Operands are zero-extended (EXT_ZERO), gen_divu is the generator, and no
 * register-pair generator is supplied (NULL).
 */
static bool trans_divud(DisasContext *ctx, arg_divud *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV64;
    return gen_arith(ctx, a, EXT_ZERO, gen_divu, NULL);
}
385
/*
 * Translate REMD.  Guarded by REQUIRE_128BIT and REQUIRE_EXT(ctx, RVM)
 * (both defined elsewhere).  ctx->ol is overridden to MXL_RV64 before
 * delegating to gen_arith() (presumably selecting 64-bit operand length).
 * Operands are sign-extended (EXT_SIGN), gen_rem is the generator, and no
 * register-pair generator is supplied (NULL).
 */
static bool trans_remd(DisasContext *ctx, arg_remd *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV64;
    return gen_arith(ctx, a, EXT_SIGN, gen_rem, NULL);
}
393
/*
 * Translate REMUD.  Guarded by REQUIRE_128BIT and REQUIRE_EXT(ctx, RVM)
 * (both defined elsewhere).  ctx->ol is overridden to MXL_RV64 before
 * delegating to gen_arith() (presumably selecting 64-bit operand length).
 * Operands are zero-extended (EXT_ZERO), gen_remu is the generator, and no
 * register-pair generator is supplied (NULL).
 */
static bool trans_remud(DisasContext *ctx, arg_remud *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV64;
    return gen_arith(ctx, a, EXT_ZERO, gen_remu, NULL);
}
401