xref: /openbmc/qemu/target/hexagon/op_helper.c (revision 39920a04)
1 /*
2  *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "qemu/log.h"
20 #include "exec/exec-all.h"
21 #include "exec/cpu_ldst.h"
22 #include "exec/helper-proto.h"
23 #include "fpu/softfloat.h"
24 #include "cpu.h"
25 #include "internal.h"
26 #include "macros.h"
27 #include "arch.h"
28 #include "hex_arch_types.h"
29 #include "fma_emu.h"
30 #include "mmvec/mmvec.h"
31 #include "mmvec/macros.h"
32 #include "op_helper.h"
33 #include "translate.h"
34 
35 #define SF_BIAS        127
36 #define SF_MANTBITS    23
37 
38 /* Exceptions processing helpers */
39 static G_NORETURN
40 void do_raise_exception_err(CPUHexagonState *env,
41                             uint32_t exception,
42                             uintptr_t pc)
43 {
44     CPUState *cs = env_cpu(env);
45     qemu_log_mask(CPU_LOG_INT, "%s: %d\n", __func__, exception);
46     cs->exception_index = exception;
47     cpu_loop_exit_restore(cs, pc);
48 }
49 
50 G_NORETURN void HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp)
51 {
52     do_raise_exception_err(env, excp, 0);
53 }
54 
55 void log_reg_write(CPUHexagonState *env, int rnum,
56                    target_ulong val)
57 {
58     HEX_DEBUG_LOG("log_reg_write[%d] = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")",
59                   rnum, val, val);
60     if (val == env->gpr[rnum]) {
61         HEX_DEBUG_LOG(" NO CHANGE");
62     }
63     HEX_DEBUG_LOG("\n");
64 
65     env->new_value[rnum] = val;
66     if (HEX_DEBUG) {
67         /* Do this so HELPER(debug_commit_end) will know */
68         env->reg_written[rnum] = 1;
69     }
70 }
71 
72 static void log_pred_write(CPUHexagonState *env, int pnum, target_ulong val)
73 {
74     HEX_DEBUG_LOG("log_pred_write[%d] = " TARGET_FMT_ld
75                   " (0x" TARGET_FMT_lx ")\n",
76                   pnum, val, val);
77 
78     /* Multiple writes to the same preg are and'ed together */
79     if (env->pred_written & (1 << pnum)) {
80         env->new_pred_value[pnum] &= val & 0xff;
81     } else {
82         env->new_pred_value[pnum] = val & 0xff;
83         env->pred_written |= 1 << pnum;
84     }
85 }
86 
87 void log_store32(CPUHexagonState *env, target_ulong addr,
88                  target_ulong val, int width, int slot)
89 {
90     HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx
91                   ", %" PRId32 " [0x08%" PRIx32 "])\n",
92                   width, addr, val, val);
93     env->mem_log_stores[slot].va = addr;
94     env->mem_log_stores[slot].width = width;
95     env->mem_log_stores[slot].data32 = val;
96 }
97 
98 void log_store64(CPUHexagonState *env, target_ulong addr,
99                  int64_t val, int width, int slot)
100 {
101     HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx
102                   ", %" PRId64 " [0x016%" PRIx64 "])\n",
103                    width, addr, val, val);
104     env->mem_log_stores[slot].va = addr;
105     env->mem_log_stores[slot].width = width;
106     env->mem_log_stores[slot].data64 = val;
107 }
108 
109 /* Handy place to set a breakpoint */
110 void HELPER(debug_start_packet)(CPUHexagonState *env)
111 {
112     HEX_DEBUG_LOG("Start packet: pc = 0x" TARGET_FMT_lx "\n",
113                   env->gpr[HEX_REG_PC]);
114 
115     for (int i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
116         env->reg_written[i] = 0;
117     }
118 }
119 
120 /* Checks for bookkeeping errors between disassembly context and runtime */
121 void HELPER(debug_check_store_width)(CPUHexagonState *env, int slot, int check)
122 {
123     if (env->mem_log_stores[slot].width != check) {
124         HEX_DEBUG_LOG("ERROR: %d != %d\n",
125                       env->mem_log_stores[slot].width, check);
126         g_assert_not_reached();
127     }
128 }
129 
130 void HELPER(commit_store)(CPUHexagonState *env, int slot_num)
131 {
132     uintptr_t ra = GETPC();
133     uint8_t width = env->mem_log_stores[slot_num].width;
134     target_ulong va = env->mem_log_stores[slot_num].va;
135 
136     switch (width) {
137     case 1:
138         cpu_stb_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
139         break;
140     case 2:
141         cpu_stw_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
142         break;
143     case 4:
144         cpu_stl_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
145         break;
146     case 8:
147         cpu_stq_data_ra(env, va, env->mem_log_stores[slot_num].data64, ra);
148         break;
149     default:
150         g_assert_not_reached();
151     }
152 }
153 
154 void HELPER(gather_store)(CPUHexagonState *env, uint32_t addr, int slot)
155 {
156     mem_gather_store(env, addr, slot);
157 }
158 
159 void HELPER(commit_hvx_stores)(CPUHexagonState *env)
160 {
161     uintptr_t ra = GETPC();
162     int i;
163 
164     /* Normal (possibly masked) vector store */
165     for (i = 0; i < VSTORES_MAX; i++) {
166         if (env->vstore_pending[i]) {
167             env->vstore_pending[i] = 0;
168             target_ulong va = env->vstore[i].va;
169             int size = env->vstore[i].size;
170             for (int j = 0; j < size; j++) {
171                 if (test_bit(j, env->vstore[i].mask)) {
172                     cpu_stb_data_ra(env, va + j, env->vstore[i].data.ub[j], ra);
173                 }
174             }
175         }
176     }
177 
178     /* Scatter store */
179     if (env->vtcm_pending) {
180         env->vtcm_pending = false;
181         if (env->vtcm_log.op) {
182             /* Need to perform the scatter read/modify/write at commit time */
183             if (env->vtcm_log.op_size == 2) {
184                 SCATTER_OP_WRITE_TO_MEM(uint16_t);
185             } else if (env->vtcm_log.op_size == 4) {
186                 /* Word Scatter += */
187                 SCATTER_OP_WRITE_TO_MEM(uint32_t);
188             } else {
189                 g_assert_not_reached();
190             }
191         } else {
192             for (i = 0; i < sizeof(MMVector); i++) {
193                 if (test_bit(i, env->vtcm_log.mask)) {
194                     cpu_stb_data_ra(env, env->vtcm_log.va[i],
195                                     env->vtcm_log.data.ub[i], ra);
196                     clear_bit(i, env->vtcm_log.mask);
197                     env->vtcm_log.data.ub[i] = 0;
198                 }
199 
200             }
201         }
202     }
203 }
204 
205 static void print_store(CPUHexagonState *env, int slot)
206 {
207     if (!(env->slot_cancelled & (1 << slot))) {
208         uint8_t width = env->mem_log_stores[slot].width;
209         if (width == 1) {
210             uint32_t data = env->mem_log_stores[slot].data32 & 0xff;
211             HEX_DEBUG_LOG("\tmemb[0x" TARGET_FMT_lx "] = %" PRId32
212                           " (0x%02" PRIx32 ")\n",
213                           env->mem_log_stores[slot].va, data, data);
214         } else if (width == 2) {
215             uint32_t data = env->mem_log_stores[slot].data32 & 0xffff;
216             HEX_DEBUG_LOG("\tmemh[0x" TARGET_FMT_lx "] = %" PRId32
217                           " (0x%04" PRIx32 ")\n",
218                           env->mem_log_stores[slot].va, data, data);
219         } else if (width == 4) {
220             uint32_t data = env->mem_log_stores[slot].data32;
221             HEX_DEBUG_LOG("\tmemw[0x" TARGET_FMT_lx "] = %" PRId32
222                           " (0x%08" PRIx32 ")\n",
223                           env->mem_log_stores[slot].va, data, data);
224         } else if (width == 8) {
225             HEX_DEBUG_LOG("\tmemd[0x" TARGET_FMT_lx "] = %" PRId64
226                           " (0x%016" PRIx64 ")\n",
227                           env->mem_log_stores[slot].va,
228                           env->mem_log_stores[slot].data64,
229                           env->mem_log_stores[slot].data64);
230         } else {
231             HEX_DEBUG_LOG("\tBad store width %d\n", width);
232             g_assert_not_reached();
233         }
234     }
235 }
236 
237 /* This function is a handy place to set a breakpoint */
238 void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1)
239 {
240     bool reg_printed = false;
241     bool pred_printed = false;
242     int i;
243 
244     HEX_DEBUG_LOG("Packet committed: pc = 0x" TARGET_FMT_lx "\n",
245                   env->this_PC);
246     HEX_DEBUG_LOG("slot_cancelled = %d\n", env->slot_cancelled);
247 
248     for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
249         if (env->reg_written[i]) {
250             if (!reg_printed) {
251                 HEX_DEBUG_LOG("Regs written\n");
252                 reg_printed = true;
253             }
254             HEX_DEBUG_LOG("\tr%d = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")\n",
255                           i, env->new_value[i], env->new_value[i]);
256         }
257     }
258 
259     for (i = 0; i < NUM_PREGS; i++) {
260         if (env->pred_written & (1 << i)) {
261             if (!pred_printed) {
262                 HEX_DEBUG_LOG("Predicates written\n");
263                 pred_printed = true;
264             }
265             HEX_DEBUG_LOG("\tp%d = 0x" TARGET_FMT_lx "\n",
266                           i, env->new_pred_value[i]);
267         }
268     }
269 
270     if (has_st0 || has_st1) {
271         HEX_DEBUG_LOG("Stores\n");
272         if (has_st0) {
273             print_store(env, 0);
274         }
275         if (has_st1) {
276             print_store(env, 1);
277         }
278     }
279 
280     HEX_DEBUG_LOG("Next PC = " TARGET_FMT_lx "\n", env->gpr[HEX_REG_PC]);
281     HEX_DEBUG_LOG("Exec counters: pkt = " TARGET_FMT_lx
282                   ", insn = " TARGET_FMT_lx
283                   ", hvx = " TARGET_FMT_lx "\n",
284                   env->gpr[HEX_REG_QEMU_PKT_CNT],
285                   env->gpr[HEX_REG_QEMU_INSN_CNT],
286                   env->gpr[HEX_REG_QEMU_HVX_CNT]);
287 
288 }
289 
290 int32_t HELPER(fcircadd)(int32_t RxV, int32_t offset, int32_t M, int32_t CS)
291 {
292     uint32_t K_const = extract32(M, 24, 4);
293     uint32_t length = extract32(M, 0, 17);
294     uint32_t new_ptr = RxV + offset;
295     uint32_t start_addr;
296     uint32_t end_addr;
297 
298     if (K_const == 0 && length >= 4) {
299         start_addr = CS;
300         end_addr = start_addr + length;
301     } else {
302         /*
303          * Versions v3 and earlier used the K value to specify a power-of-2 size
304          * 2^(K+2) that is greater than the buffer length
305          */
306         int32_t mask = (1 << (K_const + 2)) - 1;
307         start_addr = RxV & (~mask);
308         end_addr = start_addr | length;
309     }
310 
311     if (new_ptr >= end_addr) {
312         new_ptr -= length;
313     } else if (new_ptr < start_addr) {
314         new_ptr += length;
315     }
316 
317     return new_ptr;
318 }
319 
320 uint32_t HELPER(fbrev)(uint32_t addr)
321 {
322     /*
323      *  Bit reverse the low 16 bits of the address
324      */
325     return deposit32(addr, 0, 16, revbit16(addr));
326 }
327 
328 static float32 build_float32(uint8_t sign, uint32_t exp, uint32_t mant)
329 {
330     return make_float32(
331         ((sign & 1) << 31) |
332         ((exp & 0xff) << SF_MANTBITS) |
333         (mant & ((1 << SF_MANTBITS) - 1)));
334 }
335 
336 /*
337  * sfrecipa, sfinvsqrta have two 32-bit results
338  *     r0,p0=sfrecipa(r1,r2)
339  *     r0,p0=sfinvsqrta(r1)
340  *
341  * Since helpers can only return a single value, we pack the two results
342  * into a 64-bit value.
343  */
344 uint64_t HELPER(sfrecipa)(CPUHexagonState *env, float32 RsV, float32 RtV)
345 {
346     int32_t PeV = 0;
347     float32 RdV;
348     int idx;
349     int adjust;
350     int mant;
351     int exp;
352 
353     arch_fpop_start(env);
354     if (arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status)) {
355         PeV = adjust;
356         idx = (RtV >> 16) & 0x7f;
357         mant = (recip_lookup_table[idx] << 15) | 1;
358         exp = SF_BIAS - (float32_getexp(RtV) - SF_BIAS) - 1;
359         RdV = build_float32(extract32(RtV, 31, 1), exp, mant);
360     }
361     arch_fpop_end(env);
362     return ((uint64_t)RdV << 32) | PeV;
363 }
364 
365 uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV)
366 {
367     int PeV = 0;
368     float32 RdV;
369     int idx;
370     int adjust;
371     int mant;
372     int exp;
373 
374     arch_fpop_start(env);
375     if (arch_sf_invsqrt_common(&RsV, &RdV, &adjust, &env->fp_status)) {
376         PeV = adjust;
377         idx = (RsV >> 17) & 0x7f;
378         mant = (invsqrt_lookup_table[idx] << 15);
379         exp = SF_BIAS - ((float32_getexp(RsV) - SF_BIAS) >> 1) - 1;
380         RdV = build_float32(extract32(RsV, 31, 1), exp, mant);
381     }
382     arch_fpop_end(env);
383     return ((uint64_t)RdV << 32) | PeV;
384 }
385 
386 int64_t HELPER(vacsh_val)(CPUHexagonState *env,
387                            int64_t RxxV, int64_t RssV, int64_t RttV)
388 {
389     for (int i = 0; i < 4; i++) {
390         int xv = sextract64(RxxV, i * 16, 16);
391         int sv = sextract64(RssV, i * 16, 16);
392         int tv = sextract64(RttV, i * 16, 16);
393         int max;
394         xv = xv + tv;
395         sv = sv - tv;
396         max = xv > sv ? xv : sv;
397         /* Note that fSATH can set the OVF bit in usr */
398         RxxV = deposit64(RxxV, i * 16, 16, fSATH(max));
399     }
400     return RxxV;
401 }
402 
403 int32_t HELPER(vacsh_pred)(CPUHexagonState *env,
404                            int64_t RxxV, int64_t RssV, int64_t RttV)
405 {
406     int32_t PeV = 0;
407     for (int i = 0; i < 4; i++) {
408         int xv = sextract64(RxxV, i * 16, 16);
409         int sv = sextract64(RssV, i * 16, 16);
410         int tv = sextract64(RttV, i * 16, 16);
411         xv = xv + tv;
412         sv = sv - tv;
413         PeV = deposit32(PeV, i * 2, 1, (xv > sv));
414         PeV = deposit32(PeV, i * 2 + 1, 1, (xv > sv));
415     }
416     return PeV;
417 }
418 
419 static void probe_store(CPUHexagonState *env, int slot, int mmu_idx,
420                         bool is_predicated)
421 {
422     if (!is_predicated || !(env->slot_cancelled & (1 << slot))) {
423         size1u_t width = env->mem_log_stores[slot].width;
424         target_ulong va = env->mem_log_stores[slot].va;
425         uintptr_t ra = GETPC();
426         probe_write(env, va, width, mmu_idx, ra);
427     }
428 }
429 
430 /*
431  * Called from a mem_noshuf packet to make sure the load doesn't
432  * raise an exception
433  */
434 void HELPER(probe_noshuf_load)(CPUHexagonState *env, target_ulong va,
435                                int size, int mmu_idx)
436 {
437     uintptr_t retaddr = GETPC();
438     probe_read(env, va, size, mmu_idx, retaddr);
439 }
440 
441 /* Called during packet commit when there are two scalar stores */
442 void HELPER(probe_pkt_scalar_store_s0)(CPUHexagonState *env, int args)
443 {
444     int mmu_idx = FIELD_EX32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX);
445     bool is_predicated =
446         FIELD_EX32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED);
447     probe_store(env, 0, mmu_idx, is_predicated);
448 }
449 
450 void HELPER(probe_hvx_stores)(CPUHexagonState *env, int mmu_idx)
451 {
452     uintptr_t retaddr = GETPC();
453     int i;
454 
455     /* Normal (possibly masked) vector store */
456     for (i = 0; i < VSTORES_MAX; i++) {
457         if (env->vstore_pending[i]) {
458             target_ulong va = env->vstore[i].va;
459             int size = env->vstore[i].size;
460             for (int j = 0; j < size; j++) {
461                 if (test_bit(j, env->vstore[i].mask)) {
462                     probe_write(env, va + j, 1, mmu_idx, retaddr);
463                 }
464             }
465         }
466     }
467 
468     /* Scatter store */
469     if (env->vtcm_pending) {
470         if (env->vtcm_log.op) {
471             /* Need to perform the scatter read/modify/write at commit time */
472             if (env->vtcm_log.op_size == 2) {
473                 SCATTER_OP_PROBE_MEM(size2u_t, mmu_idx, retaddr);
474             } else if (env->vtcm_log.op_size == 4) {
475                 /* Word Scatter += */
476                 SCATTER_OP_PROBE_MEM(size4u_t, mmu_idx, retaddr);
477             } else {
478                 g_assert_not_reached();
479             }
480         } else {
481             for (int i = 0; i < sizeof(MMVector); i++) {
482                 if (test_bit(i, env->vtcm_log.mask)) {
483                     probe_write(env, env->vtcm_log.va[i], 1, mmu_idx, retaddr);
484                 }
485 
486             }
487         }
488     }
489 }
490 
491 void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask)
492 {
493     bool has_st0 = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0);
494     bool has_st1 = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1);
495     bool has_hvx_stores =
496         FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_HVX_STORES);
497     bool s0_is_pred = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, S0_IS_PRED);
498     bool s1_is_pred = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, S1_IS_PRED);
499     int mmu_idx = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, MMU_IDX);
500 
501     if (has_st0) {
502         probe_store(env, 0, mmu_idx, s0_is_pred);
503     }
504     if (has_st1) {
505         probe_store(env, 1, mmu_idx, s1_is_pred);
506     }
507     if (has_hvx_stores) {
508         HELPER(probe_hvx_stores)(env, mmu_idx);
509     }
510 }
511 
512 /*
513  * mem_noshuf
514  * Section 5.5 of the Hexagon V67 Programmer's Reference Manual
515  *
516  * If the load is in slot 0 and there is a store in slot1 (that
517  * wasn't cancelled), we have to do the store first.
518  */
519 static void check_noshuf(CPUHexagonState *env, uint32_t slot,
520                          target_ulong vaddr, int size)
521 {
522     if (slot == 0 && env->pkt_has_store_s1 &&
523         ((env->slot_cancelled & (1 << 1)) == 0)) {
524         HELPER(probe_noshuf_load)(env, vaddr, size, MMU_USER_IDX);
525         HELPER(commit_store)(env, 1);
526     }
527 }
528 
529 uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, target_ulong vaddr)
530 {
531     uintptr_t ra = GETPC();
532     check_noshuf(env, slot, vaddr, 1);
533     return cpu_ldub_data_ra(env, vaddr, ra);
534 }
535 
536 uint16_t mem_load2(CPUHexagonState *env, uint32_t slot, target_ulong vaddr)
537 {
538     uintptr_t ra = GETPC();
539     check_noshuf(env, slot, vaddr, 2);
540     return cpu_lduw_data_ra(env, vaddr, ra);
541 }
542 
543 uint32_t mem_load4(CPUHexagonState *env, uint32_t slot, target_ulong vaddr)
544 {
545     uintptr_t ra = GETPC();
546     check_noshuf(env, slot, vaddr, 4);
547     return cpu_ldl_data_ra(env, vaddr, ra);
548 }
549 
550 uint64_t mem_load8(CPUHexagonState *env, uint32_t slot, target_ulong vaddr)
551 {
552     uintptr_t ra = GETPC();
553     check_noshuf(env, slot, vaddr, 8);
554     return cpu_ldq_data_ra(env, vaddr, ra);
555 }
556 
557 /* Floating point */
558 float64 HELPER(conv_sf2df)(CPUHexagonState *env, float32 RsV)
559 {
560     float64 out_f64;
561     arch_fpop_start(env);
562     out_f64 = float32_to_float64(RsV, &env->fp_status);
563     arch_fpop_end(env);
564     return out_f64;
565 }
566 
567 float32 HELPER(conv_df2sf)(CPUHexagonState *env, float64 RssV)
568 {
569     float32 out_f32;
570     arch_fpop_start(env);
571     out_f32 = float64_to_float32(RssV, &env->fp_status);
572     arch_fpop_end(env);
573     return out_f32;
574 }
575 
576 float32 HELPER(conv_uw2sf)(CPUHexagonState *env, int32_t RsV)
577 {
578     float32 RdV;
579     arch_fpop_start(env);
580     RdV = uint32_to_float32(RsV, &env->fp_status);
581     arch_fpop_end(env);
582     return RdV;
583 }
584 
585 float64 HELPER(conv_uw2df)(CPUHexagonState *env, int32_t RsV)
586 {
587     float64 RddV;
588     arch_fpop_start(env);
589     RddV = uint32_to_float64(RsV, &env->fp_status);
590     arch_fpop_end(env);
591     return RddV;
592 }
593 
594 float32 HELPER(conv_w2sf)(CPUHexagonState *env, int32_t RsV)
595 {
596     float32 RdV;
597     arch_fpop_start(env);
598     RdV = int32_to_float32(RsV, &env->fp_status);
599     arch_fpop_end(env);
600     return RdV;
601 }
602 
603 float64 HELPER(conv_w2df)(CPUHexagonState *env, int32_t RsV)
604 {
605     float64 RddV;
606     arch_fpop_start(env);
607     RddV = int32_to_float64(RsV, &env->fp_status);
608     arch_fpop_end(env);
609     return RddV;
610 }
611 
612 float32 HELPER(conv_ud2sf)(CPUHexagonState *env, int64_t RssV)
613 {
614     float32 RdV;
615     arch_fpop_start(env);
616     RdV = uint64_to_float32(RssV, &env->fp_status);
617     arch_fpop_end(env);
618     return RdV;
619 }
620 
621 float64 HELPER(conv_ud2df)(CPUHexagonState *env, int64_t RssV)
622 {
623     float64 RddV;
624     arch_fpop_start(env);
625     RddV = uint64_to_float64(RssV, &env->fp_status);
626     arch_fpop_end(env);
627     return RddV;
628 }
629 
630 float32 HELPER(conv_d2sf)(CPUHexagonState *env, int64_t RssV)
631 {
632     float32 RdV;
633     arch_fpop_start(env);
634     RdV = int64_to_float32(RssV, &env->fp_status);
635     arch_fpop_end(env);
636     return RdV;
637 }
638 
639 float64 HELPER(conv_d2df)(CPUHexagonState *env, int64_t RssV)
640 {
641     float64 RddV;
642     arch_fpop_start(env);
643     RddV = int64_to_float64(RssV, &env->fp_status);
644     arch_fpop_end(env);
645     return RddV;
646 }
647 
648 uint32_t HELPER(conv_sf2uw)(CPUHexagonState *env, float32 RsV)
649 {
650     uint32_t RdV;
651     arch_fpop_start(env);
652     /* Hexagon checks the sign before rounding */
653     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
654         float_raise(float_flag_invalid, &env->fp_status);
655         RdV = 0;
656     } else {
657         RdV = float32_to_uint32(RsV, &env->fp_status);
658     }
659     arch_fpop_end(env);
660     return RdV;
661 }
662 
663 int32_t HELPER(conv_sf2w)(CPUHexagonState *env, float32 RsV)
664 {
665     int32_t RdV;
666     arch_fpop_start(env);
667     /* Hexagon returns -1 for NaN */
668     if (float32_is_any_nan(RsV)) {
669         float_raise(float_flag_invalid, &env->fp_status);
670         RdV = -1;
671     } else {
672         RdV = float32_to_int32(RsV, &env->fp_status);
673     }
674     arch_fpop_end(env);
675     return RdV;
676 }
677 
678 uint64_t HELPER(conv_sf2ud)(CPUHexagonState *env, float32 RsV)
679 {
680     uint64_t RddV;
681     arch_fpop_start(env);
682     /* Hexagon checks the sign before rounding */
683     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
684         float_raise(float_flag_invalid, &env->fp_status);
685         RddV = 0;
686     } else {
687         RddV = float32_to_uint64(RsV, &env->fp_status);
688     }
689     arch_fpop_end(env);
690     return RddV;
691 }
692 
693 int64_t HELPER(conv_sf2d)(CPUHexagonState *env, float32 RsV)
694 {
695     int64_t RddV;
696     arch_fpop_start(env);
697     /* Hexagon returns -1 for NaN */
698     if (float32_is_any_nan(RsV)) {
699         float_raise(float_flag_invalid, &env->fp_status);
700         RddV = -1;
701     } else {
702         RddV = float32_to_int64(RsV, &env->fp_status);
703     }
704     arch_fpop_end(env);
705     return RddV;
706 }
707 
708 uint32_t HELPER(conv_df2uw)(CPUHexagonState *env, float64 RssV)
709 {
710     uint32_t RdV;
711     arch_fpop_start(env);
712     /* Hexagon checks the sign before rounding */
713     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
714         float_raise(float_flag_invalid, &env->fp_status);
715         RdV = 0;
716     } else {
717         RdV = float64_to_uint32(RssV, &env->fp_status);
718     }
719     arch_fpop_end(env);
720     return RdV;
721 }
722 
723 int32_t HELPER(conv_df2w)(CPUHexagonState *env, float64 RssV)
724 {
725     int32_t RdV;
726     arch_fpop_start(env);
727     /* Hexagon returns -1 for NaN */
728     if (float64_is_any_nan(RssV)) {
729         float_raise(float_flag_invalid, &env->fp_status);
730         RdV = -1;
731     } else {
732         RdV = float64_to_int32(RssV, &env->fp_status);
733     }
734     arch_fpop_end(env);
735     return RdV;
736 }
737 
738 uint64_t HELPER(conv_df2ud)(CPUHexagonState *env, float64 RssV)
739 {
740     uint64_t RddV;
741     arch_fpop_start(env);
742     /* Hexagon checks the sign before rounding */
743     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
744         float_raise(float_flag_invalid, &env->fp_status);
745         RddV = 0;
746     } else {
747         RddV = float64_to_uint64(RssV, &env->fp_status);
748     }
749     arch_fpop_end(env);
750     return RddV;
751 }
752 
753 int64_t HELPER(conv_df2d)(CPUHexagonState *env, float64 RssV)
754 {
755     int64_t RddV;
756     arch_fpop_start(env);
757     /* Hexagon returns -1 for NaN */
758     if (float64_is_any_nan(RssV)) {
759         float_raise(float_flag_invalid, &env->fp_status);
760         RddV = -1;
761     } else {
762         RddV = float64_to_int64(RssV, &env->fp_status);
763     }
764     arch_fpop_end(env);
765     return RddV;
766 }
767 
768 uint32_t HELPER(conv_sf2uw_chop)(CPUHexagonState *env, float32 RsV)
769 {
770     uint32_t RdV;
771     arch_fpop_start(env);
772     /* Hexagon checks the sign before rounding */
773     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
774         float_raise(float_flag_invalid, &env->fp_status);
775         RdV = 0;
776     } else {
777         RdV = float32_to_uint32_round_to_zero(RsV, &env->fp_status);
778     }
779     arch_fpop_end(env);
780     return RdV;
781 }
782 
783 int32_t HELPER(conv_sf2w_chop)(CPUHexagonState *env, float32 RsV)
784 {
785     int32_t RdV;
786     arch_fpop_start(env);
787     /* Hexagon returns -1 for NaN */
788     if (float32_is_any_nan(RsV)) {
789         float_raise(float_flag_invalid, &env->fp_status);
790         RdV = -1;
791     } else {
792         RdV = float32_to_int32_round_to_zero(RsV, &env->fp_status);
793     }
794     arch_fpop_end(env);
795     return RdV;
796 }
797 
798 uint64_t HELPER(conv_sf2ud_chop)(CPUHexagonState *env, float32 RsV)
799 {
800     uint64_t RddV;
801     arch_fpop_start(env);
802     /* Hexagon checks the sign before rounding */
803     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
804         float_raise(float_flag_invalid, &env->fp_status);
805         RddV = 0;
806     } else {
807         RddV = float32_to_uint64_round_to_zero(RsV, &env->fp_status);
808     }
809     arch_fpop_end(env);
810     return RddV;
811 }
812 
813 int64_t HELPER(conv_sf2d_chop)(CPUHexagonState *env, float32 RsV)
814 {
815     int64_t RddV;
816     arch_fpop_start(env);
817     /* Hexagon returns -1 for NaN */
818     if (float32_is_any_nan(RsV)) {
819         float_raise(float_flag_invalid, &env->fp_status);
820         RddV = -1;
821     } else {
822         RddV = float32_to_int64_round_to_zero(RsV, &env->fp_status);
823     }
824     arch_fpop_end(env);
825     return RddV;
826 }
827 
828 uint32_t HELPER(conv_df2uw_chop)(CPUHexagonState *env, float64 RssV)
829 {
830     uint32_t RdV;
831     arch_fpop_start(env);
832     /* Hexagon checks the sign before rounding */
833     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
834         float_raise(float_flag_invalid, &env->fp_status);
835         RdV = 0;
836     } else {
837         RdV = float64_to_uint32_round_to_zero(RssV, &env->fp_status);
838     }
839     arch_fpop_end(env);
840     return RdV;
841 }
842 
843 int32_t HELPER(conv_df2w_chop)(CPUHexagonState *env, float64 RssV)
844 {
845     int32_t RdV;
846     arch_fpop_start(env);
847     /* Hexagon returns -1 for NaN */
848     if (float64_is_any_nan(RssV)) {
849         float_raise(float_flag_invalid, &env->fp_status);
850         RdV = -1;
851     } else {
852         RdV = float64_to_int32_round_to_zero(RssV, &env->fp_status);
853     }
854     arch_fpop_end(env);
855     return RdV;
856 }
857 
858 uint64_t HELPER(conv_df2ud_chop)(CPUHexagonState *env, float64 RssV)
859 {
860     uint64_t RddV;
861     arch_fpop_start(env);
862     /* Hexagon checks the sign before rounding */
863     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
864         float_raise(float_flag_invalid, &env->fp_status);
865         RddV = 0;
866     } else {
867         RddV = float64_to_uint64_round_to_zero(RssV, &env->fp_status);
868     }
869     arch_fpop_end(env);
870     return RddV;
871 }
872 
873 int64_t HELPER(conv_df2d_chop)(CPUHexagonState *env, float64 RssV)
874 {
875     int64_t RddV;
876     arch_fpop_start(env);
877     /* Hexagon returns -1 for NaN */
878     if (float64_is_any_nan(RssV)) {
879         float_raise(float_flag_invalid, &env->fp_status);
880         RddV = -1;
881     } else {
882         RddV = float64_to_int64_round_to_zero(RssV, &env->fp_status);
883     }
884     arch_fpop_end(env);
885     return RddV;
886 }
887 
888 float32 HELPER(sfadd)(CPUHexagonState *env, float32 RsV, float32 RtV)
889 {
890     float32 RdV;
891     arch_fpop_start(env);
892     RdV = float32_add(RsV, RtV, &env->fp_status);
893     arch_fpop_end(env);
894     return RdV;
895 }
896 
897 float32 HELPER(sfsub)(CPUHexagonState *env, float32 RsV, float32 RtV)
898 {
899     float32 RdV;
900     arch_fpop_start(env);
901     RdV = float32_sub(RsV, RtV, &env->fp_status);
902     arch_fpop_end(env);
903     return RdV;
904 }
905 
906 int32_t HELPER(sfcmpeq)(CPUHexagonState *env, float32 RsV, float32 RtV)
907 {
908     int32_t PdV;
909     arch_fpop_start(env);
910     PdV = f8BITSOF(float32_eq_quiet(RsV, RtV, &env->fp_status));
911     arch_fpop_end(env);
912     return PdV;
913 }
914 
915 int32_t HELPER(sfcmpgt)(CPUHexagonState *env, float32 RsV, float32 RtV)
916 {
917     int cmp;
918     int32_t PdV;
919     arch_fpop_start(env);
920     cmp = float32_compare_quiet(RsV, RtV, &env->fp_status);
921     PdV = f8BITSOF(cmp == float_relation_greater);
922     arch_fpop_end(env);
923     return PdV;
924 }
925 
926 int32_t HELPER(sfcmpge)(CPUHexagonState *env, float32 RsV, float32 RtV)
927 {
928     int cmp;
929     int32_t PdV;
930     arch_fpop_start(env);
931     cmp = float32_compare_quiet(RsV, RtV, &env->fp_status);
932     PdV = f8BITSOF(cmp == float_relation_greater ||
933                    cmp == float_relation_equal);
934     arch_fpop_end(env);
935     return PdV;
936 }
937 
938 int32_t HELPER(sfcmpuo)(CPUHexagonState *env, float32 RsV, float32 RtV)
939 {
940     int32_t PdV;
941     arch_fpop_start(env);
942     PdV = f8BITSOF(float32_unordered_quiet(RsV, RtV, &env->fp_status));
943     arch_fpop_end(env);
944     return PdV;
945 }
946 
947 float32 HELPER(sfmax)(CPUHexagonState *env, float32 RsV, float32 RtV)
948 {
949     float32 RdV;
950     arch_fpop_start(env);
951     RdV = float32_maximum_number(RsV, RtV, &env->fp_status);
952     arch_fpop_end(env);
953     return RdV;
954 }
955 
956 float32 HELPER(sfmin)(CPUHexagonState *env, float32 RsV, float32 RtV)
957 {
958     float32 RdV;
959     arch_fpop_start(env);
960     RdV = float32_minimum_number(RsV, RtV, &env->fp_status);
961     arch_fpop_end(env);
962     return RdV;
963 }
964 
965 int32_t HELPER(sfclass)(CPUHexagonState *env, float32 RsV, int32_t uiV)
966 {
967     int32_t PdV = 0;
968     arch_fpop_start(env);
969     if (fGETBIT(0, uiV) && float32_is_zero(RsV)) {
970         PdV = 0xff;
971     }
972     if (fGETBIT(1, uiV) && float32_is_normal(RsV)) {
973         PdV = 0xff;
974     }
975     if (fGETBIT(2, uiV) && float32_is_denormal(RsV)) {
976         PdV = 0xff;
977     }
978     if (fGETBIT(3, uiV) && float32_is_infinity(RsV)) {
979         PdV = 0xff;
980     }
981     if (fGETBIT(4, uiV) && float32_is_any_nan(RsV)) {
982         PdV = 0xff;
983     }
984     set_float_exception_flags(0, &env->fp_status);
985     arch_fpop_end(env);
986     return PdV;
987 }
988 
989 float32 HELPER(sffixupn)(CPUHexagonState *env, float32 RsV, float32 RtV)
990 {
991     float32 RdV = 0;
992     int adjust;
993     arch_fpop_start(env);
994     arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status);
995     RdV = RsV;
996     arch_fpop_end(env);
997     return RdV;
998 }
999 
1000 float32 HELPER(sffixupd)(CPUHexagonState *env, float32 RsV, float32 RtV)
1001 {
1002     float32 RdV = 0;
1003     int adjust;
1004     arch_fpop_start(env);
1005     arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status);
1006     RdV = RtV;
1007     arch_fpop_end(env);
1008     return RdV;
1009 }
1010 
1011 float32 HELPER(sffixupr)(CPUHexagonState *env, float32 RsV)
1012 {
1013     float32 RdV = 0;
1014     int adjust;
1015     arch_fpop_start(env);
1016     arch_sf_invsqrt_common(&RsV, &RdV, &adjust, &env->fp_status);
1017     RdV = RsV;
1018     arch_fpop_end(env);
1019     return RdV;
1020 }
1021 
1022 float64 HELPER(dfadd)(CPUHexagonState *env, float64 RssV, float64 RttV)
1023 {
1024     float64 RddV;
1025     arch_fpop_start(env);
1026     RddV = float64_add(RssV, RttV, &env->fp_status);
1027     arch_fpop_end(env);
1028     return RddV;
1029 }
1030 
1031 float64 HELPER(dfsub)(CPUHexagonState *env, float64 RssV, float64 RttV)
1032 {
1033     float64 RddV;
1034     arch_fpop_start(env);
1035     RddV = float64_sub(RssV, RttV, &env->fp_status);
1036     arch_fpop_end(env);
1037     return RddV;
1038 }
1039 
1040 float64 HELPER(dfmax)(CPUHexagonState *env, float64 RssV, float64 RttV)
1041 {
1042     float64 RddV;
1043     arch_fpop_start(env);
1044     RddV = float64_maximum_number(RssV, RttV, &env->fp_status);
1045     arch_fpop_end(env);
1046     return RddV;
1047 }
1048 
1049 float64 HELPER(dfmin)(CPUHexagonState *env, float64 RssV, float64 RttV)
1050 {
1051     float64 RddV;
1052     arch_fpop_start(env);
1053     RddV = float64_minimum_number(RssV, RttV, &env->fp_status);
1054     arch_fpop_end(env);
1055     return RddV;
1056 }
1057 
1058 int32_t HELPER(dfcmpeq)(CPUHexagonState *env, float64 RssV, float64 RttV)
1059 {
1060     int32_t PdV;
1061     arch_fpop_start(env);
1062     PdV = f8BITSOF(float64_eq_quiet(RssV, RttV, &env->fp_status));
1063     arch_fpop_end(env);
1064     return PdV;
1065 }
1066 
1067 int32_t HELPER(dfcmpgt)(CPUHexagonState *env, float64 RssV, float64 RttV)
1068 {
1069     int cmp;
1070     int32_t PdV;
1071     arch_fpop_start(env);
1072     cmp = float64_compare_quiet(RssV, RttV, &env->fp_status);
1073     PdV = f8BITSOF(cmp == float_relation_greater);
1074     arch_fpop_end(env);
1075     return PdV;
1076 }
1077 
1078 int32_t HELPER(dfcmpge)(CPUHexagonState *env, float64 RssV, float64 RttV)
1079 {
1080     int cmp;
1081     int32_t PdV;
1082     arch_fpop_start(env);
1083     cmp = float64_compare_quiet(RssV, RttV, &env->fp_status);
1084     PdV = f8BITSOF(cmp == float_relation_greater ||
1085                    cmp == float_relation_equal);
1086     arch_fpop_end(env);
1087     return PdV;
1088 }
1089 
1090 int32_t HELPER(dfcmpuo)(CPUHexagonState *env, float64 RssV, float64 RttV)
1091 {
1092     int32_t PdV;
1093     arch_fpop_start(env);
1094     PdV = f8BITSOF(float64_unordered_quiet(RssV, RttV, &env->fp_status));
1095     arch_fpop_end(env);
1096     return PdV;
1097 }
1098 
1099 int32_t HELPER(dfclass)(CPUHexagonState *env, float64 RssV, int32_t uiV)
1100 {
1101     int32_t PdV = 0;
1102     arch_fpop_start(env);
1103     if (fGETBIT(0, uiV) && float64_is_zero(RssV)) {
1104         PdV = 0xff;
1105     }
1106     if (fGETBIT(1, uiV) && float64_is_normal(RssV)) {
1107         PdV = 0xff;
1108     }
1109     if (fGETBIT(2, uiV) && float64_is_denormal(RssV)) {
1110         PdV = 0xff;
1111     }
1112     if (fGETBIT(3, uiV) && float64_is_infinity(RssV)) {
1113         PdV = 0xff;
1114     }
1115     if (fGETBIT(4, uiV) && float64_is_any_nan(RssV)) {
1116         PdV = 0xff;
1117     }
1118     set_float_exception_flags(0, &env->fp_status);
1119     arch_fpop_end(env);
1120     return PdV;
1121 }
1122 
1123 float32 HELPER(sfmpy)(CPUHexagonState *env, float32 RsV, float32 RtV)
1124 {
1125     float32 RdV;
1126     arch_fpop_start(env);
1127     RdV = internal_mpyf(RsV, RtV, &env->fp_status);
1128     arch_fpop_end(env);
1129     return RdV;
1130 }
1131 
1132 float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
1133                       float32 RsV, float32 RtV)
1134 {
1135     arch_fpop_start(env);
1136     RxV = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
1137     arch_fpop_end(env);
1138     return RxV;
1139 }
1140 
1141 static bool is_zero_prod(float32 a, float32 b)
1142 {
1143     return ((float32_is_zero(a) && is_finite(b)) ||
1144             (float32_is_zero(b) && is_finite(a)));
1145 }
1146 
1147 static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
1148 {
1149     float32 ret = dst;
1150     if (float32_is_any_nan(x)) {
1151         if (extract32(x, 22, 1) == 0) {
1152             float_raise(float_flag_invalid, fp_status);
1153         }
1154         ret = make_float32(0xffffffff);    /* nan */
1155     }
1156     return ret;
1157 }
1158 
1159 float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
1160                          float32 RsV, float32 RtV, float32 PuV)
1161 {
1162     size4s_t tmp;
1163     arch_fpop_start(env);
1164     RxV = check_nan(RxV, RxV, &env->fp_status);
1165     RxV = check_nan(RxV, RsV, &env->fp_status);
1166     RxV = check_nan(RxV, RtV, &env->fp_status);
1167     tmp = internal_fmafx(RsV, RtV, RxV, fSXTN(8, 64, PuV), &env->fp_status);
1168     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1169         RxV = tmp;
1170     }
1171     arch_fpop_end(env);
1172     return RxV;
1173 }
1174 
1175 float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
1176                       float32 RsV, float32 RtV)
1177 {
1178     float32 neg_RsV;
1179     arch_fpop_start(env);
1180     neg_RsV = float32_set_sign(RsV, float32_is_neg(RsV) ? 0 : 1);
1181     RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status);
1182     arch_fpop_end(env);
1183     return RxV;
1184 }
1185 
1186 static bool is_inf_prod(int32_t a, int32_t b)
1187 {
1188     return (float32_is_infinity(a) && float32_is_infinity(b)) ||
1189            (float32_is_infinity(a) && is_finite(b) && !float32_is_zero(b)) ||
1190            (float32_is_infinity(b) && is_finite(a) && !float32_is_zero(a));
1191 }
1192 
1193 float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
1194                           float32 RsV, float32 RtV)
1195 {
1196     bool infinp;
1197     bool infminusinf;
1198     float32 tmp;
1199 
1200     arch_fpop_start(env);
1201     set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
1202     infminusinf = float32_is_infinity(RxV) &&
1203                   is_inf_prod(RsV, RtV) &&
1204                   (fGETBIT(31, RsV ^ RxV ^ RtV) != 0);
1205     infinp = float32_is_infinity(RxV) ||
1206              float32_is_infinity(RtV) ||
1207              float32_is_infinity(RsV);
1208     RxV = check_nan(RxV, RxV, &env->fp_status);
1209     RxV = check_nan(RxV, RsV, &env->fp_status);
1210     RxV = check_nan(RxV, RtV, &env->fp_status);
1211     tmp = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
1212     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1213         RxV = tmp;
1214     }
1215     set_float_exception_flags(0, &env->fp_status);
1216     if (float32_is_infinity(RxV) && !infinp) {
1217         RxV = RxV - 1;
1218     }
1219     if (infminusinf) {
1220         RxV = 0;
1221     }
1222     arch_fpop_end(env);
1223     return RxV;
1224 }
1225 
1226 float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV,
1227                           float32 RsV, float32 RtV)
1228 {
1229     bool infinp;
1230     bool infminusinf;
1231     float32 tmp;
1232 
1233     arch_fpop_start(env);
1234     set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
1235     infminusinf = float32_is_infinity(RxV) &&
1236                   is_inf_prod(RsV, RtV) &&
1237                   (fGETBIT(31, RsV ^ RxV ^ RtV) == 0);
1238     infinp = float32_is_infinity(RxV) ||
1239              float32_is_infinity(RtV) ||
1240              float32_is_infinity(RsV);
1241     RxV = check_nan(RxV, RxV, &env->fp_status);
1242     RxV = check_nan(RxV, RsV, &env->fp_status);
1243     RxV = check_nan(RxV, RtV, &env->fp_status);
1244     float32 minus_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
1245     tmp = internal_fmafx(minus_RsV, RtV, RxV, 0, &env->fp_status);
1246     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1247         RxV = tmp;
1248     }
1249     set_float_exception_flags(0, &env->fp_status);
1250     if (float32_is_infinity(RxV) && !infinp) {
1251         RxV = RxV - 1;
1252     }
1253     if (infminusinf) {
1254         RxV = 0;
1255     }
1256     arch_fpop_end(env);
1257     return RxV;
1258 }
1259 
1260 float64 HELPER(dfmpyfix)(CPUHexagonState *env, float64 RssV, float64 RttV)
1261 {
1262     int64_t RddV;
1263     arch_fpop_start(env);
1264     if (float64_is_denormal(RssV) &&
1265         (float64_getexp(RttV) >= 512) &&
1266         float64_is_normal(RttV)) {
1267         RddV = float64_mul(RssV, make_float64(0x4330000000000000),
1268                            &env->fp_status);
1269     } else if (float64_is_denormal(RttV) &&
1270                (float64_getexp(RssV) >= 512) &&
1271                float64_is_normal(RssV)) {
1272         RddV = float64_mul(RssV, make_float64(0x3cb0000000000000),
1273                            &env->fp_status);
1274     } else {
1275         RddV = RssV;
1276     }
1277     arch_fpop_end(env);
1278     return RddV;
1279 }
1280 
1281 float64 HELPER(dfmpyhh)(CPUHexagonState *env, float64 RxxV,
1282                         float64 RssV, float64 RttV)
1283 {
1284     arch_fpop_start(env);
1285     RxxV = internal_mpyhh(RssV, RttV, RxxV, &env->fp_status);
1286     arch_fpop_end(env);
1287     return RxxV;
1288 }
1289 
1290 /* Histogram instructions */
1291 
1292 void HELPER(vhist)(CPUHexagonState *env)
1293 {
1294     MMVector *input = &env->tmp_VRegs[0];
1295 
1296     for (int lane = 0; lane < 8; lane++) {
1297         for (int i = 0; i < sizeof(MMVector) / 8; ++i) {
1298             unsigned char value = input->ub[(sizeof(MMVector) / 8) * lane + i];
1299             unsigned char regno = value >> 3;
1300             unsigned char element = value & 7;
1301 
1302             env->VRegs[regno].uh[(sizeof(MMVector) / 16) * lane + element]++;
1303         }
1304     }
1305 }
1306 
1307 void HELPER(vhistq)(CPUHexagonState *env)
1308 {
1309     MMVector *input = &env->tmp_VRegs[0];
1310 
1311     for (int lane = 0; lane < 8; lane++) {
1312         for (int i = 0; i < sizeof(MMVector) / 8; ++i) {
1313             unsigned char value = input->ub[(sizeof(MMVector) / 8) * lane + i];
1314             unsigned char regno = value >> 3;
1315             unsigned char element = value & 7;
1316 
1317             if (fGETQBIT(env->qtmp, sizeof(MMVector) / 8 * lane + i)) {
1318                 env->VRegs[regno].uh[
1319                     (sizeof(MMVector) / 16) * lane + element]++;
1320             }
1321         }
1322     }
1323 }
1324 
1325 void HELPER(vwhist256)(CPUHexagonState *env)
1326 {
1327     MMVector *input = &env->tmp_VRegs[0];
1328 
1329     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1330         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1331         unsigned int weight = fGETUBYTE(1, input->h[i]);
1332         unsigned int vindex = (bucket >> 3) & 0x1F;
1333         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1334 
1335         env->VRegs[vindex].uh[elindex] =
1336             env->VRegs[vindex].uh[elindex] + weight;
1337     }
1338 }
1339 
1340 void HELPER(vwhist256q)(CPUHexagonState *env)
1341 {
1342     MMVector *input = &env->tmp_VRegs[0];
1343 
1344     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1345         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1346         unsigned int weight = fGETUBYTE(1, input->h[i]);
1347         unsigned int vindex = (bucket >> 3) & 0x1F;
1348         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1349 
1350         if (fGETQBIT(env->qtmp, 2 * i)) {
1351             env->VRegs[vindex].uh[elindex] =
1352                 env->VRegs[vindex].uh[elindex] + weight;
1353         }
1354     }
1355 }
1356 
1357 void HELPER(vwhist256_sat)(CPUHexagonState *env)
1358 {
1359     MMVector *input = &env->tmp_VRegs[0];
1360 
1361     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1362         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1363         unsigned int weight = fGETUBYTE(1, input->h[i]);
1364         unsigned int vindex = (bucket >> 3) & 0x1F;
1365         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1366 
1367         env->VRegs[vindex].uh[elindex] =
1368             fVSATUH(env->VRegs[vindex].uh[elindex] + weight);
1369     }
1370 }
1371 
1372 void HELPER(vwhist256q_sat)(CPUHexagonState *env)
1373 {
1374     MMVector *input = &env->tmp_VRegs[0];
1375 
1376     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1377         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1378         unsigned int weight = fGETUBYTE(1, input->h[i]);
1379         unsigned int vindex = (bucket >> 3) & 0x1F;
1380         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1381 
1382         if (fGETQBIT(env->qtmp, 2 * i)) {
1383             env->VRegs[vindex].uh[elindex] =
1384                 fVSATUH(env->VRegs[vindex].uh[elindex] + weight);
1385         }
1386     }
1387 }
1388 
1389 void HELPER(vwhist128)(CPUHexagonState *env)
1390 {
1391     MMVector *input = &env->tmp_VRegs[0];
1392 
1393     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1394         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1395         unsigned int weight = fGETUBYTE(1, input->h[i]);
1396         unsigned int vindex = (bucket >> 3) & 0x1F;
1397         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1398 
1399         env->VRegs[vindex].uw[elindex] =
1400             env->VRegs[vindex].uw[elindex] + weight;
1401     }
1402 }
1403 
1404 void HELPER(vwhist128q)(CPUHexagonState *env)
1405 {
1406     MMVector *input = &env->tmp_VRegs[0];
1407 
1408     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1409         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1410         unsigned int weight = fGETUBYTE(1, input->h[i]);
1411         unsigned int vindex = (bucket >> 3) & 0x1F;
1412         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1413 
1414         if (fGETQBIT(env->qtmp, 2 * i)) {
1415             env->VRegs[vindex].uw[elindex] =
1416                 env->VRegs[vindex].uw[elindex] + weight;
1417         }
1418     }
1419 }
1420 
1421 void HELPER(vwhist128m)(CPUHexagonState *env, int32_t uiV)
1422 {
1423     MMVector *input = &env->tmp_VRegs[0];
1424 
1425     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1426         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1427         unsigned int weight = fGETUBYTE(1, input->h[i]);
1428         unsigned int vindex = (bucket >> 3) & 0x1F;
1429         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1430 
1431         if ((bucket & 1) == uiV) {
1432             env->VRegs[vindex].uw[elindex] =
1433                 env->VRegs[vindex].uw[elindex] + weight;
1434         }
1435     }
1436 }
1437 
1438 void HELPER(vwhist128qm)(CPUHexagonState *env, int32_t uiV)
1439 {
1440     MMVector *input = &env->tmp_VRegs[0];
1441 
1442     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1443         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1444         unsigned int weight = fGETUBYTE(1, input->h[i]);
1445         unsigned int vindex = (bucket >> 3) & 0x1F;
1446         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1447 
1448         if (((bucket & 1) == uiV) && fGETQBIT(env->qtmp, 2 * i)) {
1449             env->VRegs[vindex].uw[elindex] =
1450                 env->VRegs[vindex].uw[elindex] + weight;
1451         }
1452     }
1453 }
1454 
/*
 * These macros can be referenced in the generated helper functions.
 *
 * warn()  - expands to nothing, so its arguments are discarded.
 * fatal() - expands to g_assert_not_reached(); note that the expansion
 *           already ends with a semicolon.
 */
#define warn(...) /* Nothing */
#define fatal(...) g_assert_not_reached();

/* Print an error message naming the helper; the tag is stringified. */
#define BOGUS_HELPER(tag) \
    printf("ERROR: bogus helper: " #tag "\n")

/* Pull in the helper function definitions from the generated include. */
#include "helper_funcs_generated.c.inc"
1463