xref: /openbmc/qemu/target/hexagon/op_helper.c (revision a91390164449c550d5cbba18147588fd1ddc985e)
1 /*
2  *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "qemu/log.h"
20 #include "exec/exec-all.h"
21 #include "exec/cpu_ldst.h"
22 #include "exec/helper-proto.h"
23 #include "fpu/softfloat.h"
24 #include "cpu.h"
25 #include "internal.h"
26 #include "macros.h"
27 #include "arch.h"
28 #include "hex_arch_types.h"
29 #include "fma_emu.h"
30 #include "mmvec/mmvec.h"
31 #include "mmvec/macros.h"
32 #include "op_helper.h"
33 
/* Single-precision field constants used below when assembling float32 bits */
#define SF_BIAS        127    /* exponent bias */
#define SF_MANTBITS    23     /* width of the mantissa field */
36 
37 /* Exceptions processing helpers */
38 static G_NORETURN
39 void do_raise_exception_err(CPUHexagonState *env,
40                             uint32_t exception,
41                             uintptr_t pc)
42 {
43     CPUState *cs = env_cpu(env);
44     qemu_log_mask(CPU_LOG_INT, "%s: %d\n", __func__, exception);
45     cs->exception_index = exception;
46     cpu_loop_exit_restore(cs, pc);
47 }
48 
49 G_NORETURN void HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp)
50 {
51     do_raise_exception_err(env, excp, 0);
52 }
53 
54 void log_reg_write(CPUHexagonState *env, int rnum,
55                    target_ulong val, uint32_t slot)
56 {
57     HEX_DEBUG_LOG("log_reg_write[%d] = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")",
58                   rnum, val, val);
59     if (val == env->gpr[rnum]) {
60         HEX_DEBUG_LOG(" NO CHANGE");
61     }
62     HEX_DEBUG_LOG("\n");
63 
64     env->new_value[rnum] = val;
65     if (HEX_DEBUG) {
66         /* Do this so HELPER(debug_commit_end) will know */
67         env->reg_written[rnum] = 1;
68     }
69 }
70 
71 static void log_pred_write(CPUHexagonState *env, int pnum, target_ulong val)
72 {
73     HEX_DEBUG_LOG("log_pred_write[%d] = " TARGET_FMT_ld
74                   " (0x" TARGET_FMT_lx ")\n",
75                   pnum, val, val);
76 
77     /* Multiple writes to the same preg are and'ed together */
78     if (env->pred_written & (1 << pnum)) {
79         env->new_pred_value[pnum] &= val & 0xff;
80     } else {
81         env->new_pred_value[pnum] = val & 0xff;
82         env->pred_written |= 1 << pnum;
83     }
84 }
85 
86 void log_store32(CPUHexagonState *env, target_ulong addr,
87                  target_ulong val, int width, int slot)
88 {
89     HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx
90                   ", %" PRId32 " [0x08%" PRIx32 "])\n",
91                   width, addr, val, val);
92     env->mem_log_stores[slot].va = addr;
93     env->mem_log_stores[slot].width = width;
94     env->mem_log_stores[slot].data32 = val;
95 }
96 
97 void log_store64(CPUHexagonState *env, target_ulong addr,
98                  int64_t val, int width, int slot)
99 {
100     HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx
101                   ", %" PRId64 " [0x016%" PRIx64 "])\n",
102                    width, addr, val, val);
103     env->mem_log_stores[slot].va = addr;
104     env->mem_log_stores[slot].width = width;
105     env->mem_log_stores[slot].data64 = val;
106 }
107 
108 void write_new_pc(CPUHexagonState *env, bool pkt_has_multi_cof,
109                          target_ulong addr)
110 {
111     HEX_DEBUG_LOG("write_new_pc(0x" TARGET_FMT_lx ")\n", addr);
112 
113     if (pkt_has_multi_cof) {
114         /*
115          * If more than one branch is taken in a packet, only the first one
116          * is actually done.
117          */
118         if (env->branch_taken) {
119             HEX_DEBUG_LOG("INFO: multiple branches taken in same packet, "
120                           "ignoring the second one\n");
121         } else {
122             fCHECK_PCALIGN(addr);
123             env->gpr[HEX_REG_PC] = addr;
124             env->branch_taken = 1;
125         }
126     } else {
127         fCHECK_PCALIGN(addr);
128         env->gpr[HEX_REG_PC] = addr;
129     }
130 }
131 
132 /* Handy place to set a breakpoint */
133 void HELPER(debug_start_packet)(CPUHexagonState *env)
134 {
135     HEX_DEBUG_LOG("Start packet: pc = 0x" TARGET_FMT_lx "\n",
136                   env->gpr[HEX_REG_PC]);
137 
138     for (int i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
139         env->reg_written[i] = 0;
140     }
141 }
142 
143 /* Checks for bookkeeping errors between disassembly context and runtime */
144 void HELPER(debug_check_store_width)(CPUHexagonState *env, int slot, int check)
145 {
146     if (env->mem_log_stores[slot].width != check) {
147         HEX_DEBUG_LOG("ERROR: %d != %d\n",
148                       env->mem_log_stores[slot].width, check);
149         g_assert_not_reached();
150     }
151 }
152 
153 void HELPER(commit_store)(CPUHexagonState *env, int slot_num)
154 {
155     uintptr_t ra = GETPC();
156     uint8_t width = env->mem_log_stores[slot_num].width;
157     target_ulong va = env->mem_log_stores[slot_num].va;
158 
159     switch (width) {
160     case 1:
161         cpu_stb_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
162         break;
163     case 2:
164         cpu_stw_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
165         break;
166     case 4:
167         cpu_stl_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
168         break;
169     case 8:
170         cpu_stq_data_ra(env, va, env->mem_log_stores[slot_num].data64, ra);
171         break;
172     default:
173         g_assert_not_reached();
174     }
175 }
176 
177 void HELPER(gather_store)(CPUHexagonState *env, uint32_t addr, int slot)
178 {
179     mem_gather_store(env, addr, slot);
180 }
181 
182 void HELPER(commit_hvx_stores)(CPUHexagonState *env)
183 {
184     uintptr_t ra = GETPC();
185     int i;
186 
187     /* Normal (possibly masked) vector store */
188     for (i = 0; i < VSTORES_MAX; i++) {
189         if (env->vstore_pending[i]) {
190             env->vstore_pending[i] = 0;
191             target_ulong va = env->vstore[i].va;
192             int size = env->vstore[i].size;
193             for (int j = 0; j < size; j++) {
194                 if (test_bit(j, env->vstore[i].mask)) {
195                     cpu_stb_data_ra(env, va + j, env->vstore[i].data.ub[j], ra);
196                 }
197             }
198         }
199     }
200 
201     /* Scatter store */
202     if (env->vtcm_pending) {
203         env->vtcm_pending = false;
204         if (env->vtcm_log.op) {
205             /* Need to perform the scatter read/modify/write at commit time */
206             if (env->vtcm_log.op_size == 2) {
207                 SCATTER_OP_WRITE_TO_MEM(uint16_t);
208             } else if (env->vtcm_log.op_size == 4) {
209                 /* Word Scatter += */
210                 SCATTER_OP_WRITE_TO_MEM(uint32_t);
211             } else {
212                 g_assert_not_reached();
213             }
214         } else {
215             for (i = 0; i < sizeof(MMVector); i++) {
216                 if (test_bit(i, env->vtcm_log.mask)) {
217                     cpu_stb_data_ra(env, env->vtcm_log.va[i],
218                                     env->vtcm_log.data.ub[i], ra);
219                     clear_bit(i, env->vtcm_log.mask);
220                     env->vtcm_log.data.ub[i] = 0;
221                 }
222 
223             }
224         }
225     }
226 }
227 
228 static void print_store(CPUHexagonState *env, int slot)
229 {
230     if (!(env->slot_cancelled & (1 << slot))) {
231         uint8_t width = env->mem_log_stores[slot].width;
232         if (width == 1) {
233             uint32_t data = env->mem_log_stores[slot].data32 & 0xff;
234             HEX_DEBUG_LOG("\tmemb[0x" TARGET_FMT_lx "] = %" PRId32
235                           " (0x%02" PRIx32 ")\n",
236                           env->mem_log_stores[slot].va, data, data);
237         } else if (width == 2) {
238             uint32_t data = env->mem_log_stores[slot].data32 & 0xffff;
239             HEX_DEBUG_LOG("\tmemh[0x" TARGET_FMT_lx "] = %" PRId32
240                           " (0x%04" PRIx32 ")\n",
241                           env->mem_log_stores[slot].va, data, data);
242         } else if (width == 4) {
243             uint32_t data = env->mem_log_stores[slot].data32;
244             HEX_DEBUG_LOG("\tmemw[0x" TARGET_FMT_lx "] = %" PRId32
245                           " (0x%08" PRIx32 ")\n",
246                           env->mem_log_stores[slot].va, data, data);
247         } else if (width == 8) {
248             HEX_DEBUG_LOG("\tmemd[0x" TARGET_FMT_lx "] = %" PRId64
249                           " (0x%016" PRIx64 ")\n",
250                           env->mem_log_stores[slot].va,
251                           env->mem_log_stores[slot].data64,
252                           env->mem_log_stores[slot].data64);
253         } else {
254             HEX_DEBUG_LOG("\tBad store width %d\n", width);
255             g_assert_not_reached();
256         }
257     }
258 }
259 
260 /* This function is a handy place to set a breakpoint */
261 void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1)
262 {
263     bool reg_printed = false;
264     bool pred_printed = false;
265     int i;
266 
267     HEX_DEBUG_LOG("Packet committed: pc = 0x" TARGET_FMT_lx "\n",
268                   env->this_PC);
269     HEX_DEBUG_LOG("slot_cancelled = %d\n", env->slot_cancelled);
270 
271     for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
272         if (env->reg_written[i]) {
273             if (!reg_printed) {
274                 HEX_DEBUG_LOG("Regs written\n");
275                 reg_printed = true;
276             }
277             HEX_DEBUG_LOG("\tr%d = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")\n",
278                           i, env->new_value[i], env->new_value[i]);
279         }
280     }
281 
282     for (i = 0; i < NUM_PREGS; i++) {
283         if (env->pred_written & (1 << i)) {
284             if (!pred_printed) {
285                 HEX_DEBUG_LOG("Predicates written\n");
286                 pred_printed = true;
287             }
288             HEX_DEBUG_LOG("\tp%d = 0x" TARGET_FMT_lx "\n",
289                           i, env->new_pred_value[i]);
290         }
291     }
292 
293     if (has_st0 || has_st1) {
294         HEX_DEBUG_LOG("Stores\n");
295         if (has_st0) {
296             print_store(env, 0);
297         }
298         if (has_st1) {
299             print_store(env, 1);
300         }
301     }
302 
303     HEX_DEBUG_LOG("Next PC = " TARGET_FMT_lx "\n", env->gpr[HEX_REG_PC]);
304     HEX_DEBUG_LOG("Exec counters: pkt = " TARGET_FMT_lx
305                   ", insn = " TARGET_FMT_lx
306                   ", hvx = " TARGET_FMT_lx "\n",
307                   env->gpr[HEX_REG_QEMU_PKT_CNT],
308                   env->gpr[HEX_REG_QEMU_INSN_CNT],
309                   env->gpr[HEX_REG_QEMU_HVX_CNT]);
310 
311 }
312 
313 int32_t HELPER(fcircadd)(int32_t RxV, int32_t offset, int32_t M, int32_t CS)
314 {
315     uint32_t K_const = extract32(M, 24, 4);
316     uint32_t length = extract32(M, 0, 17);
317     uint32_t new_ptr = RxV + offset;
318     uint32_t start_addr;
319     uint32_t end_addr;
320 
321     if (K_const == 0 && length >= 4) {
322         start_addr = CS;
323         end_addr = start_addr + length;
324     } else {
325         /*
326          * Versions v3 and earlier used the K value to specify a power-of-2 size
327          * 2^(K+2) that is greater than the buffer length
328          */
329         int32_t mask = (1 << (K_const + 2)) - 1;
330         start_addr = RxV & (~mask);
331         end_addr = start_addr | length;
332     }
333 
334     if (new_ptr >= end_addr) {
335         new_ptr -= length;
336     } else if (new_ptr < start_addr) {
337         new_ptr += length;
338     }
339 
340     return new_ptr;
341 }
342 
343 uint32_t HELPER(fbrev)(uint32_t addr)
344 {
345     /*
346      *  Bit reverse the low 16 bits of the address
347      */
348     return deposit32(addr, 0, 16, revbit16(addr));
349 }
350 
351 static float32 build_float32(uint8_t sign, uint32_t exp, uint32_t mant)
352 {
353     return make_float32(
354         ((sign & 1) << 31) |
355         ((exp & 0xff) << SF_MANTBITS) |
356         (mant & ((1 << SF_MANTBITS) - 1)));
357 }
358 
359 /*
360  * sfrecipa, sfinvsqrta have two 32-bit results
361  *     r0,p0=sfrecipa(r1,r2)
362  *     r0,p0=sfinvsqrta(r1)
363  *
364  * Since helpers can only return a single value, we pack the two results
365  * into a 64-bit value.
366  */
367 uint64_t HELPER(sfrecipa)(CPUHexagonState *env, float32 RsV, float32 RtV)
368 {
369     int32_t PeV = 0;
370     float32 RdV;
371     int idx;
372     int adjust;
373     int mant;
374     int exp;
375 
376     arch_fpop_start(env);
377     if (arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status)) {
378         PeV = adjust;
379         idx = (RtV >> 16) & 0x7f;
380         mant = (recip_lookup_table[idx] << 15) | 1;
381         exp = SF_BIAS - (float32_getexp(RtV) - SF_BIAS) - 1;
382         RdV = build_float32(extract32(RtV, 31, 1), exp, mant);
383     }
384     arch_fpop_end(env);
385     return ((uint64_t)RdV << 32) | PeV;
386 }
387 
388 uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV)
389 {
390     int PeV = 0;
391     float32 RdV;
392     int idx;
393     int adjust;
394     int mant;
395     int exp;
396 
397     arch_fpop_start(env);
398     if (arch_sf_invsqrt_common(&RsV, &RdV, &adjust, &env->fp_status)) {
399         PeV = adjust;
400         idx = (RsV >> 17) & 0x7f;
401         mant = (invsqrt_lookup_table[idx] << 15);
402         exp = SF_BIAS - ((float32_getexp(RsV) - SF_BIAS) >> 1) - 1;
403         RdV = build_float32(extract32(RsV, 31, 1), exp, mant);
404     }
405     arch_fpop_end(env);
406     return ((uint64_t)RdV << 32) | PeV;
407 }
408 
409 int64_t HELPER(vacsh_val)(CPUHexagonState *env,
410                            int64_t RxxV, int64_t RssV, int64_t RttV)
411 {
412     for (int i = 0; i < 4; i++) {
413         int xv = sextract64(RxxV, i * 16, 16);
414         int sv = sextract64(RssV, i * 16, 16);
415         int tv = sextract64(RttV, i * 16, 16);
416         int max;
417         xv = xv + tv;
418         sv = sv - tv;
419         max = xv > sv ? xv : sv;
420         /* Note that fSATH can set the OVF bit in usr */
421         RxxV = deposit64(RxxV, i * 16, 16, fSATH(max));
422     }
423     return RxxV;
424 }
425 
426 int32_t HELPER(vacsh_pred)(CPUHexagonState *env,
427                            int64_t RxxV, int64_t RssV, int64_t RttV)
428 {
429     int32_t PeV = 0;
430     for (int i = 0; i < 4; i++) {
431         int xv = sextract64(RxxV, i * 16, 16);
432         int sv = sextract64(RssV, i * 16, 16);
433         int tv = sextract64(RttV, i * 16, 16);
434         xv = xv + tv;
435         sv = sv - tv;
436         PeV = deposit32(PeV, i * 2, 1, (xv > sv));
437         PeV = deposit32(PeV, i * 2 + 1, 1, (xv > sv));
438     }
439     return PeV;
440 }
441 
442 static void probe_store(CPUHexagonState *env, int slot, int mmu_idx)
443 {
444     if (!(env->slot_cancelled & (1 << slot))) {
445         size1u_t width = env->mem_log_stores[slot].width;
446         target_ulong va = env->mem_log_stores[slot].va;
447         uintptr_t ra = GETPC();
448         probe_write(env, va, width, mmu_idx, ra);
449     }
450 }
451 
452 /*
453  * Called from a mem_noshuf packet to make sure the load doesn't
454  * raise an exception
455  */
456 void HELPER(probe_noshuf_load)(CPUHexagonState *env, target_ulong va,
457                                int size, int mmu_idx)
458 {
459     uintptr_t retaddr = GETPC();
460     probe_read(env, va, size, mmu_idx, retaddr);
461 }
462 
463 /* Called during packet commit when there are two scalar stores */
464 void HELPER(probe_pkt_scalar_store_s0)(CPUHexagonState *env, int mmu_idx)
465 {
466     probe_store(env, 0, mmu_idx);
467 }
468 
469 void HELPER(probe_hvx_stores)(CPUHexagonState *env, int mmu_idx)
470 {
471     uintptr_t retaddr = GETPC();
472     int i;
473 
474     /* Normal (possibly masked) vector store */
475     for (i = 0; i < VSTORES_MAX; i++) {
476         if (env->vstore_pending[i]) {
477             target_ulong va = env->vstore[i].va;
478             int size = env->vstore[i].size;
479             for (int j = 0; j < size; j++) {
480                 if (test_bit(j, env->vstore[i].mask)) {
481                     probe_write(env, va + j, 1, mmu_idx, retaddr);
482                 }
483             }
484         }
485     }
486 
487     /* Scatter store */
488     if (env->vtcm_pending) {
489         if (env->vtcm_log.op) {
490             /* Need to perform the scatter read/modify/write at commit time */
491             if (env->vtcm_log.op_size == 2) {
492                 SCATTER_OP_PROBE_MEM(size2u_t, mmu_idx, retaddr);
493             } else if (env->vtcm_log.op_size == 4) {
494                 /* Word Scatter += */
495                 SCATTER_OP_PROBE_MEM(size4u_t, mmu_idx, retaddr);
496             } else {
497                 g_assert_not_reached();
498             }
499         } else {
500             for (int i = 0; i < sizeof(MMVector); i++) {
501                 if (test_bit(i, env->vtcm_log.mask)) {
502                     probe_write(env, env->vtcm_log.va[i], 1, mmu_idx, retaddr);
503                 }
504 
505             }
506         }
507     }
508 }
509 
510 void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask,
511                                          int mmu_idx)
512 {
513     bool has_st0        = (mask >> 0) & 1;
514     bool has_st1        = (mask >> 1) & 1;
515     bool has_hvx_stores = (mask >> 2) & 1;
516 
517     if (has_st0) {
518         probe_store(env, 0, mmu_idx);
519     }
520     if (has_st1) {
521         probe_store(env, 1, mmu_idx);
522     }
523     if (has_hvx_stores) {
524         HELPER(probe_hvx_stores)(env, mmu_idx);
525     }
526 }
527 
528 /*
529  * mem_noshuf
530  * Section 5.5 of the Hexagon V67 Programmer's Reference Manual
531  *
532  * If the load is in slot 0 and there is a store in slot1 (that
533  * wasn't cancelled), we have to do the store first.
534  */
535 static void check_noshuf(CPUHexagonState *env, uint32_t slot,
536                          target_ulong vaddr, int size)
537 {
538     if (slot == 0 && env->pkt_has_store_s1 &&
539         ((env->slot_cancelled & (1 << 1)) == 0)) {
540         HELPER(probe_noshuf_load)(env, vaddr, size, MMU_USER_IDX);
541         HELPER(commit_store)(env, 1);
542     }
543 }
544 
545 uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, target_ulong vaddr)
546 {
547     uintptr_t ra = GETPC();
548     check_noshuf(env, slot, vaddr, 1);
549     return cpu_ldub_data_ra(env, vaddr, ra);
550 }
551 
552 uint16_t mem_load2(CPUHexagonState *env, uint32_t slot, target_ulong vaddr)
553 {
554     uintptr_t ra = GETPC();
555     check_noshuf(env, slot, vaddr, 2);
556     return cpu_lduw_data_ra(env, vaddr, ra);
557 }
558 
559 uint32_t mem_load4(CPUHexagonState *env, uint32_t slot, target_ulong vaddr)
560 {
561     uintptr_t ra = GETPC();
562     check_noshuf(env, slot, vaddr, 4);
563     return cpu_ldl_data_ra(env, vaddr, ra);
564 }
565 
566 uint64_t mem_load8(CPUHexagonState *env, uint32_t slot, target_ulong vaddr)
567 {
568     uintptr_t ra = GETPC();
569     check_noshuf(env, slot, vaddr, 8);
570     return cpu_ldq_data_ra(env, vaddr, ra);
571 }
572 
573 /* Floating point */
574 float64 HELPER(conv_sf2df)(CPUHexagonState *env, float32 RsV)
575 {
576     float64 out_f64;
577     arch_fpop_start(env);
578     out_f64 = float32_to_float64(RsV, &env->fp_status);
579     arch_fpop_end(env);
580     return out_f64;
581 }
582 
583 float32 HELPER(conv_df2sf)(CPUHexagonState *env, float64 RssV)
584 {
585     float32 out_f32;
586     arch_fpop_start(env);
587     out_f32 = float64_to_float32(RssV, &env->fp_status);
588     arch_fpop_end(env);
589     return out_f32;
590 }
591 
592 float32 HELPER(conv_uw2sf)(CPUHexagonState *env, int32_t RsV)
593 {
594     float32 RdV;
595     arch_fpop_start(env);
596     RdV = uint32_to_float32(RsV, &env->fp_status);
597     arch_fpop_end(env);
598     return RdV;
599 }
600 
601 float64 HELPER(conv_uw2df)(CPUHexagonState *env, int32_t RsV)
602 {
603     float64 RddV;
604     arch_fpop_start(env);
605     RddV = uint32_to_float64(RsV, &env->fp_status);
606     arch_fpop_end(env);
607     return RddV;
608 }
609 
610 float32 HELPER(conv_w2sf)(CPUHexagonState *env, int32_t RsV)
611 {
612     float32 RdV;
613     arch_fpop_start(env);
614     RdV = int32_to_float32(RsV, &env->fp_status);
615     arch_fpop_end(env);
616     return RdV;
617 }
618 
619 float64 HELPER(conv_w2df)(CPUHexagonState *env, int32_t RsV)
620 {
621     float64 RddV;
622     arch_fpop_start(env);
623     RddV = int32_to_float64(RsV, &env->fp_status);
624     arch_fpop_end(env);
625     return RddV;
626 }
627 
628 float32 HELPER(conv_ud2sf)(CPUHexagonState *env, int64_t RssV)
629 {
630     float32 RdV;
631     arch_fpop_start(env);
632     RdV = uint64_to_float32(RssV, &env->fp_status);
633     arch_fpop_end(env);
634     return RdV;
635 }
636 
637 float64 HELPER(conv_ud2df)(CPUHexagonState *env, int64_t RssV)
638 {
639     float64 RddV;
640     arch_fpop_start(env);
641     RddV = uint64_to_float64(RssV, &env->fp_status);
642     arch_fpop_end(env);
643     return RddV;
644 }
645 
646 float32 HELPER(conv_d2sf)(CPUHexagonState *env, int64_t RssV)
647 {
648     float32 RdV;
649     arch_fpop_start(env);
650     RdV = int64_to_float32(RssV, &env->fp_status);
651     arch_fpop_end(env);
652     return RdV;
653 }
654 
655 float64 HELPER(conv_d2df)(CPUHexagonState *env, int64_t RssV)
656 {
657     float64 RddV;
658     arch_fpop_start(env);
659     RddV = int64_to_float64(RssV, &env->fp_status);
660     arch_fpop_end(env);
661     return RddV;
662 }
663 
664 uint32_t HELPER(conv_sf2uw)(CPUHexagonState *env, float32 RsV)
665 {
666     uint32_t RdV;
667     arch_fpop_start(env);
668     /* Hexagon checks the sign before rounding */
669     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
670         float_raise(float_flag_invalid, &env->fp_status);
671         RdV = 0;
672     } else {
673         RdV = float32_to_uint32(RsV, &env->fp_status);
674     }
675     arch_fpop_end(env);
676     return RdV;
677 }
678 
679 int32_t HELPER(conv_sf2w)(CPUHexagonState *env, float32 RsV)
680 {
681     int32_t RdV;
682     arch_fpop_start(env);
683     /* Hexagon returns -1 for NaN */
684     if (float32_is_any_nan(RsV)) {
685         float_raise(float_flag_invalid, &env->fp_status);
686         RdV = -1;
687     } else {
688         RdV = float32_to_int32(RsV, &env->fp_status);
689     }
690     arch_fpop_end(env);
691     return RdV;
692 }
693 
694 uint64_t HELPER(conv_sf2ud)(CPUHexagonState *env, float32 RsV)
695 {
696     uint64_t RddV;
697     arch_fpop_start(env);
698     /* Hexagon checks the sign before rounding */
699     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
700         float_raise(float_flag_invalid, &env->fp_status);
701         RddV = 0;
702     } else {
703         RddV = float32_to_uint64(RsV, &env->fp_status);
704     }
705     arch_fpop_end(env);
706     return RddV;
707 }
708 
709 int64_t HELPER(conv_sf2d)(CPUHexagonState *env, float32 RsV)
710 {
711     int64_t RddV;
712     arch_fpop_start(env);
713     /* Hexagon returns -1 for NaN */
714     if (float32_is_any_nan(RsV)) {
715         float_raise(float_flag_invalid, &env->fp_status);
716         RddV = -1;
717     } else {
718         RddV = float32_to_int64(RsV, &env->fp_status);
719     }
720     arch_fpop_end(env);
721     return RddV;
722 }
723 
724 uint32_t HELPER(conv_df2uw)(CPUHexagonState *env, float64 RssV)
725 {
726     uint32_t RdV;
727     arch_fpop_start(env);
728     /* Hexagon checks the sign before rounding */
729     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
730         float_raise(float_flag_invalid, &env->fp_status);
731         RdV = 0;
732     } else {
733         RdV = float64_to_uint32(RssV, &env->fp_status);
734     }
735     arch_fpop_end(env);
736     return RdV;
737 }
738 
739 int32_t HELPER(conv_df2w)(CPUHexagonState *env, float64 RssV)
740 {
741     int32_t RdV;
742     arch_fpop_start(env);
743     /* Hexagon returns -1 for NaN */
744     if (float64_is_any_nan(RssV)) {
745         float_raise(float_flag_invalid, &env->fp_status);
746         RdV = -1;
747     } else {
748         RdV = float64_to_int32(RssV, &env->fp_status);
749     }
750     arch_fpop_end(env);
751     return RdV;
752 }
753 
754 uint64_t HELPER(conv_df2ud)(CPUHexagonState *env, float64 RssV)
755 {
756     uint64_t RddV;
757     arch_fpop_start(env);
758     /* Hexagon checks the sign before rounding */
759     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
760         float_raise(float_flag_invalid, &env->fp_status);
761         RddV = 0;
762     } else {
763         RddV = float64_to_uint64(RssV, &env->fp_status);
764     }
765     arch_fpop_end(env);
766     return RddV;
767 }
768 
769 int64_t HELPER(conv_df2d)(CPUHexagonState *env, float64 RssV)
770 {
771     int64_t RddV;
772     arch_fpop_start(env);
773     /* Hexagon returns -1 for NaN */
774     if (float64_is_any_nan(RssV)) {
775         float_raise(float_flag_invalid, &env->fp_status);
776         RddV = -1;
777     } else {
778         RddV = float64_to_int64(RssV, &env->fp_status);
779     }
780     arch_fpop_end(env);
781     return RddV;
782 }
783 
784 uint32_t HELPER(conv_sf2uw_chop)(CPUHexagonState *env, float32 RsV)
785 {
786     uint32_t RdV;
787     arch_fpop_start(env);
788     /* Hexagon checks the sign before rounding */
789     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
790         float_raise(float_flag_invalid, &env->fp_status);
791         RdV = 0;
792     } else {
793         RdV = float32_to_uint32_round_to_zero(RsV, &env->fp_status);
794     }
795     arch_fpop_end(env);
796     return RdV;
797 }
798 
799 int32_t HELPER(conv_sf2w_chop)(CPUHexagonState *env, float32 RsV)
800 {
801     int32_t RdV;
802     arch_fpop_start(env);
803     /* Hexagon returns -1 for NaN */
804     if (float32_is_any_nan(RsV)) {
805         float_raise(float_flag_invalid, &env->fp_status);
806         RdV = -1;
807     } else {
808         RdV = float32_to_int32_round_to_zero(RsV, &env->fp_status);
809     }
810     arch_fpop_end(env);
811     return RdV;
812 }
813 
814 uint64_t HELPER(conv_sf2ud_chop)(CPUHexagonState *env, float32 RsV)
815 {
816     uint64_t RddV;
817     arch_fpop_start(env);
818     /* Hexagon checks the sign before rounding */
819     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
820         float_raise(float_flag_invalid, &env->fp_status);
821         RddV = 0;
822     } else {
823         RddV = float32_to_uint64_round_to_zero(RsV, &env->fp_status);
824     }
825     arch_fpop_end(env);
826     return RddV;
827 }
828 
829 int64_t HELPER(conv_sf2d_chop)(CPUHexagonState *env, float32 RsV)
830 {
831     int64_t RddV;
832     arch_fpop_start(env);
833     /* Hexagon returns -1 for NaN */
834     if (float32_is_any_nan(RsV)) {
835         float_raise(float_flag_invalid, &env->fp_status);
836         RddV = -1;
837     } else {
838         RddV = float32_to_int64_round_to_zero(RsV, &env->fp_status);
839     }
840     arch_fpop_end(env);
841     return RddV;
842 }
843 
844 uint32_t HELPER(conv_df2uw_chop)(CPUHexagonState *env, float64 RssV)
845 {
846     uint32_t RdV;
847     arch_fpop_start(env);
848     /* Hexagon checks the sign before rounding */
849     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
850         float_raise(float_flag_invalid, &env->fp_status);
851         RdV = 0;
852     } else {
853         RdV = float64_to_uint32_round_to_zero(RssV, &env->fp_status);
854     }
855     arch_fpop_end(env);
856     return RdV;
857 }
858 
859 int32_t HELPER(conv_df2w_chop)(CPUHexagonState *env, float64 RssV)
860 {
861     int32_t RdV;
862     arch_fpop_start(env);
863     /* Hexagon returns -1 for NaN */
864     if (float64_is_any_nan(RssV)) {
865         float_raise(float_flag_invalid, &env->fp_status);
866         RdV = -1;
867     } else {
868         RdV = float64_to_int32_round_to_zero(RssV, &env->fp_status);
869     }
870     arch_fpop_end(env);
871     return RdV;
872 }
873 
874 uint64_t HELPER(conv_df2ud_chop)(CPUHexagonState *env, float64 RssV)
875 {
876     uint64_t RddV;
877     arch_fpop_start(env);
878     /* Hexagon checks the sign before rounding */
879     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
880         float_raise(float_flag_invalid, &env->fp_status);
881         RddV = 0;
882     } else {
883         RddV = float64_to_uint64_round_to_zero(RssV, &env->fp_status);
884     }
885     arch_fpop_end(env);
886     return RddV;
887 }
888 
889 int64_t HELPER(conv_df2d_chop)(CPUHexagonState *env, float64 RssV)
890 {
891     int64_t RddV;
892     arch_fpop_start(env);
893     /* Hexagon returns -1 for NaN */
894     if (float64_is_any_nan(RssV)) {
895         float_raise(float_flag_invalid, &env->fp_status);
896         RddV = -1;
897     } else {
898         RddV = float64_to_int64_round_to_zero(RssV, &env->fp_status);
899     }
900     arch_fpop_end(env);
901     return RddV;
902 }
903 
904 float32 HELPER(sfadd)(CPUHexagonState *env, float32 RsV, float32 RtV)
905 {
906     float32 RdV;
907     arch_fpop_start(env);
908     RdV = float32_add(RsV, RtV, &env->fp_status);
909     arch_fpop_end(env);
910     return RdV;
911 }
912 
913 float32 HELPER(sfsub)(CPUHexagonState *env, float32 RsV, float32 RtV)
914 {
915     float32 RdV;
916     arch_fpop_start(env);
917     RdV = float32_sub(RsV, RtV, &env->fp_status);
918     arch_fpop_end(env);
919     return RdV;
920 }
921 
922 int32_t HELPER(sfcmpeq)(CPUHexagonState *env, float32 RsV, float32 RtV)
923 {
924     int32_t PdV;
925     arch_fpop_start(env);
926     PdV = f8BITSOF(float32_eq_quiet(RsV, RtV, &env->fp_status));
927     arch_fpop_end(env);
928     return PdV;
929 }
930 
931 int32_t HELPER(sfcmpgt)(CPUHexagonState *env, float32 RsV, float32 RtV)
932 {
933     int cmp;
934     int32_t PdV;
935     arch_fpop_start(env);
936     cmp = float32_compare_quiet(RsV, RtV, &env->fp_status);
937     PdV = f8BITSOF(cmp == float_relation_greater);
938     arch_fpop_end(env);
939     return PdV;
940 }
941 
942 int32_t HELPER(sfcmpge)(CPUHexagonState *env, float32 RsV, float32 RtV)
943 {
944     int cmp;
945     int32_t PdV;
946     arch_fpop_start(env);
947     cmp = float32_compare_quiet(RsV, RtV, &env->fp_status);
948     PdV = f8BITSOF(cmp == float_relation_greater ||
949                    cmp == float_relation_equal);
950     arch_fpop_end(env);
951     return PdV;
952 }
953 
954 int32_t HELPER(sfcmpuo)(CPUHexagonState *env, float32 RsV, float32 RtV)
955 {
956     int32_t PdV;
957     arch_fpop_start(env);
958     PdV = f8BITSOF(float32_unordered_quiet(RsV, RtV, &env->fp_status));
959     arch_fpop_end(env);
960     return PdV;
961 }
962 
963 float32 HELPER(sfmax)(CPUHexagonState *env, float32 RsV, float32 RtV)
964 {
965     float32 RdV;
966     arch_fpop_start(env);
967     RdV = float32_maximum_number(RsV, RtV, &env->fp_status);
968     arch_fpop_end(env);
969     return RdV;
970 }
971 
972 float32 HELPER(sfmin)(CPUHexagonState *env, float32 RsV, float32 RtV)
973 {
974     float32 RdV;
975     arch_fpop_start(env);
976     RdV = float32_minimum_number(RsV, RtV, &env->fp_status);
977     arch_fpop_end(env);
978     return RdV;
979 }
980 
981 int32_t HELPER(sfclass)(CPUHexagonState *env, float32 RsV, int32_t uiV)
982 {
983     int32_t PdV = 0;
984     arch_fpop_start(env);
985     if (fGETBIT(0, uiV) && float32_is_zero(RsV)) {
986         PdV = 0xff;
987     }
988     if (fGETBIT(1, uiV) && float32_is_normal(RsV)) {
989         PdV = 0xff;
990     }
991     if (fGETBIT(2, uiV) && float32_is_denormal(RsV)) {
992         PdV = 0xff;
993     }
994     if (fGETBIT(3, uiV) && float32_is_infinity(RsV)) {
995         PdV = 0xff;
996     }
997     if (fGETBIT(4, uiV) && float32_is_any_nan(RsV)) {
998         PdV = 0xff;
999     }
1000     set_float_exception_flags(0, &env->fp_status);
1001     arch_fpop_end(env);
1002     return PdV;
1003 }
1004 
1005 float32 HELPER(sffixupn)(CPUHexagonState *env, float32 RsV, float32 RtV)
1006 {
1007     float32 RdV = 0;
1008     int adjust;
1009     arch_fpop_start(env);
1010     arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status);
1011     RdV = RsV;
1012     arch_fpop_end(env);
1013     return RdV;
1014 }
1015 
1016 float32 HELPER(sffixupd)(CPUHexagonState *env, float32 RsV, float32 RtV)
1017 {
1018     float32 RdV = 0;
1019     int adjust;
1020     arch_fpop_start(env);
1021     arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status);
1022     RdV = RtV;
1023     arch_fpop_end(env);
1024     return RdV;
1025 }
1026 
1027 float32 HELPER(sffixupr)(CPUHexagonState *env, float32 RsV)
1028 {
1029     float32 RdV = 0;
1030     int adjust;
1031     arch_fpop_start(env);
1032     arch_sf_invsqrt_common(&RsV, &RdV, &adjust, &env->fp_status);
1033     RdV = RsV;
1034     arch_fpop_end(env);
1035     return RdV;
1036 }
1037 
1038 float64 HELPER(dfadd)(CPUHexagonState *env, float64 RssV, float64 RttV)
1039 {
1040     float64 RddV;
1041     arch_fpop_start(env);
1042     RddV = float64_add(RssV, RttV, &env->fp_status);
1043     arch_fpop_end(env);
1044     return RddV;
1045 }
1046 
1047 float64 HELPER(dfsub)(CPUHexagonState *env, float64 RssV, float64 RttV)
1048 {
1049     float64 RddV;
1050     arch_fpop_start(env);
1051     RddV = float64_sub(RssV, RttV, &env->fp_status);
1052     arch_fpop_end(env);
1053     return RddV;
1054 }
1055 
1056 float64 HELPER(dfmax)(CPUHexagonState *env, float64 RssV, float64 RttV)
1057 {
1058     float64 RddV;
1059     arch_fpop_start(env);
1060     RddV = float64_maximum_number(RssV, RttV, &env->fp_status);
1061     arch_fpop_end(env);
1062     return RddV;
1063 }
1064 
1065 float64 HELPER(dfmin)(CPUHexagonState *env, float64 RssV, float64 RttV)
1066 {
1067     float64 RddV;
1068     arch_fpop_start(env);
1069     RddV = float64_minimum_number(RssV, RttV, &env->fp_status);
1070     arch_fpop_end(env);
1071     return RddV;
1072 }
1073 
1074 int32_t HELPER(dfcmpeq)(CPUHexagonState *env, float64 RssV, float64 RttV)
1075 {
1076     int32_t PdV;
1077     arch_fpop_start(env);
1078     PdV = f8BITSOF(float64_eq_quiet(RssV, RttV, &env->fp_status));
1079     arch_fpop_end(env);
1080     return PdV;
1081 }
1082 
1083 int32_t HELPER(dfcmpgt)(CPUHexagonState *env, float64 RssV, float64 RttV)
1084 {
1085     int cmp;
1086     int32_t PdV;
1087     arch_fpop_start(env);
1088     cmp = float64_compare_quiet(RssV, RttV, &env->fp_status);
1089     PdV = f8BITSOF(cmp == float_relation_greater);
1090     arch_fpop_end(env);
1091     return PdV;
1092 }
1093 
1094 int32_t HELPER(dfcmpge)(CPUHexagonState *env, float64 RssV, float64 RttV)
1095 {
1096     int cmp;
1097     int32_t PdV;
1098     arch_fpop_start(env);
1099     cmp = float64_compare_quiet(RssV, RttV, &env->fp_status);
1100     PdV = f8BITSOF(cmp == float_relation_greater ||
1101                    cmp == float_relation_equal);
1102     arch_fpop_end(env);
1103     return PdV;
1104 }
1105 
1106 int32_t HELPER(dfcmpuo)(CPUHexagonState *env, float64 RssV, float64 RttV)
1107 {
1108     int32_t PdV;
1109     arch_fpop_start(env);
1110     PdV = f8BITSOF(float64_unordered_quiet(RssV, RttV, &env->fp_status));
1111     arch_fpop_end(env);
1112     return PdV;
1113 }
1114 
1115 int32_t HELPER(dfclass)(CPUHexagonState *env, float64 RssV, int32_t uiV)
1116 {
1117     int32_t PdV = 0;
1118     arch_fpop_start(env);
1119     if (fGETBIT(0, uiV) && float64_is_zero(RssV)) {
1120         PdV = 0xff;
1121     }
1122     if (fGETBIT(1, uiV) && float64_is_normal(RssV)) {
1123         PdV = 0xff;
1124     }
1125     if (fGETBIT(2, uiV) && float64_is_denormal(RssV)) {
1126         PdV = 0xff;
1127     }
1128     if (fGETBIT(3, uiV) && float64_is_infinity(RssV)) {
1129         PdV = 0xff;
1130     }
1131     if (fGETBIT(4, uiV) && float64_is_any_nan(RssV)) {
1132         PdV = 0xff;
1133     }
1134     set_float_exception_flags(0, &env->fp_status);
1135     arch_fpop_end(env);
1136     return PdV;
1137 }
1138 
1139 float32 HELPER(sfmpy)(CPUHexagonState *env, float32 RsV, float32 RtV)
1140 {
1141     float32 RdV;
1142     arch_fpop_start(env);
1143     RdV = internal_mpyf(RsV, RtV, &env->fp_status);
1144     arch_fpop_end(env);
1145     return RdV;
1146 }
1147 
1148 float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
1149                       float32 RsV, float32 RtV)
1150 {
1151     arch_fpop_start(env);
1152     RxV = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
1153     arch_fpop_end(env);
1154     return RxV;
1155 }
1156 
1157 static bool is_zero_prod(float32 a, float32 b)
1158 {
1159     return ((float32_is_zero(a) && is_finite(b)) ||
1160             (float32_is_zero(b) && is_finite(a)));
1161 }
1162 
1163 static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
1164 {
1165     float32 ret = dst;
1166     if (float32_is_any_nan(x)) {
1167         if (extract32(x, 22, 1) == 0) {
1168             float_raise(float_flag_invalid, fp_status);
1169         }
1170         ret = make_float32(0xffffffff);    /* nan */
1171     }
1172     return ret;
1173 }
1174 
1175 float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
1176                          float32 RsV, float32 RtV, float32 PuV)
1177 {
1178     size4s_t tmp;
1179     arch_fpop_start(env);
1180     RxV = check_nan(RxV, RxV, &env->fp_status);
1181     RxV = check_nan(RxV, RsV, &env->fp_status);
1182     RxV = check_nan(RxV, RtV, &env->fp_status);
1183     tmp = internal_fmafx(RsV, RtV, RxV, fSXTN(8, 64, PuV), &env->fp_status);
1184     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1185         RxV = tmp;
1186     }
1187     arch_fpop_end(env);
1188     return RxV;
1189 }
1190 
1191 float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
1192                       float32 RsV, float32 RtV)
1193 {
1194     float32 neg_RsV;
1195     arch_fpop_start(env);
1196     neg_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
1197     RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status);
1198     arch_fpop_end(env);
1199     return RxV;
1200 }
1201 
1202 static bool is_inf_prod(int32_t a, int32_t b)
1203 {
1204     return (float32_is_infinity(a) && float32_is_infinity(b)) ||
1205            (float32_is_infinity(a) && is_finite(b) && !float32_is_zero(b)) ||
1206            (float32_is_infinity(b) && is_finite(a) && !float32_is_zero(a));
1207 }
1208 
1209 float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
1210                           float32 RsV, float32 RtV)
1211 {
1212     bool infinp;
1213     bool infminusinf;
1214     float32 tmp;
1215 
1216     arch_fpop_start(env);
1217     set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
1218     infminusinf = float32_is_infinity(RxV) &&
1219                   is_inf_prod(RsV, RtV) &&
1220                   (fGETBIT(31, RsV ^ RxV ^ RtV) != 0);
1221     infinp = float32_is_infinity(RxV) ||
1222              float32_is_infinity(RtV) ||
1223              float32_is_infinity(RsV);
1224     RxV = check_nan(RxV, RxV, &env->fp_status);
1225     RxV = check_nan(RxV, RsV, &env->fp_status);
1226     RxV = check_nan(RxV, RtV, &env->fp_status);
1227     tmp = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
1228     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1229         RxV = tmp;
1230     }
1231     set_float_exception_flags(0, &env->fp_status);
1232     if (float32_is_infinity(RxV) && !infinp) {
1233         RxV = RxV - 1;
1234     }
1235     if (infminusinf) {
1236         RxV = 0;
1237     }
1238     arch_fpop_end(env);
1239     return RxV;
1240 }
1241 
1242 float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV,
1243                           float32 RsV, float32 RtV)
1244 {
1245     bool infinp;
1246     bool infminusinf;
1247     float32 tmp;
1248 
1249     arch_fpop_start(env);
1250     set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
1251     infminusinf = float32_is_infinity(RxV) &&
1252                   is_inf_prod(RsV, RtV) &&
1253                   (fGETBIT(31, RsV ^ RxV ^ RtV) == 0);
1254     infinp = float32_is_infinity(RxV) ||
1255              float32_is_infinity(RtV) ||
1256              float32_is_infinity(RsV);
1257     RxV = check_nan(RxV, RxV, &env->fp_status);
1258     RxV = check_nan(RxV, RsV, &env->fp_status);
1259     RxV = check_nan(RxV, RtV, &env->fp_status);
1260     float32 minus_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
1261     tmp = internal_fmafx(minus_RsV, RtV, RxV, 0, &env->fp_status);
1262     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1263         RxV = tmp;
1264     }
1265     set_float_exception_flags(0, &env->fp_status);
1266     if (float32_is_infinity(RxV) && !infinp) {
1267         RxV = RxV - 1;
1268     }
1269     if (infminusinf) {
1270         RxV = 0;
1271     }
1272     arch_fpop_end(env);
1273     return RxV;
1274 }
1275 
1276 float64 HELPER(dfmpyfix)(CPUHexagonState *env, float64 RssV, float64 RttV)
1277 {
1278     int64_t RddV;
1279     arch_fpop_start(env);
1280     if (float64_is_denormal(RssV) &&
1281         (float64_getexp(RttV) >= 512) &&
1282         float64_is_normal(RttV)) {
1283         RddV = float64_mul(RssV, make_float64(0x4330000000000000),
1284                            &env->fp_status);
1285     } else if (float64_is_denormal(RttV) &&
1286                (float64_getexp(RssV) >= 512) &&
1287                float64_is_normal(RssV)) {
1288         RddV = float64_mul(RssV, make_float64(0x3cb0000000000000),
1289                            &env->fp_status);
1290     } else {
1291         RddV = RssV;
1292     }
1293     arch_fpop_end(env);
1294     return RddV;
1295 }
1296 
1297 float64 HELPER(dfmpyhh)(CPUHexagonState *env, float64 RxxV,
1298                         float64 RssV, float64 RttV)
1299 {
1300     arch_fpop_start(env);
1301     RxxV = internal_mpyhh(RssV, RttV, RxxV, &env->fp_status);
1302     arch_fpop_end(env);
1303     return RxxV;
1304 }
1305 
1306 /* Histogram instructions */
1307 
1308 void HELPER(vhist)(CPUHexagonState *env)
1309 {
1310     MMVector *input = &env->tmp_VRegs[0];
1311 
1312     for (int lane = 0; lane < 8; lane++) {
1313         for (int i = 0; i < sizeof(MMVector) / 8; ++i) {
1314             unsigned char value = input->ub[(sizeof(MMVector) / 8) * lane + i];
1315             unsigned char regno = value >> 3;
1316             unsigned char element = value & 7;
1317 
1318             env->VRegs[regno].uh[(sizeof(MMVector) / 16) * lane + element]++;
1319         }
1320     }
1321 }
1322 
1323 void HELPER(vhistq)(CPUHexagonState *env)
1324 {
1325     MMVector *input = &env->tmp_VRegs[0];
1326 
1327     for (int lane = 0; lane < 8; lane++) {
1328         for (int i = 0; i < sizeof(MMVector) / 8; ++i) {
1329             unsigned char value = input->ub[(sizeof(MMVector) / 8) * lane + i];
1330             unsigned char regno = value >> 3;
1331             unsigned char element = value & 7;
1332 
1333             if (fGETQBIT(env->qtmp, sizeof(MMVector) / 8 * lane + i)) {
1334                 env->VRegs[regno].uh[
1335                     (sizeof(MMVector) / 16) * lane + element]++;
1336             }
1337         }
1338     }
1339 }
1340 
1341 void HELPER(vwhist256)(CPUHexagonState *env)
1342 {
1343     MMVector *input = &env->tmp_VRegs[0];
1344 
1345     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1346         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1347         unsigned int weight = fGETUBYTE(1, input->h[i]);
1348         unsigned int vindex = (bucket >> 3) & 0x1F;
1349         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1350 
1351         env->VRegs[vindex].uh[elindex] =
1352             env->VRegs[vindex].uh[elindex] + weight;
1353     }
1354 }
1355 
1356 void HELPER(vwhist256q)(CPUHexagonState *env)
1357 {
1358     MMVector *input = &env->tmp_VRegs[0];
1359 
1360     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1361         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1362         unsigned int weight = fGETUBYTE(1, input->h[i]);
1363         unsigned int vindex = (bucket >> 3) & 0x1F;
1364         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1365 
1366         if (fGETQBIT(env->qtmp, 2 * i)) {
1367             env->VRegs[vindex].uh[elindex] =
1368                 env->VRegs[vindex].uh[elindex] + weight;
1369         }
1370     }
1371 }
1372 
1373 void HELPER(vwhist256_sat)(CPUHexagonState *env)
1374 {
1375     MMVector *input = &env->tmp_VRegs[0];
1376 
1377     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1378         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1379         unsigned int weight = fGETUBYTE(1, input->h[i]);
1380         unsigned int vindex = (bucket >> 3) & 0x1F;
1381         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1382 
1383         env->VRegs[vindex].uh[elindex] =
1384             fVSATUH(env->VRegs[vindex].uh[elindex] + weight);
1385     }
1386 }
1387 
1388 void HELPER(vwhist256q_sat)(CPUHexagonState *env)
1389 {
1390     MMVector *input = &env->tmp_VRegs[0];
1391 
1392     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1393         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1394         unsigned int weight = fGETUBYTE(1, input->h[i]);
1395         unsigned int vindex = (bucket >> 3) & 0x1F;
1396         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1397 
1398         if (fGETQBIT(env->qtmp, 2 * i)) {
1399             env->VRegs[vindex].uh[elindex] =
1400                 fVSATUH(env->VRegs[vindex].uh[elindex] + weight);
1401         }
1402     }
1403 }
1404 
1405 void HELPER(vwhist128)(CPUHexagonState *env)
1406 {
1407     MMVector *input = &env->tmp_VRegs[0];
1408 
1409     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1410         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1411         unsigned int weight = fGETUBYTE(1, input->h[i]);
1412         unsigned int vindex = (bucket >> 3) & 0x1F;
1413         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1414 
1415         env->VRegs[vindex].uw[elindex] =
1416             env->VRegs[vindex].uw[elindex] + weight;
1417     }
1418 }
1419 
1420 void HELPER(vwhist128q)(CPUHexagonState *env)
1421 {
1422     MMVector *input = &env->tmp_VRegs[0];
1423 
1424     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1425         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1426         unsigned int weight = fGETUBYTE(1, input->h[i]);
1427         unsigned int vindex = (bucket >> 3) & 0x1F;
1428         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1429 
1430         if (fGETQBIT(env->qtmp, 2 * i)) {
1431             env->VRegs[vindex].uw[elindex] =
1432                 env->VRegs[vindex].uw[elindex] + weight;
1433         }
1434     }
1435 }
1436 
1437 void HELPER(vwhist128m)(CPUHexagonState *env, int32_t uiV)
1438 {
1439     MMVector *input = &env->tmp_VRegs[0];
1440 
1441     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1442         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1443         unsigned int weight = fGETUBYTE(1, input->h[i]);
1444         unsigned int vindex = (bucket >> 3) & 0x1F;
1445         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1446 
1447         if ((bucket & 1) == uiV) {
1448             env->VRegs[vindex].uw[elindex] =
1449                 env->VRegs[vindex].uw[elindex] + weight;
1450         }
1451     }
1452 }
1453 
1454 void HELPER(vwhist128qm)(CPUHexagonState *env, int32_t uiV)
1455 {
1456     MMVector *input = &env->tmp_VRegs[0];
1457 
1458     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1459         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1460         unsigned int weight = fGETUBYTE(1, input->h[i]);
1461         unsigned int vindex = (bucket >> 3) & 0x1F;
1462         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1463 
1464         if (((bucket & 1) == uiV) && fGETQBIT(env->qtmp, 2 * i)) {
1465             env->VRegs[vindex].uw[elindex] =
1466                 env->VRegs[vindex].uw[elindex] + weight;
1467         }
1468     }
1469 }
1470 
1471 void cancel_slot(CPUHexagonState *env, uint32_t slot)
1472 {
1473     HEX_DEBUG_LOG("Slot %d cancelled\n", slot);
1474     env->slot_cancelled |= (1 << slot);
1475 }
1476 
/*
 * These macros can be referenced in the generated helper functions
 * (helper_funcs_generated.c.inc, included right below).
 */
/* warn() swallows its arguments and expands to nothing */
#define warn(...) /* Nothing */
/* fatal() ignores its arguments and expands to g_assert_not_reached(); */
#define fatal(...) g_assert_not_reached();

/* Print an error line to stdout that names the given helper tag */
#define BOGUS_HELPER(tag) \
    printf("ERROR: bogus helper: " #tag "\n")
1483 
1484 #include "helper_funcs_generated.c.inc"
1485