xref: /openbmc/qemu/target/hexagon/op_helper.c (revision 7e6055c9)
1 /*
2  *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "qemu/log.h"
20 #include "exec/exec-all.h"
21 #include "exec/cpu_ldst.h"
22 #include "exec/helper-proto.h"
23 #include "fpu/softfloat.h"
24 #include "cpu.h"
25 #include "internal.h"
26 #include "macros.h"
27 #include "arch.h"
28 #include "hex_arch_types.h"
29 #include "fma_emu.h"
30 #include "mmvec/mmvec.h"
31 #include "mmvec/macros.h"
32 
/* Single-precision exponent bias, used when computing estimate exponents */
#define SF_BIAS        127
/* Number of single-precision mantissa bits (also the exponent field shift) */
#define SF_MANTBITS    23
35 
36 /* Exceptions processing helpers */
37 static void QEMU_NORETURN do_raise_exception_err(CPUHexagonState *env,
38                                                  uint32_t exception,
39                                                  uintptr_t pc)
40 {
41     CPUState *cs = env_cpu(env);
42     qemu_log_mask(CPU_LOG_INT, "%s: %d\n", __func__, exception);
43     cs->exception_index = exception;
44     cpu_loop_exit_restore(cs, pc);
45 }
46 
47 void QEMU_NORETURN HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp)
48 {
49     do_raise_exception_err(env, excp, 0);
50 }
51 
52 static void log_reg_write(CPUHexagonState *env, int rnum,
53                           target_ulong val, uint32_t slot)
54 {
55     HEX_DEBUG_LOG("log_reg_write[%d] = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")",
56                   rnum, val, val);
57     if (val == env->gpr[rnum]) {
58         HEX_DEBUG_LOG(" NO CHANGE");
59     }
60     HEX_DEBUG_LOG("\n");
61 
62     env->new_value[rnum] = val;
63     if (HEX_DEBUG) {
64         /* Do this so HELPER(debug_commit_end) will know */
65         env->reg_written[rnum] = 1;
66     }
67 }
68 
69 static void log_pred_write(CPUHexagonState *env, int pnum, target_ulong val)
70 {
71     HEX_DEBUG_LOG("log_pred_write[%d] = " TARGET_FMT_ld
72                   " (0x" TARGET_FMT_lx ")\n",
73                   pnum, val, val);
74 
75     /* Multiple writes to the same preg are and'ed together */
76     if (env->pred_written & (1 << pnum)) {
77         env->new_pred_value[pnum] &= val & 0xff;
78     } else {
79         env->new_pred_value[pnum] = val & 0xff;
80         env->pred_written |= 1 << pnum;
81     }
82 }
83 
84 static void log_store32(CPUHexagonState *env, target_ulong addr,
85                         target_ulong val, int width, int slot)
86 {
87     HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx
88                   ", %" PRId32 " [0x08%" PRIx32 "])\n",
89                   width, addr, val, val);
90     env->mem_log_stores[slot].va = addr;
91     env->mem_log_stores[slot].width = width;
92     env->mem_log_stores[slot].data32 = val;
93 }
94 
95 static void log_store64(CPUHexagonState *env, target_ulong addr,
96                         int64_t val, int width, int slot)
97 {
98     HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx
99                   ", %" PRId64 " [0x016%" PRIx64 "])\n",
100                    width, addr, val, val);
101     env->mem_log_stores[slot].va = addr;
102     env->mem_log_stores[slot].width = width;
103     env->mem_log_stores[slot].data64 = val;
104 }
105 
106 static void write_new_pc(CPUHexagonState *env, target_ulong addr)
107 {
108     HEX_DEBUG_LOG("write_new_pc(0x" TARGET_FMT_lx ")\n", addr);
109 
110     /*
111      * If more than one branch is taken in a packet, only the first one
112      * is actually done.
113      */
114     if (env->branch_taken) {
115         HEX_DEBUG_LOG("INFO: multiple branches taken in same packet, "
116                       "ignoring the second one\n");
117     } else {
118         fCHECK_PCALIGN(addr);
119         env->branch_taken = 1;
120         env->next_PC = addr;
121     }
122 }
123 
124 /* Handy place to set a breakpoint */
125 void HELPER(debug_start_packet)(CPUHexagonState *env)
126 {
127     HEX_DEBUG_LOG("Start packet: pc = 0x" TARGET_FMT_lx "\n",
128                   env->gpr[HEX_REG_PC]);
129 
130     for (int i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
131         env->reg_written[i] = 0;
132     }
133 }
134 
135 /* Checks for bookkeeping errors between disassembly context and runtime */
136 void HELPER(debug_check_store_width)(CPUHexagonState *env, int slot, int check)
137 {
138     if (env->mem_log_stores[slot].width != check) {
139         HEX_DEBUG_LOG("ERROR: %d != %d\n",
140                       env->mem_log_stores[slot].width, check);
141         g_assert_not_reached();
142     }
143 }
144 
145 void HELPER(commit_store)(CPUHexagonState *env, int slot_num)
146 {
147     uintptr_t ra = GETPC();
148     uint8_t width = env->mem_log_stores[slot_num].width;
149     target_ulong va = env->mem_log_stores[slot_num].va;
150 
151     switch (width) {
152     case 1:
153         cpu_stb_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
154         break;
155     case 2:
156         cpu_stw_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
157         break;
158     case 4:
159         cpu_stl_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
160         break;
161     case 8:
162         cpu_stq_data_ra(env, va, env->mem_log_stores[slot_num].data64, ra);
163         break;
164     default:
165         g_assert_not_reached();
166     }
167 }
168 
/* TCG helper: thin wrapper that forwards to mem_gather_store(). */
void HELPER(gather_store)(CPUHexagonState *env, uint32_t addr, int slot)
{
    mem_gather_store(env, addr, slot);
}
173 
174 void HELPER(commit_hvx_stores)(CPUHexagonState *env)
175 {
176     uintptr_t ra = GETPC();
177     int i;
178 
179     /* Normal (possibly masked) vector store */
180     for (i = 0; i < VSTORES_MAX; i++) {
181         if (env->vstore_pending[i]) {
182             env->vstore_pending[i] = 0;
183             target_ulong va = env->vstore[i].va;
184             int size = env->vstore[i].size;
185             for (int j = 0; j < size; j++) {
186                 if (test_bit(j, env->vstore[i].mask)) {
187                     cpu_stb_data_ra(env, va + j, env->vstore[i].data.ub[j], ra);
188                 }
189             }
190         }
191     }
192 
193     /* Scatter store */
194     if (env->vtcm_pending) {
195         env->vtcm_pending = false;
196         if (env->vtcm_log.op) {
197             /* Need to perform the scatter read/modify/write at commit time */
198             if (env->vtcm_log.op_size == 2) {
199                 SCATTER_OP_WRITE_TO_MEM(uint16_t);
200             } else if (env->vtcm_log.op_size == 4) {
201                 /* Word Scatter += */
202                 SCATTER_OP_WRITE_TO_MEM(uint32_t);
203             } else {
204                 g_assert_not_reached();
205             }
206         } else {
207             for (i = 0; i < sizeof(MMVector); i++) {
208                 if (test_bit(i, env->vtcm_log.mask)) {
209                     cpu_stb_data_ra(env, env->vtcm_log.va[i],
210                                     env->vtcm_log.data.ub[i], ra);
211                     clear_bit(i, env->vtcm_log.mask);
212                     env->vtcm_log.data.ub[i] = 0;
213                 }
214 
215             }
216         }
217     }
218 }
219 
220 static void print_store(CPUHexagonState *env, int slot)
221 {
222     if (!(env->slot_cancelled & (1 << slot))) {
223         uint8_t width = env->mem_log_stores[slot].width;
224         if (width == 1) {
225             uint32_t data = env->mem_log_stores[slot].data32 & 0xff;
226             HEX_DEBUG_LOG("\tmemb[0x" TARGET_FMT_lx "] = %" PRId32
227                           " (0x%02" PRIx32 ")\n",
228                           env->mem_log_stores[slot].va, data, data);
229         } else if (width == 2) {
230             uint32_t data = env->mem_log_stores[slot].data32 & 0xffff;
231             HEX_DEBUG_LOG("\tmemh[0x" TARGET_FMT_lx "] = %" PRId32
232                           " (0x%04" PRIx32 ")\n",
233                           env->mem_log_stores[slot].va, data, data);
234         } else if (width == 4) {
235             uint32_t data = env->mem_log_stores[slot].data32;
236             HEX_DEBUG_LOG("\tmemw[0x" TARGET_FMT_lx "] = %" PRId32
237                           " (0x%08" PRIx32 ")\n",
238                           env->mem_log_stores[slot].va, data, data);
239         } else if (width == 8) {
240             HEX_DEBUG_LOG("\tmemd[0x" TARGET_FMT_lx "] = %" PRId64
241                           " (0x%016" PRIx64 ")\n",
242                           env->mem_log_stores[slot].va,
243                           env->mem_log_stores[slot].data64,
244                           env->mem_log_stores[slot].data64);
245         } else {
246             HEX_DEBUG_LOG("\tBad store width %d\n", width);
247             g_assert_not_reached();
248         }
249     }
250 }
251 
252 /* This function is a handy place to set a breakpoint */
253 void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1)
254 {
255     bool reg_printed = false;
256     bool pred_printed = false;
257     int i;
258 
259     HEX_DEBUG_LOG("Packet committed: pc = 0x" TARGET_FMT_lx "\n",
260                   env->this_PC);
261     HEX_DEBUG_LOG("slot_cancelled = %d\n", env->slot_cancelled);
262 
263     for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
264         if (env->reg_written[i]) {
265             if (!reg_printed) {
266                 HEX_DEBUG_LOG("Regs written\n");
267                 reg_printed = true;
268             }
269             HEX_DEBUG_LOG("\tr%d = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")\n",
270                           i, env->new_value[i], env->new_value[i]);
271         }
272     }
273 
274     for (i = 0; i < NUM_PREGS; i++) {
275         if (env->pred_written & (1 << i)) {
276             if (!pred_printed) {
277                 HEX_DEBUG_LOG("Predicates written\n");
278                 pred_printed = true;
279             }
280             HEX_DEBUG_LOG("\tp%d = 0x" TARGET_FMT_lx "\n",
281                           i, env->new_pred_value[i]);
282         }
283     }
284 
285     if (has_st0 || has_st1) {
286         HEX_DEBUG_LOG("Stores\n");
287         if (has_st0) {
288             print_store(env, 0);
289         }
290         if (has_st1) {
291             print_store(env, 1);
292         }
293     }
294 
295     HEX_DEBUG_LOG("Next PC = " TARGET_FMT_lx "\n", env->next_PC);
296     HEX_DEBUG_LOG("Exec counters: pkt = " TARGET_FMT_lx
297                   ", insn = " TARGET_FMT_lx
298                   ", hvx = " TARGET_FMT_lx "\n",
299                   env->gpr[HEX_REG_QEMU_PKT_CNT],
300                   env->gpr[HEX_REG_QEMU_INSN_CNT],
301                   env->gpr[HEX_REG_QEMU_HVX_CNT]);
302 
303 }
304 
305 int32_t HELPER(fcircadd)(int32_t RxV, int32_t offset, int32_t M, int32_t CS)
306 {
307     int32_t K_const = sextract32(M, 24, 4);
308     int32_t length = sextract32(M, 0, 17);
309     uint32_t new_ptr = RxV + offset;
310     uint32_t start_addr;
311     uint32_t end_addr;
312 
313     if (K_const == 0 && length >= 4) {
314         start_addr = CS;
315         end_addr = start_addr + length;
316     } else {
317         /*
318          * Versions v3 and earlier used the K value to specify a power-of-2 size
319          * 2^(K+2) that is greater than the buffer length
320          */
321         int32_t mask = (1 << (K_const + 2)) - 1;
322         start_addr = RxV & (~mask);
323         end_addr = start_addr | length;
324     }
325 
326     if (new_ptr >= end_addr) {
327         new_ptr -= length;
328     } else if (new_ptr < start_addr) {
329         new_ptr += length;
330     }
331 
332     return new_ptr;
333 }
334 
335 uint32_t HELPER(fbrev)(uint32_t addr)
336 {
337     /*
338      *  Bit reverse the low 16 bits of the address
339      */
340     return deposit32(addr, 0, 16, revbit16(addr));
341 }
342 
343 static float32 build_float32(uint8_t sign, uint32_t exp, uint32_t mant)
344 {
345     return make_float32(
346         ((sign & 1) << 31) |
347         ((exp & 0xff) << SF_MANTBITS) |
348         (mant & ((1 << SF_MANTBITS) - 1)));
349 }
350 
351 /*
352  * sfrecipa, sfinvsqrta have two 32-bit results
353  *     r0,p0=sfrecipa(r1,r2)
354  *     r0,p0=sfinvsqrta(r1)
355  *
356  * Since helpers can only return a single value, we pack the two results
357  * into a 64-bit value.
358  */
359 uint64_t HELPER(sfrecipa)(CPUHexagonState *env, float32 RsV, float32 RtV)
360 {
361     int32_t PeV = 0;
362     float32 RdV;
363     int idx;
364     int adjust;
365     int mant;
366     int exp;
367 
368     arch_fpop_start(env);
369     if (arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status)) {
370         PeV = adjust;
371         idx = (RtV >> 16) & 0x7f;
372         mant = (recip_lookup_table[idx] << 15) | 1;
373         exp = SF_BIAS - (float32_getexp(RtV) - SF_BIAS) - 1;
374         RdV = build_float32(extract32(RtV, 31, 1), exp, mant);
375     }
376     arch_fpop_end(env);
377     return ((uint64_t)RdV << 32) | PeV;
378 }
379 
380 uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV)
381 {
382     int PeV = 0;
383     float32 RdV;
384     int idx;
385     int adjust;
386     int mant;
387     int exp;
388 
389     arch_fpop_start(env);
390     if (arch_sf_invsqrt_common(&RsV, &RdV, &adjust, &env->fp_status)) {
391         PeV = adjust;
392         idx = (RsV >> 17) & 0x7f;
393         mant = (invsqrt_lookup_table[idx] << 15);
394         exp = SF_BIAS - ((float32_getexp(RsV) - SF_BIAS) >> 1) - 1;
395         RdV = build_float32(extract32(RsV, 31, 1), exp, mant);
396     }
397     arch_fpop_end(env);
398     return ((uint64_t)RdV << 32) | PeV;
399 }
400 
401 int64_t HELPER(vacsh_val)(CPUHexagonState *env,
402                            int64_t RxxV, int64_t RssV, int64_t RttV)
403 {
404     for (int i = 0; i < 4; i++) {
405         int xv = sextract64(RxxV, i * 16, 16);
406         int sv = sextract64(RssV, i * 16, 16);
407         int tv = sextract64(RttV, i * 16, 16);
408         int max;
409         xv = xv + tv;
410         sv = sv - tv;
411         max = xv > sv ? xv : sv;
412         /* Note that fSATH can set the OVF bit in usr */
413         RxxV = deposit64(RxxV, i * 16, 16, fSATH(max));
414     }
415     return RxxV;
416 }
417 
418 int32_t HELPER(vacsh_pred)(CPUHexagonState *env,
419                            int64_t RxxV, int64_t RssV, int64_t RttV)
420 {
421     int32_t PeV = 0;
422     for (int i = 0; i < 4; i++) {
423         int xv = sextract64(RxxV, i * 16, 16);
424         int sv = sextract64(RssV, i * 16, 16);
425         int tv = sextract64(RttV, i * 16, 16);
426         xv = xv + tv;
427         sv = sv - tv;
428         PeV = deposit32(PeV, i * 2, 1, (xv > sv));
429         PeV = deposit32(PeV, i * 2 + 1, 1, (xv > sv));
430     }
431     return PeV;
432 }
433 
434 static void probe_store(CPUHexagonState *env, int slot, int mmu_idx)
435 {
436     if (!(env->slot_cancelled & (1 << slot))) {
437         size1u_t width = env->mem_log_stores[slot].width;
438         target_ulong va = env->mem_log_stores[slot].va;
439         uintptr_t ra = GETPC();
440         probe_write(env, va, width, mmu_idx, ra);
441     }
442 }
443 
444 /* Called during packet commit when there are two scalar stores */
445 void HELPER(probe_pkt_scalar_store_s0)(CPUHexagonState *env, int mmu_idx)
446 {
447     probe_store(env, 0, mmu_idx);
448 }
449 
450 void HELPER(probe_hvx_stores)(CPUHexagonState *env, int mmu_idx)
451 {
452     uintptr_t retaddr = GETPC();
453     int i;
454 
455     /* Normal (possibly masked) vector store */
456     for (i = 0; i < VSTORES_MAX; i++) {
457         if (env->vstore_pending[i]) {
458             target_ulong va = env->vstore[i].va;
459             int size = env->vstore[i].size;
460             for (int j = 0; j < size; j++) {
461                 if (test_bit(j, env->vstore[i].mask)) {
462                     probe_write(env, va + j, 1, mmu_idx, retaddr);
463                 }
464             }
465         }
466     }
467 
468     /* Scatter store */
469     if (env->vtcm_pending) {
470         if (env->vtcm_log.op) {
471             /* Need to perform the scatter read/modify/write at commit time */
472             if (env->vtcm_log.op_size == 2) {
473                 SCATTER_OP_PROBE_MEM(size2u_t, mmu_idx, retaddr);
474             } else if (env->vtcm_log.op_size == 4) {
475                 /* Word Scatter += */
476                 SCATTER_OP_PROBE_MEM(size4u_t, mmu_idx, retaddr);
477             } else {
478                 g_assert_not_reached();
479             }
480         } else {
481             for (int i = 0; i < sizeof(MMVector); i++) {
482                 if (test_bit(i, env->vtcm_log.mask)) {
483                     probe_write(env, env->vtcm_log.va[i], 1, mmu_idx, retaddr);
484                 }
485 
486             }
487         }
488     }
489 }
490 
491 void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask,
492                                          int mmu_idx)
493 {
494     bool has_st0        = (mask >> 0) & 1;
495     bool has_st1        = (mask >> 1) & 1;
496     bool has_hvx_stores = (mask >> 2) & 1;
497 
498     if (has_st0) {
499         probe_store(env, 0, mmu_idx);
500     }
501     if (has_st1) {
502         probe_store(env, 1, mmu_idx);
503     }
504     if (has_hvx_stores) {
505         HELPER(probe_hvx_stores)(env, mmu_idx);
506     }
507 }
508 
509 /*
510  * mem_noshuf
511  * Section 5.5 of the Hexagon V67 Programmer's Reference Manual
512  *
513  * If the load is in slot 0 and there is a store in slot1 (that
514  * wasn't cancelled), we have to do the store first.
515  */
516 static void check_noshuf(CPUHexagonState *env, uint32_t slot)
517 {
518     if (slot == 0 && env->pkt_has_store_s1 &&
519         ((env->slot_cancelled & (1 << 1)) == 0)) {
520         HELPER(commit_store)(env, 1);
521     }
522 }
523 
524 static uint8_t mem_load1(CPUHexagonState *env, uint32_t slot,
525                          target_ulong vaddr)
526 {
527     uintptr_t ra = GETPC();
528     check_noshuf(env, slot);
529     return cpu_ldub_data_ra(env, vaddr, ra);
530 }
531 
532 static uint16_t mem_load2(CPUHexagonState *env, uint32_t slot,
533                           target_ulong vaddr)
534 {
535     uintptr_t ra = GETPC();
536     check_noshuf(env, slot);
537     return cpu_lduw_data_ra(env, vaddr, ra);
538 }
539 
540 static uint32_t mem_load4(CPUHexagonState *env, uint32_t slot,
541                           target_ulong vaddr)
542 {
543     uintptr_t ra = GETPC();
544     check_noshuf(env, slot);
545     return cpu_ldl_data_ra(env, vaddr, ra);
546 }
547 
548 static uint64_t mem_load8(CPUHexagonState *env, uint32_t slot,
549                           target_ulong vaddr)
550 {
551     uintptr_t ra = GETPC();
552     check_noshuf(env, slot);
553     return cpu_ldq_data_ra(env, vaddr, ra);
554 }
555 
556 /* Floating point */
557 float64 HELPER(conv_sf2df)(CPUHexagonState *env, float32 RsV)
558 {
559     float64 out_f64;
560     arch_fpop_start(env);
561     out_f64 = float32_to_float64(RsV, &env->fp_status);
562     arch_fpop_end(env);
563     return out_f64;
564 }
565 
566 float32 HELPER(conv_df2sf)(CPUHexagonState *env, float64 RssV)
567 {
568     float32 out_f32;
569     arch_fpop_start(env);
570     out_f32 = float64_to_float32(RssV, &env->fp_status);
571     arch_fpop_end(env);
572     return out_f32;
573 }
574 
575 float32 HELPER(conv_uw2sf)(CPUHexagonState *env, int32_t RsV)
576 {
577     float32 RdV;
578     arch_fpop_start(env);
579     RdV = uint32_to_float32(RsV, &env->fp_status);
580     arch_fpop_end(env);
581     return RdV;
582 }
583 
584 float64 HELPER(conv_uw2df)(CPUHexagonState *env, int32_t RsV)
585 {
586     float64 RddV;
587     arch_fpop_start(env);
588     RddV = uint32_to_float64(RsV, &env->fp_status);
589     arch_fpop_end(env);
590     return RddV;
591 }
592 
593 float32 HELPER(conv_w2sf)(CPUHexagonState *env, int32_t RsV)
594 {
595     float32 RdV;
596     arch_fpop_start(env);
597     RdV = int32_to_float32(RsV, &env->fp_status);
598     arch_fpop_end(env);
599     return RdV;
600 }
601 
602 float64 HELPER(conv_w2df)(CPUHexagonState *env, int32_t RsV)
603 {
604     float64 RddV;
605     arch_fpop_start(env);
606     RddV = int32_to_float64(RsV, &env->fp_status);
607     arch_fpop_end(env);
608     return RddV;
609 }
610 
611 float32 HELPER(conv_ud2sf)(CPUHexagonState *env, int64_t RssV)
612 {
613     float32 RdV;
614     arch_fpop_start(env);
615     RdV = uint64_to_float32(RssV, &env->fp_status);
616     arch_fpop_end(env);
617     return RdV;
618 }
619 
620 float64 HELPER(conv_ud2df)(CPUHexagonState *env, int64_t RssV)
621 {
622     float64 RddV;
623     arch_fpop_start(env);
624     RddV = uint64_to_float64(RssV, &env->fp_status);
625     arch_fpop_end(env);
626     return RddV;
627 }
628 
629 float32 HELPER(conv_d2sf)(CPUHexagonState *env, int64_t RssV)
630 {
631     float32 RdV;
632     arch_fpop_start(env);
633     RdV = int64_to_float32(RssV, &env->fp_status);
634     arch_fpop_end(env);
635     return RdV;
636 }
637 
638 float64 HELPER(conv_d2df)(CPUHexagonState *env, int64_t RssV)
639 {
640     float64 RddV;
641     arch_fpop_start(env);
642     RddV = int64_to_float64(RssV, &env->fp_status);
643     arch_fpop_end(env);
644     return RddV;
645 }
646 
647 uint32_t HELPER(conv_sf2uw)(CPUHexagonState *env, float32 RsV)
648 {
649     uint32_t RdV;
650     arch_fpop_start(env);
651     /* Hexagon checks the sign before rounding */
652     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
653         float_raise(float_flag_invalid, &env->fp_status);
654         RdV = 0;
655     } else {
656         RdV = float32_to_uint32(RsV, &env->fp_status);
657     }
658     arch_fpop_end(env);
659     return RdV;
660 }
661 
662 int32_t HELPER(conv_sf2w)(CPUHexagonState *env, float32 RsV)
663 {
664     int32_t RdV;
665     arch_fpop_start(env);
666     /* Hexagon returns -1 for NaN */
667     if (float32_is_any_nan(RsV)) {
668         float_raise(float_flag_invalid, &env->fp_status);
669         RdV = -1;
670     } else {
671         RdV = float32_to_int32(RsV, &env->fp_status);
672     }
673     arch_fpop_end(env);
674     return RdV;
675 }
676 
677 uint64_t HELPER(conv_sf2ud)(CPUHexagonState *env, float32 RsV)
678 {
679     uint64_t RddV;
680     arch_fpop_start(env);
681     /* Hexagon checks the sign before rounding */
682     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
683         float_raise(float_flag_invalid, &env->fp_status);
684         RddV = 0;
685     } else {
686         RddV = float32_to_uint64(RsV, &env->fp_status);
687     }
688     arch_fpop_end(env);
689     return RddV;
690 }
691 
692 int64_t HELPER(conv_sf2d)(CPUHexagonState *env, float32 RsV)
693 {
694     int64_t RddV;
695     arch_fpop_start(env);
696     /* Hexagon returns -1 for NaN */
697     if (float32_is_any_nan(RsV)) {
698         float_raise(float_flag_invalid, &env->fp_status);
699         RddV = -1;
700     } else {
701         RddV = float32_to_int64(RsV, &env->fp_status);
702     }
703     arch_fpop_end(env);
704     return RddV;
705 }
706 
707 uint32_t HELPER(conv_df2uw)(CPUHexagonState *env, float64 RssV)
708 {
709     uint32_t RdV;
710     arch_fpop_start(env);
711     /* Hexagon checks the sign before rounding */
712     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
713         float_raise(float_flag_invalid, &env->fp_status);
714         RdV = 0;
715     } else {
716         RdV = float64_to_uint32(RssV, &env->fp_status);
717     }
718     arch_fpop_end(env);
719     return RdV;
720 }
721 
722 int32_t HELPER(conv_df2w)(CPUHexagonState *env, float64 RssV)
723 {
724     int32_t RdV;
725     arch_fpop_start(env);
726     /* Hexagon returns -1 for NaN */
727     if (float64_is_any_nan(RssV)) {
728         float_raise(float_flag_invalid, &env->fp_status);
729         RdV = -1;
730     } else {
731         RdV = float64_to_int32(RssV, &env->fp_status);
732     }
733     arch_fpop_end(env);
734     return RdV;
735 }
736 
737 uint64_t HELPER(conv_df2ud)(CPUHexagonState *env, float64 RssV)
738 {
739     uint64_t RddV;
740     arch_fpop_start(env);
741     /* Hexagon checks the sign before rounding */
742     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
743         float_raise(float_flag_invalid, &env->fp_status);
744         RddV = 0;
745     } else {
746         RddV = float64_to_uint64(RssV, &env->fp_status);
747     }
748     arch_fpop_end(env);
749     return RddV;
750 }
751 
752 int64_t HELPER(conv_df2d)(CPUHexagonState *env, float64 RssV)
753 {
754     int64_t RddV;
755     arch_fpop_start(env);
756     /* Hexagon returns -1 for NaN */
757     if (float64_is_any_nan(RssV)) {
758         float_raise(float_flag_invalid, &env->fp_status);
759         RddV = -1;
760     } else {
761         RddV = float64_to_int64(RssV, &env->fp_status);
762     }
763     arch_fpop_end(env);
764     return RddV;
765 }
766 
767 uint32_t HELPER(conv_sf2uw_chop)(CPUHexagonState *env, float32 RsV)
768 {
769     uint32_t RdV;
770     arch_fpop_start(env);
771     /* Hexagon checks the sign before rounding */
772     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
773         float_raise(float_flag_invalid, &env->fp_status);
774         RdV = 0;
775     } else {
776         RdV = float32_to_uint32_round_to_zero(RsV, &env->fp_status);
777     }
778     arch_fpop_end(env);
779     return RdV;
780 }
781 
782 int32_t HELPER(conv_sf2w_chop)(CPUHexagonState *env, float32 RsV)
783 {
784     int32_t RdV;
785     arch_fpop_start(env);
786     /* Hexagon returns -1 for NaN */
787     if (float32_is_any_nan(RsV)) {
788         float_raise(float_flag_invalid, &env->fp_status);
789         RdV = -1;
790     } else {
791         RdV = float32_to_int32_round_to_zero(RsV, &env->fp_status);
792     }
793     arch_fpop_end(env);
794     return RdV;
795 }
796 
797 uint64_t HELPER(conv_sf2ud_chop)(CPUHexagonState *env, float32 RsV)
798 {
799     uint64_t RddV;
800     arch_fpop_start(env);
801     /* Hexagon checks the sign before rounding */
802     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
803         float_raise(float_flag_invalid, &env->fp_status);
804         RddV = 0;
805     } else {
806         RddV = float32_to_uint64_round_to_zero(RsV, &env->fp_status);
807     }
808     arch_fpop_end(env);
809     return RddV;
810 }
811 
812 int64_t HELPER(conv_sf2d_chop)(CPUHexagonState *env, float32 RsV)
813 {
814     int64_t RddV;
815     arch_fpop_start(env);
816     /* Hexagon returns -1 for NaN */
817     if (float32_is_any_nan(RsV)) {
818         float_raise(float_flag_invalid, &env->fp_status);
819         RddV = -1;
820     } else {
821         RddV = float32_to_int64_round_to_zero(RsV, &env->fp_status);
822     }
823     arch_fpop_end(env);
824     return RddV;
825 }
826 
827 uint32_t HELPER(conv_df2uw_chop)(CPUHexagonState *env, float64 RssV)
828 {
829     uint32_t RdV;
830     arch_fpop_start(env);
831     /* Hexagon checks the sign before rounding */
832     if (float64_is_neg(RssV) && !float32_is_any_nan(RssV)) {
833         float_raise(float_flag_invalid, &env->fp_status);
834         RdV = 0;
835     } else {
836         RdV = float64_to_uint32_round_to_zero(RssV, &env->fp_status);
837     }
838     arch_fpop_end(env);
839     return RdV;
840 }
841 
842 int32_t HELPER(conv_df2w_chop)(CPUHexagonState *env, float64 RssV)
843 {
844     int32_t RdV;
845     arch_fpop_start(env);
846     /* Hexagon returns -1 for NaN */
847     if (float64_is_any_nan(RssV)) {
848         float_raise(float_flag_invalid, &env->fp_status);
849         RdV = -1;
850     } else {
851         RdV = float64_to_int32_round_to_zero(RssV, &env->fp_status);
852     }
853     arch_fpop_end(env);
854     return RdV;
855 }
856 
857 uint64_t HELPER(conv_df2ud_chop)(CPUHexagonState *env, float64 RssV)
858 {
859     uint64_t RddV;
860     arch_fpop_start(env);
861     /* Hexagon checks the sign before rounding */
862     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
863         float_raise(float_flag_invalid, &env->fp_status);
864         RddV = 0;
865     } else {
866         RddV = float64_to_uint64_round_to_zero(RssV, &env->fp_status);
867     }
868     arch_fpop_end(env);
869     return RddV;
870 }
871 
872 int64_t HELPER(conv_df2d_chop)(CPUHexagonState *env, float64 RssV)
873 {
874     int64_t RddV;
875     arch_fpop_start(env);
876     /* Hexagon returns -1 for NaN */
877     if (float64_is_any_nan(RssV)) {
878         float_raise(float_flag_invalid, &env->fp_status);
879         RddV = -1;
880     } else {
881         RddV = float64_to_int64_round_to_zero(RssV, &env->fp_status);
882     }
883     arch_fpop_end(env);
884     return RddV;
885 }
886 
887 float32 HELPER(sfadd)(CPUHexagonState *env, float32 RsV, float32 RtV)
888 {
889     float32 RdV;
890     arch_fpop_start(env);
891     RdV = float32_add(RsV, RtV, &env->fp_status);
892     arch_fpop_end(env);
893     return RdV;
894 }
895 
896 float32 HELPER(sfsub)(CPUHexagonState *env, float32 RsV, float32 RtV)
897 {
898     float32 RdV;
899     arch_fpop_start(env);
900     RdV = float32_sub(RsV, RtV, &env->fp_status);
901     arch_fpop_end(env);
902     return RdV;
903 }
904 
905 int32_t HELPER(sfcmpeq)(CPUHexagonState *env, float32 RsV, float32 RtV)
906 {
907     int32_t PdV;
908     arch_fpop_start(env);
909     PdV = f8BITSOF(float32_eq_quiet(RsV, RtV, &env->fp_status));
910     arch_fpop_end(env);
911     return PdV;
912 }
913 
914 int32_t HELPER(sfcmpgt)(CPUHexagonState *env, float32 RsV, float32 RtV)
915 {
916     int cmp;
917     int32_t PdV;
918     arch_fpop_start(env);
919     cmp = float32_compare_quiet(RsV, RtV, &env->fp_status);
920     PdV = f8BITSOF(cmp == float_relation_greater);
921     arch_fpop_end(env);
922     return PdV;
923 }
924 
925 int32_t HELPER(sfcmpge)(CPUHexagonState *env, float32 RsV, float32 RtV)
926 {
927     int cmp;
928     int32_t PdV;
929     arch_fpop_start(env);
930     cmp = float32_compare_quiet(RsV, RtV, &env->fp_status);
931     PdV = f8BITSOF(cmp == float_relation_greater ||
932                    cmp == float_relation_equal);
933     arch_fpop_end(env);
934     return PdV;
935 }
936 
937 int32_t HELPER(sfcmpuo)(CPUHexagonState *env, float32 RsV, float32 RtV)
938 {
939     int32_t PdV;
940     arch_fpop_start(env);
941     PdV = f8BITSOF(float32_is_any_nan(RsV) ||
942                    float32_is_any_nan(RtV));
943     arch_fpop_end(env);
944     return PdV;
945 }
946 
947 float32 HELPER(sfmax)(CPUHexagonState *env, float32 RsV, float32 RtV)
948 {
949     float32 RdV;
950     arch_fpop_start(env);
951     RdV = float32_maxnum(RsV, RtV, &env->fp_status);
952     arch_fpop_end(env);
953     return RdV;
954 }
955 
956 float32 HELPER(sfmin)(CPUHexagonState *env, float32 RsV, float32 RtV)
957 {
958     float32 RdV;
959     arch_fpop_start(env);
960     RdV = float32_minnum(RsV, RtV, &env->fp_status);
961     arch_fpop_end(env);
962     return RdV;
963 }
964 
965 int32_t HELPER(sfclass)(CPUHexagonState *env, float32 RsV, int32_t uiV)
966 {
967     int32_t PdV = 0;
968     arch_fpop_start(env);
969     if (fGETBIT(0, uiV) && float32_is_zero(RsV)) {
970         PdV = 0xff;
971     }
972     if (fGETBIT(1, uiV) && float32_is_normal(RsV)) {
973         PdV = 0xff;
974     }
975     if (fGETBIT(2, uiV) && float32_is_denormal(RsV)) {
976         PdV = 0xff;
977     }
978     if (fGETBIT(3, uiV) && float32_is_infinity(RsV)) {
979         PdV = 0xff;
980     }
981     if (fGETBIT(4, uiV) && float32_is_any_nan(RsV)) {
982         PdV = 0xff;
983     }
984     set_float_exception_flags(0, &env->fp_status);
985     arch_fpop_end(env);
986     return PdV;
987 }
988 
989 float32 HELPER(sffixupn)(CPUHexagonState *env, float32 RsV, float32 RtV)
990 {
991     float32 RdV = 0;
992     int adjust;
993     arch_fpop_start(env);
994     arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status);
995     RdV = RsV;
996     arch_fpop_end(env);
997     return RdV;
998 }
999 
1000 float32 HELPER(sffixupd)(CPUHexagonState *env, float32 RsV, float32 RtV)
1001 {
1002     float32 RdV = 0;
1003     int adjust;
1004     arch_fpop_start(env);
1005     arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status);
1006     RdV = RtV;
1007     arch_fpop_end(env);
1008     return RdV;
1009 }
1010 
1011 float32 HELPER(sffixupr)(CPUHexagonState *env, float32 RsV)
1012 {
1013     float32 RdV = 0;
1014     int adjust;
1015     arch_fpop_start(env);
1016     arch_sf_invsqrt_common(&RsV, &RdV, &adjust, &env->fp_status);
1017     RdV = RsV;
1018     arch_fpop_end(env);
1019     return RdV;
1020 }
1021 
1022 float64 HELPER(dfadd)(CPUHexagonState *env, float64 RssV, float64 RttV)
1023 {
1024     float64 RddV;
1025     arch_fpop_start(env);
1026     RddV = float64_add(RssV, RttV, &env->fp_status);
1027     arch_fpop_end(env);
1028     return RddV;
1029 }
1030 
1031 float64 HELPER(dfsub)(CPUHexagonState *env, float64 RssV, float64 RttV)
1032 {
1033     float64 RddV;
1034     arch_fpop_start(env);
1035     RddV = float64_sub(RssV, RttV, &env->fp_status);
1036     arch_fpop_end(env);
1037     return RddV;
1038 }
1039 
1040 float64 HELPER(dfmax)(CPUHexagonState *env, float64 RssV, float64 RttV)
1041 {
1042     float64 RddV;
1043     arch_fpop_start(env);
1044     RddV = float64_maxnum(RssV, RttV, &env->fp_status);
1045     if (float64_is_any_nan(RssV) || float64_is_any_nan(RttV)) {
1046         float_raise(float_flag_invalid, &env->fp_status);
1047     }
1048     arch_fpop_end(env);
1049     return RddV;
1050 }
1051 
1052 float64 HELPER(dfmin)(CPUHexagonState *env, float64 RssV, float64 RttV)
1053 {
1054     float64 RddV;
1055     arch_fpop_start(env);
1056     RddV = float64_minnum(RssV, RttV, &env->fp_status);
1057     if (float64_is_any_nan(RssV) || float64_is_any_nan(RttV)) {
1058         float_raise(float_flag_invalid, &env->fp_status);
1059     }
1060     arch_fpop_end(env);
1061     return RddV;
1062 }
1063 
1064 int32_t HELPER(dfcmpeq)(CPUHexagonState *env, float64 RssV, float64 RttV)
1065 {
1066     int32_t PdV;
1067     arch_fpop_start(env);
1068     PdV = f8BITSOF(float64_eq_quiet(RssV, RttV, &env->fp_status));
1069     arch_fpop_end(env);
1070     return PdV;
1071 }
1072 
1073 int32_t HELPER(dfcmpgt)(CPUHexagonState *env, float64 RssV, float64 RttV)
1074 {
1075     int cmp;
1076     int32_t PdV;
1077     arch_fpop_start(env);
1078     cmp = float64_compare_quiet(RssV, RttV, &env->fp_status);
1079     PdV = f8BITSOF(cmp == float_relation_greater);
1080     arch_fpop_end(env);
1081     return PdV;
1082 }
1083 
1084 int32_t HELPER(dfcmpge)(CPUHexagonState *env, float64 RssV, float64 RttV)
1085 {
1086     int cmp;
1087     int32_t PdV;
1088     arch_fpop_start(env);
1089     cmp = float64_compare_quiet(RssV, RttV, &env->fp_status);
1090     PdV = f8BITSOF(cmp == float_relation_greater ||
1091                    cmp == float_relation_equal);
1092     arch_fpop_end(env);
1093     return PdV;
1094 }
1095 
1096 int32_t HELPER(dfcmpuo)(CPUHexagonState *env, float64 RssV, float64 RttV)
1097 {
1098     int32_t PdV;
1099     arch_fpop_start(env);
1100     PdV = f8BITSOF(float64_is_any_nan(RssV) ||
1101                    float64_is_any_nan(RttV));
1102     arch_fpop_end(env);
1103     return PdV;
1104 }
1105 
1106 int32_t HELPER(dfclass)(CPUHexagonState *env, float64 RssV, int32_t uiV)
1107 {
1108     int32_t PdV = 0;
1109     arch_fpop_start(env);
1110     if (fGETBIT(0, uiV) && float64_is_zero(RssV)) {
1111         PdV = 0xff;
1112     }
1113     if (fGETBIT(1, uiV) && float64_is_normal(RssV)) {
1114         PdV = 0xff;
1115     }
1116     if (fGETBIT(2, uiV) && float64_is_denormal(RssV)) {
1117         PdV = 0xff;
1118     }
1119     if (fGETBIT(3, uiV) && float64_is_infinity(RssV)) {
1120         PdV = 0xff;
1121     }
1122     if (fGETBIT(4, uiV) && float64_is_any_nan(RssV)) {
1123         PdV = 0xff;
1124     }
1125     set_float_exception_flags(0, &env->fp_status);
1126     arch_fpop_end(env);
1127     return PdV;
1128 }
1129 
1130 float32 HELPER(sfmpy)(CPUHexagonState *env, float32 RsV, float32 RtV)
1131 {
1132     float32 RdV;
1133     arch_fpop_start(env);
1134     RdV = internal_mpyf(RsV, RtV, &env->fp_status);
1135     arch_fpop_end(env);
1136     return RdV;
1137 }
1138 
1139 float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
1140                       float32 RsV, float32 RtV)
1141 {
1142     arch_fpop_start(env);
1143     RxV = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
1144     arch_fpop_end(env);
1145     return RxV;
1146 }
1147 
1148 static bool is_zero_prod(float32 a, float32 b)
1149 {
1150     return ((float32_is_zero(a) && is_finite(b)) ||
1151             (float32_is_zero(b) && is_finite(a)));
1152 }
1153 
1154 static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
1155 {
1156     float32 ret = dst;
1157     if (float32_is_any_nan(x)) {
1158         if (extract32(x, 22, 1) == 0) {
1159             float_raise(float_flag_invalid, fp_status);
1160         }
1161         ret = make_float32(0xffffffff);    /* nan */
1162     }
1163     return ret;
1164 }
1165 
1166 float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
1167                          float32 RsV, float32 RtV, float32 PuV)
1168 {
1169     size4s_t tmp;
1170     arch_fpop_start(env);
1171     RxV = check_nan(RxV, RxV, &env->fp_status);
1172     RxV = check_nan(RxV, RsV, &env->fp_status);
1173     RxV = check_nan(RxV, RtV, &env->fp_status);
1174     tmp = internal_fmafx(RsV, RtV, RxV, fSXTN(8, 64, PuV), &env->fp_status);
1175     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1176         RxV = tmp;
1177     }
1178     arch_fpop_end(env);
1179     return RxV;
1180 }
1181 
1182 float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
1183                       float32 RsV, float32 RtV)
1184 {
1185     float32 neg_RsV;
1186     arch_fpop_start(env);
1187     neg_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
1188     RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status);
1189     arch_fpop_end(env);
1190     return RxV;
1191 }
1192 
1193 static bool is_inf_prod(int32_t a, int32_t b)
1194 {
1195     return (float32_is_infinity(a) && float32_is_infinity(b)) ||
1196            (float32_is_infinity(a) && is_finite(b) && !float32_is_zero(b)) ||
1197            (float32_is_infinity(b) && is_finite(a) && !float32_is_zero(a));
1198 }
1199 
1200 float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
1201                           float32 RsV, float32 RtV)
1202 {
1203     bool infinp;
1204     bool infminusinf;
1205     float32 tmp;
1206 
1207     arch_fpop_start(env);
1208     set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
1209     infminusinf = float32_is_infinity(RxV) &&
1210                   is_inf_prod(RsV, RtV) &&
1211                   (fGETBIT(31, RsV ^ RxV ^ RtV) != 0);
1212     infinp = float32_is_infinity(RxV) ||
1213              float32_is_infinity(RtV) ||
1214              float32_is_infinity(RsV);
1215     RxV = check_nan(RxV, RxV, &env->fp_status);
1216     RxV = check_nan(RxV, RsV, &env->fp_status);
1217     RxV = check_nan(RxV, RtV, &env->fp_status);
1218     tmp = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
1219     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1220         RxV = tmp;
1221     }
1222     set_float_exception_flags(0, &env->fp_status);
1223     if (float32_is_infinity(RxV) && !infinp) {
1224         RxV = RxV - 1;
1225     }
1226     if (infminusinf) {
1227         RxV = 0;
1228     }
1229     arch_fpop_end(env);
1230     return RxV;
1231 }
1232 
1233 float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV,
1234                           float32 RsV, float32 RtV)
1235 {
1236     bool infinp;
1237     bool infminusinf;
1238     float32 tmp;
1239 
1240     arch_fpop_start(env);
1241     set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
1242     infminusinf = float32_is_infinity(RxV) &&
1243                   is_inf_prod(RsV, RtV) &&
1244                   (fGETBIT(31, RsV ^ RxV ^ RtV) == 0);
1245     infinp = float32_is_infinity(RxV) ||
1246              float32_is_infinity(RtV) ||
1247              float32_is_infinity(RsV);
1248     RxV = check_nan(RxV, RxV, &env->fp_status);
1249     RxV = check_nan(RxV, RsV, &env->fp_status);
1250     RxV = check_nan(RxV, RtV, &env->fp_status);
1251     float32 minus_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
1252     tmp = internal_fmafx(minus_RsV, RtV, RxV, 0, &env->fp_status);
1253     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1254         RxV = tmp;
1255     }
1256     set_float_exception_flags(0, &env->fp_status);
1257     if (float32_is_infinity(RxV) && !infinp) {
1258         RxV = RxV - 1;
1259     }
1260     if (infminusinf) {
1261         RxV = 0;
1262     }
1263     arch_fpop_end(env);
1264     return RxV;
1265 }
1266 
1267 float64 HELPER(dfmpyfix)(CPUHexagonState *env, float64 RssV, float64 RttV)
1268 {
1269     int64_t RddV;
1270     arch_fpop_start(env);
1271     if (float64_is_denormal(RssV) &&
1272         (float64_getexp(RttV) >= 512) &&
1273         float64_is_normal(RttV)) {
1274         RddV = float64_mul(RssV, make_float64(0x4330000000000000),
1275                            &env->fp_status);
1276     } else if (float64_is_denormal(RttV) &&
1277                (float64_getexp(RssV) >= 512) &&
1278                float64_is_normal(RssV)) {
1279         RddV = float64_mul(RssV, make_float64(0x3cb0000000000000),
1280                            &env->fp_status);
1281     } else {
1282         RddV = RssV;
1283     }
1284     arch_fpop_end(env);
1285     return RddV;
1286 }
1287 
1288 float64 HELPER(dfmpyhh)(CPUHexagonState *env, float64 RxxV,
1289                         float64 RssV, float64 RttV)
1290 {
1291     arch_fpop_start(env);
1292     RxxV = internal_mpyhh(RssV, RttV, RxxV, &env->fp_status);
1293     arch_fpop_end(env);
1294     return RxxV;
1295 }
1296 
1297 /* Histogram instructions */
1298 
1299 void HELPER(vhist)(CPUHexagonState *env)
1300 {
1301     MMVector *input = &env->tmp_VRegs[0];
1302 
1303     for (int lane = 0; lane < 8; lane++) {
1304         for (int i = 0; i < sizeof(MMVector) / 8; ++i) {
1305             unsigned char value = input->ub[(sizeof(MMVector) / 8) * lane + i];
1306             unsigned char regno = value >> 3;
1307             unsigned char element = value & 7;
1308 
1309             env->VRegs[regno].uh[(sizeof(MMVector) / 16) * lane + element]++;
1310         }
1311     }
1312 }
1313 
1314 void HELPER(vhistq)(CPUHexagonState *env)
1315 {
1316     MMVector *input = &env->tmp_VRegs[0];
1317 
1318     for (int lane = 0; lane < 8; lane++) {
1319         for (int i = 0; i < sizeof(MMVector) / 8; ++i) {
1320             unsigned char value = input->ub[(sizeof(MMVector) / 8) * lane + i];
1321             unsigned char regno = value >> 3;
1322             unsigned char element = value & 7;
1323 
1324             if (fGETQBIT(env->qtmp, sizeof(MMVector) / 8 * lane + i)) {
1325                 env->VRegs[regno].uh[
1326                     (sizeof(MMVector) / 16) * lane + element]++;
1327             }
1328         }
1329     }
1330 }
1331 
1332 void HELPER(vwhist256)(CPUHexagonState *env)
1333 {
1334     MMVector *input = &env->tmp_VRegs[0];
1335 
1336     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1337         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1338         unsigned int weight = fGETUBYTE(1, input->h[i]);
1339         unsigned int vindex = (bucket >> 3) & 0x1F;
1340         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1341 
1342         env->VRegs[vindex].uh[elindex] =
1343             env->VRegs[vindex].uh[elindex] + weight;
1344     }
1345 }
1346 
1347 void HELPER(vwhist256q)(CPUHexagonState *env)
1348 {
1349     MMVector *input = &env->tmp_VRegs[0];
1350 
1351     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1352         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1353         unsigned int weight = fGETUBYTE(1, input->h[i]);
1354         unsigned int vindex = (bucket >> 3) & 0x1F;
1355         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1356 
1357         if (fGETQBIT(env->qtmp, 2 * i)) {
1358             env->VRegs[vindex].uh[elindex] =
1359                 env->VRegs[vindex].uh[elindex] + weight;
1360         }
1361     }
1362 }
1363 
1364 void HELPER(vwhist256_sat)(CPUHexagonState *env)
1365 {
1366     MMVector *input = &env->tmp_VRegs[0];
1367 
1368     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1369         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1370         unsigned int weight = fGETUBYTE(1, input->h[i]);
1371         unsigned int vindex = (bucket >> 3) & 0x1F;
1372         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1373 
1374         env->VRegs[vindex].uh[elindex] =
1375             fVSATUH(env->VRegs[vindex].uh[elindex] + weight);
1376     }
1377 }
1378 
1379 void HELPER(vwhist256q_sat)(CPUHexagonState *env)
1380 {
1381     MMVector *input = &env->tmp_VRegs[0];
1382 
1383     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1384         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1385         unsigned int weight = fGETUBYTE(1, input->h[i]);
1386         unsigned int vindex = (bucket >> 3) & 0x1F;
1387         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1388 
1389         if (fGETQBIT(env->qtmp, 2 * i)) {
1390             env->VRegs[vindex].uh[elindex] =
1391                 fVSATUH(env->VRegs[vindex].uh[elindex] + weight);
1392         }
1393     }
1394 }
1395 
1396 void HELPER(vwhist128)(CPUHexagonState *env)
1397 {
1398     MMVector *input = &env->tmp_VRegs[0];
1399 
1400     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1401         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1402         unsigned int weight = fGETUBYTE(1, input->h[i]);
1403         unsigned int vindex = (bucket >> 3) & 0x1F;
1404         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1405 
1406         env->VRegs[vindex].uw[elindex] =
1407             env->VRegs[vindex].uw[elindex] + weight;
1408     }
1409 }
1410 
1411 void HELPER(vwhist128q)(CPUHexagonState *env)
1412 {
1413     MMVector *input = &env->tmp_VRegs[0];
1414 
1415     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1416         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1417         unsigned int weight = fGETUBYTE(1, input->h[i]);
1418         unsigned int vindex = (bucket >> 3) & 0x1F;
1419         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1420 
1421         if (fGETQBIT(env->qtmp, 2 * i)) {
1422             env->VRegs[vindex].uw[elindex] =
1423                 env->VRegs[vindex].uw[elindex] + weight;
1424         }
1425     }
1426 }
1427 
1428 void HELPER(vwhist128m)(CPUHexagonState *env, int32_t uiV)
1429 {
1430     MMVector *input = &env->tmp_VRegs[0];
1431 
1432     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1433         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1434         unsigned int weight = fGETUBYTE(1, input->h[i]);
1435         unsigned int vindex = (bucket >> 3) & 0x1F;
1436         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1437 
1438         if ((bucket & 1) == uiV) {
1439             env->VRegs[vindex].uw[elindex] =
1440                 env->VRegs[vindex].uw[elindex] + weight;
1441         }
1442     }
1443 }
1444 
1445 void HELPER(vwhist128qm)(CPUHexagonState *env, int32_t uiV)
1446 {
1447     MMVector *input = &env->tmp_VRegs[0];
1448 
1449     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1450         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1451         unsigned int weight = fGETUBYTE(1, input->h[i]);
1452         unsigned int vindex = (bucket >> 3) & 0x1F;
1453         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1454 
1455         if (((bucket & 1) == uiV) && fGETQBIT(env->qtmp, 2 * i)) {
1456             env->VRegs[vindex].uw[elindex] =
1457                 env->VRegs[vindex].uw[elindex] + weight;
1458         }
1459     }
1460 }
1461 
1462 static void cancel_slot(CPUHexagonState *env, uint32_t slot)
1463 {
1464     HEX_DEBUG_LOG("Slot %d cancelled\n", slot);
1465     env->slot_cancelled |= (1 << slot);
1466 }
1467 
/* These macros can be referenced in the generated helper functions */
/* warn() accepts any arguments and expands to nothing. */
#define warn(...) /* Nothing */
/*
 * fatal() ignores its arguments and expands to g_assert_not_reached();
 * note that the expansion already ends with a semicolon.
 */
#define fatal(...) g_assert_not_reached();

/*
 * BOGUS_HELPER(tag) prints "ERROR: bogus helper: <tag>" (the tag is
 * stringified by the preprocessor) followed by a newline via printf().
 */
#define BOGUS_HELPER(tag) \
    printf("ERROR: bogus helper: " #tag "\n")
1474 
1475 #include "helper_funcs_generated.c.inc"
1476