xref: /openbmc/qemu/target/hexagon/op_helper.c (revision 6c187695)
1 /*
2  *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "qemu/log.h"
20 #include "exec/exec-all.h"
21 #include "exec/cpu_ldst.h"
22 #include "exec/helper-proto.h"
23 #include "fpu/softfloat.h"
24 #include "cpu.h"
25 #include "internal.h"
26 #include "macros.h"
27 #include "arch.h"
28 #include "hex_arch_types.h"
29 #include "fma_emu.h"
30 #include "mmvec/mmvec.h"
31 #include "mmvec/macros.h"
32 
/* Single-precision exponent bias, used below when computing result exponents */
#define SF_BIAS        127
/* Number of mantissa bits in the float32 encoding assembled by build_float32() */
#define SF_MANTBITS    23
35 
36 /* Exceptions processing helpers */
37 static G_NORETURN
38 void do_raise_exception_err(CPUHexagonState *env,
39                             uint32_t exception,
40                             uintptr_t pc)
41 {
42     CPUState *cs = env_cpu(env);
43     qemu_log_mask(CPU_LOG_INT, "%s: %d\n", __func__, exception);
44     cs->exception_index = exception;
45     cpu_loop_exit_restore(cs, pc);
46 }
47 
48 G_NORETURN void HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp)
49 {
50     do_raise_exception_err(env, excp, 0);
51 }
52 
53 static void log_reg_write(CPUHexagonState *env, int rnum,
54                           target_ulong val, uint32_t slot)
55 {
56     HEX_DEBUG_LOG("log_reg_write[%d] = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")",
57                   rnum, val, val);
58     if (val == env->gpr[rnum]) {
59         HEX_DEBUG_LOG(" NO CHANGE");
60     }
61     HEX_DEBUG_LOG("\n");
62 
63     env->new_value[rnum] = val;
64     if (HEX_DEBUG) {
65         /* Do this so HELPER(debug_commit_end) will know */
66         env->reg_written[rnum] = 1;
67     }
68 }
69 
70 static void log_pred_write(CPUHexagonState *env, int pnum, target_ulong val)
71 {
72     HEX_DEBUG_LOG("log_pred_write[%d] = " TARGET_FMT_ld
73                   " (0x" TARGET_FMT_lx ")\n",
74                   pnum, val, val);
75 
76     /* Multiple writes to the same preg are and'ed together */
77     if (env->pred_written & (1 << pnum)) {
78         env->new_pred_value[pnum] &= val & 0xff;
79     } else {
80         env->new_pred_value[pnum] = val & 0xff;
81         env->pred_written |= 1 << pnum;
82     }
83 }
84 
85 static void log_store32(CPUHexagonState *env, target_ulong addr,
86                         target_ulong val, int width, int slot)
87 {
88     HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx
89                   ", %" PRId32 " [0x08%" PRIx32 "])\n",
90                   width, addr, val, val);
91     env->mem_log_stores[slot].va = addr;
92     env->mem_log_stores[slot].width = width;
93     env->mem_log_stores[slot].data32 = val;
94 }
95 
96 static void log_store64(CPUHexagonState *env, target_ulong addr,
97                         int64_t val, int width, int slot)
98 {
99     HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx
100                   ", %" PRId64 " [0x016%" PRIx64 "])\n",
101                    width, addr, val, val);
102     env->mem_log_stores[slot].va = addr;
103     env->mem_log_stores[slot].width = width;
104     env->mem_log_stores[slot].data64 = val;
105 }
106 
107 static void write_new_pc(CPUHexagonState *env, target_ulong addr)
108 {
109     HEX_DEBUG_LOG("write_new_pc(0x" TARGET_FMT_lx ")\n", addr);
110 
111     /*
112      * If more than one branch is taken in a packet, only the first one
113      * is actually done.
114      */
115     if (env->branch_taken) {
116         HEX_DEBUG_LOG("INFO: multiple branches taken in same packet, "
117                       "ignoring the second one\n");
118     } else {
119         fCHECK_PCALIGN(addr);
120         env->branch_taken = 1;
121         env->next_PC = addr;
122     }
123 }
124 
125 /* Handy place to set a breakpoint */
126 void HELPER(debug_start_packet)(CPUHexagonState *env)
127 {
128     HEX_DEBUG_LOG("Start packet: pc = 0x" TARGET_FMT_lx "\n",
129                   env->gpr[HEX_REG_PC]);
130 
131     for (int i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
132         env->reg_written[i] = 0;
133     }
134 }
135 
136 /* Checks for bookkeeping errors between disassembly context and runtime */
137 void HELPER(debug_check_store_width)(CPUHexagonState *env, int slot, int check)
138 {
139     if (env->mem_log_stores[slot].width != check) {
140         HEX_DEBUG_LOG("ERROR: %d != %d\n",
141                       env->mem_log_stores[slot].width, check);
142         g_assert_not_reached();
143     }
144 }
145 
146 void HELPER(commit_store)(CPUHexagonState *env, int slot_num)
147 {
148     uintptr_t ra = GETPC();
149     uint8_t width = env->mem_log_stores[slot_num].width;
150     target_ulong va = env->mem_log_stores[slot_num].va;
151 
152     switch (width) {
153     case 1:
154         cpu_stb_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
155         break;
156     case 2:
157         cpu_stw_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
158         break;
159     case 4:
160         cpu_stl_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
161         break;
162     case 8:
163         cpu_stq_data_ra(env, va, env->mem_log_stores[slot_num].data64, ra);
164         break;
165     default:
166         g_assert_not_reached();
167     }
168 }
169 
170 void HELPER(gather_store)(CPUHexagonState *env, uint32_t addr, int slot)
171 {
172     mem_gather_store(env, addr, slot);
173 }
174 
175 void HELPER(commit_hvx_stores)(CPUHexagonState *env)
176 {
177     uintptr_t ra = GETPC();
178     int i;
179 
180     /* Normal (possibly masked) vector store */
181     for (i = 0; i < VSTORES_MAX; i++) {
182         if (env->vstore_pending[i]) {
183             env->vstore_pending[i] = 0;
184             target_ulong va = env->vstore[i].va;
185             int size = env->vstore[i].size;
186             for (int j = 0; j < size; j++) {
187                 if (test_bit(j, env->vstore[i].mask)) {
188                     cpu_stb_data_ra(env, va + j, env->vstore[i].data.ub[j], ra);
189                 }
190             }
191         }
192     }
193 
194     /* Scatter store */
195     if (env->vtcm_pending) {
196         env->vtcm_pending = false;
197         if (env->vtcm_log.op) {
198             /* Need to perform the scatter read/modify/write at commit time */
199             if (env->vtcm_log.op_size == 2) {
200                 SCATTER_OP_WRITE_TO_MEM(uint16_t);
201             } else if (env->vtcm_log.op_size == 4) {
202                 /* Word Scatter += */
203                 SCATTER_OP_WRITE_TO_MEM(uint32_t);
204             } else {
205                 g_assert_not_reached();
206             }
207         } else {
208             for (i = 0; i < sizeof(MMVector); i++) {
209                 if (test_bit(i, env->vtcm_log.mask)) {
210                     cpu_stb_data_ra(env, env->vtcm_log.va[i],
211                                     env->vtcm_log.data.ub[i], ra);
212                     clear_bit(i, env->vtcm_log.mask);
213                     env->vtcm_log.data.ub[i] = 0;
214                 }
215 
216             }
217         }
218     }
219 }
220 
221 static void print_store(CPUHexagonState *env, int slot)
222 {
223     if (!(env->slot_cancelled & (1 << slot))) {
224         uint8_t width = env->mem_log_stores[slot].width;
225         if (width == 1) {
226             uint32_t data = env->mem_log_stores[slot].data32 & 0xff;
227             HEX_DEBUG_LOG("\tmemb[0x" TARGET_FMT_lx "] = %" PRId32
228                           " (0x%02" PRIx32 ")\n",
229                           env->mem_log_stores[slot].va, data, data);
230         } else if (width == 2) {
231             uint32_t data = env->mem_log_stores[slot].data32 & 0xffff;
232             HEX_DEBUG_LOG("\tmemh[0x" TARGET_FMT_lx "] = %" PRId32
233                           " (0x%04" PRIx32 ")\n",
234                           env->mem_log_stores[slot].va, data, data);
235         } else if (width == 4) {
236             uint32_t data = env->mem_log_stores[slot].data32;
237             HEX_DEBUG_LOG("\tmemw[0x" TARGET_FMT_lx "] = %" PRId32
238                           " (0x%08" PRIx32 ")\n",
239                           env->mem_log_stores[slot].va, data, data);
240         } else if (width == 8) {
241             HEX_DEBUG_LOG("\tmemd[0x" TARGET_FMT_lx "] = %" PRId64
242                           " (0x%016" PRIx64 ")\n",
243                           env->mem_log_stores[slot].va,
244                           env->mem_log_stores[slot].data64,
245                           env->mem_log_stores[slot].data64);
246         } else {
247             HEX_DEBUG_LOG("\tBad store width %d\n", width);
248             g_assert_not_reached();
249         }
250     }
251 }
252 
253 /* This function is a handy place to set a breakpoint */
254 void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1)
255 {
256     bool reg_printed = false;
257     bool pred_printed = false;
258     int i;
259 
260     HEX_DEBUG_LOG("Packet committed: pc = 0x" TARGET_FMT_lx "\n",
261                   env->this_PC);
262     HEX_DEBUG_LOG("slot_cancelled = %d\n", env->slot_cancelled);
263 
264     for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
265         if (env->reg_written[i]) {
266             if (!reg_printed) {
267                 HEX_DEBUG_LOG("Regs written\n");
268                 reg_printed = true;
269             }
270             HEX_DEBUG_LOG("\tr%d = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")\n",
271                           i, env->new_value[i], env->new_value[i]);
272         }
273     }
274 
275     for (i = 0; i < NUM_PREGS; i++) {
276         if (env->pred_written & (1 << i)) {
277             if (!pred_printed) {
278                 HEX_DEBUG_LOG("Predicates written\n");
279                 pred_printed = true;
280             }
281             HEX_DEBUG_LOG("\tp%d = 0x" TARGET_FMT_lx "\n",
282                           i, env->new_pred_value[i]);
283         }
284     }
285 
286     if (has_st0 || has_st1) {
287         HEX_DEBUG_LOG("Stores\n");
288         if (has_st0) {
289             print_store(env, 0);
290         }
291         if (has_st1) {
292             print_store(env, 1);
293         }
294     }
295 
296     HEX_DEBUG_LOG("Next PC = " TARGET_FMT_lx "\n", env->next_PC);
297     HEX_DEBUG_LOG("Exec counters: pkt = " TARGET_FMT_lx
298                   ", insn = " TARGET_FMT_lx
299                   ", hvx = " TARGET_FMT_lx "\n",
300                   env->gpr[HEX_REG_QEMU_PKT_CNT],
301                   env->gpr[HEX_REG_QEMU_INSN_CNT],
302                   env->gpr[HEX_REG_QEMU_HVX_CNT]);
303 
304 }
305 
306 int32_t HELPER(fcircadd)(int32_t RxV, int32_t offset, int32_t M, int32_t CS)
307 {
308     uint32_t K_const = extract32(M, 24, 4);
309     uint32_t length = extract32(M, 0, 17);
310     uint32_t new_ptr = RxV + offset;
311     uint32_t start_addr;
312     uint32_t end_addr;
313 
314     if (K_const == 0 && length >= 4) {
315         start_addr = CS;
316         end_addr = start_addr + length;
317     } else {
318         /*
319          * Versions v3 and earlier used the K value to specify a power-of-2 size
320          * 2^(K+2) that is greater than the buffer length
321          */
322         int32_t mask = (1 << (K_const + 2)) - 1;
323         start_addr = RxV & (~mask);
324         end_addr = start_addr | length;
325     }
326 
327     if (new_ptr >= end_addr) {
328         new_ptr -= length;
329     } else if (new_ptr < start_addr) {
330         new_ptr += length;
331     }
332 
333     return new_ptr;
334 }
335 
336 uint32_t HELPER(fbrev)(uint32_t addr)
337 {
338     /*
339      *  Bit reverse the low 16 bits of the address
340      */
341     return deposit32(addr, 0, 16, revbit16(addr));
342 }
343 
344 static float32 build_float32(uint8_t sign, uint32_t exp, uint32_t mant)
345 {
346     return make_float32(
347         ((sign & 1) << 31) |
348         ((exp & 0xff) << SF_MANTBITS) |
349         (mant & ((1 << SF_MANTBITS) - 1)));
350 }
351 
352 /*
353  * sfrecipa, sfinvsqrta have two 32-bit results
354  *     r0,p0=sfrecipa(r1,r2)
355  *     r0,p0=sfinvsqrta(r1)
356  *
357  * Since helpers can only return a single value, we pack the two results
358  * into a 64-bit value.
359  */
360 uint64_t HELPER(sfrecipa)(CPUHexagonState *env, float32 RsV, float32 RtV)
361 {
362     int32_t PeV = 0;
363     float32 RdV;
364     int idx;
365     int adjust;
366     int mant;
367     int exp;
368 
369     arch_fpop_start(env);
370     if (arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status)) {
371         PeV = adjust;
372         idx = (RtV >> 16) & 0x7f;
373         mant = (recip_lookup_table[idx] << 15) | 1;
374         exp = SF_BIAS - (float32_getexp(RtV) - SF_BIAS) - 1;
375         RdV = build_float32(extract32(RtV, 31, 1), exp, mant);
376     }
377     arch_fpop_end(env);
378     return ((uint64_t)RdV << 32) | PeV;
379 }
380 
381 uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV)
382 {
383     int PeV = 0;
384     float32 RdV;
385     int idx;
386     int adjust;
387     int mant;
388     int exp;
389 
390     arch_fpop_start(env);
391     if (arch_sf_invsqrt_common(&RsV, &RdV, &adjust, &env->fp_status)) {
392         PeV = adjust;
393         idx = (RsV >> 17) & 0x7f;
394         mant = (invsqrt_lookup_table[idx] << 15);
395         exp = SF_BIAS - ((float32_getexp(RsV) - SF_BIAS) >> 1) - 1;
396         RdV = build_float32(extract32(RsV, 31, 1), exp, mant);
397     }
398     arch_fpop_end(env);
399     return ((uint64_t)RdV << 32) | PeV;
400 }
401 
402 int64_t HELPER(vacsh_val)(CPUHexagonState *env,
403                            int64_t RxxV, int64_t RssV, int64_t RttV)
404 {
405     for (int i = 0; i < 4; i++) {
406         int xv = sextract64(RxxV, i * 16, 16);
407         int sv = sextract64(RssV, i * 16, 16);
408         int tv = sextract64(RttV, i * 16, 16);
409         int max;
410         xv = xv + tv;
411         sv = sv - tv;
412         max = xv > sv ? xv : sv;
413         /* Note that fSATH can set the OVF bit in usr */
414         RxxV = deposit64(RxxV, i * 16, 16, fSATH(max));
415     }
416     return RxxV;
417 }
418 
419 int32_t HELPER(vacsh_pred)(CPUHexagonState *env,
420                            int64_t RxxV, int64_t RssV, int64_t RttV)
421 {
422     int32_t PeV = 0;
423     for (int i = 0; i < 4; i++) {
424         int xv = sextract64(RxxV, i * 16, 16);
425         int sv = sextract64(RssV, i * 16, 16);
426         int tv = sextract64(RttV, i * 16, 16);
427         xv = xv + tv;
428         sv = sv - tv;
429         PeV = deposit32(PeV, i * 2, 1, (xv > sv));
430         PeV = deposit32(PeV, i * 2 + 1, 1, (xv > sv));
431     }
432     return PeV;
433 }
434 
435 static void probe_store(CPUHexagonState *env, int slot, int mmu_idx)
436 {
437     if (!(env->slot_cancelled & (1 << slot))) {
438         size1u_t width = env->mem_log_stores[slot].width;
439         target_ulong va = env->mem_log_stores[slot].va;
440         uintptr_t ra = GETPC();
441         probe_write(env, va, width, mmu_idx, ra);
442     }
443 }
444 
445 /*
446  * Called from a mem_noshuf packet to make sure the load doesn't
447  * raise an exception
448  */
449 void HELPER(probe_noshuf_load)(CPUHexagonState *env, target_ulong va,
450                                int size, int mmu_idx)
451 {
452     uintptr_t retaddr = GETPC();
453     probe_read(env, va, size, mmu_idx, retaddr);
454 }
455 
456 /* Called during packet commit when there are two scalar stores */
457 void HELPER(probe_pkt_scalar_store_s0)(CPUHexagonState *env, int mmu_idx)
458 {
459     probe_store(env, 0, mmu_idx);
460 }
461 
462 void HELPER(probe_hvx_stores)(CPUHexagonState *env, int mmu_idx)
463 {
464     uintptr_t retaddr = GETPC();
465     int i;
466 
467     /* Normal (possibly masked) vector store */
468     for (i = 0; i < VSTORES_MAX; i++) {
469         if (env->vstore_pending[i]) {
470             target_ulong va = env->vstore[i].va;
471             int size = env->vstore[i].size;
472             for (int j = 0; j < size; j++) {
473                 if (test_bit(j, env->vstore[i].mask)) {
474                     probe_write(env, va + j, 1, mmu_idx, retaddr);
475                 }
476             }
477         }
478     }
479 
480     /* Scatter store */
481     if (env->vtcm_pending) {
482         if (env->vtcm_log.op) {
483             /* Need to perform the scatter read/modify/write at commit time */
484             if (env->vtcm_log.op_size == 2) {
485                 SCATTER_OP_PROBE_MEM(size2u_t, mmu_idx, retaddr);
486             } else if (env->vtcm_log.op_size == 4) {
487                 /* Word Scatter += */
488                 SCATTER_OP_PROBE_MEM(size4u_t, mmu_idx, retaddr);
489             } else {
490                 g_assert_not_reached();
491             }
492         } else {
493             for (int i = 0; i < sizeof(MMVector); i++) {
494                 if (test_bit(i, env->vtcm_log.mask)) {
495                     probe_write(env, env->vtcm_log.va[i], 1, mmu_idx, retaddr);
496                 }
497 
498             }
499         }
500     }
501 }
502 
503 void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask,
504                                          int mmu_idx)
505 {
506     bool has_st0        = (mask >> 0) & 1;
507     bool has_st1        = (mask >> 1) & 1;
508     bool has_hvx_stores = (mask >> 2) & 1;
509 
510     if (has_st0) {
511         probe_store(env, 0, mmu_idx);
512     }
513     if (has_st1) {
514         probe_store(env, 1, mmu_idx);
515     }
516     if (has_hvx_stores) {
517         HELPER(probe_hvx_stores)(env, mmu_idx);
518     }
519 }
520 
521 /*
522  * mem_noshuf
523  * Section 5.5 of the Hexagon V67 Programmer's Reference Manual
524  *
525  * If the load is in slot 0 and there is a store in slot1 (that
526  * wasn't cancelled), we have to do the store first.
527  */
528 static void check_noshuf(CPUHexagonState *env, uint32_t slot,
529                          target_ulong vaddr, int size)
530 {
531     if (slot == 0 && env->pkt_has_store_s1 &&
532         ((env->slot_cancelled & (1 << 1)) == 0)) {
533         HELPER(probe_noshuf_load)(env, vaddr, size, MMU_USER_IDX);
534         HELPER(commit_store)(env, 1);
535     }
536 }
537 
538 static uint8_t mem_load1(CPUHexagonState *env, uint32_t slot,
539                          target_ulong vaddr)
540 {
541     uintptr_t ra = GETPC();
542     check_noshuf(env, slot, vaddr, 1);
543     return cpu_ldub_data_ra(env, vaddr, ra);
544 }
545 
546 static uint16_t mem_load2(CPUHexagonState *env, uint32_t slot,
547                           target_ulong vaddr)
548 {
549     uintptr_t ra = GETPC();
550     check_noshuf(env, slot, vaddr, 2);
551     return cpu_lduw_data_ra(env, vaddr, ra);
552 }
553 
554 static uint32_t mem_load4(CPUHexagonState *env, uint32_t slot,
555                           target_ulong vaddr)
556 {
557     uintptr_t ra = GETPC();
558     check_noshuf(env, slot, vaddr, 4);
559     return cpu_ldl_data_ra(env, vaddr, ra);
560 }
561 
562 static uint64_t mem_load8(CPUHexagonState *env, uint32_t slot,
563                           target_ulong vaddr)
564 {
565     uintptr_t ra = GETPC();
566     check_noshuf(env, slot, vaddr, 8);
567     return cpu_ldq_data_ra(env, vaddr, ra);
568 }
569 
570 /* Floating point */
571 float64 HELPER(conv_sf2df)(CPUHexagonState *env, float32 RsV)
572 {
573     float64 out_f64;
574     arch_fpop_start(env);
575     out_f64 = float32_to_float64(RsV, &env->fp_status);
576     arch_fpop_end(env);
577     return out_f64;
578 }
579 
580 float32 HELPER(conv_df2sf)(CPUHexagonState *env, float64 RssV)
581 {
582     float32 out_f32;
583     arch_fpop_start(env);
584     out_f32 = float64_to_float32(RssV, &env->fp_status);
585     arch_fpop_end(env);
586     return out_f32;
587 }
588 
589 float32 HELPER(conv_uw2sf)(CPUHexagonState *env, int32_t RsV)
590 {
591     float32 RdV;
592     arch_fpop_start(env);
593     RdV = uint32_to_float32(RsV, &env->fp_status);
594     arch_fpop_end(env);
595     return RdV;
596 }
597 
598 float64 HELPER(conv_uw2df)(CPUHexagonState *env, int32_t RsV)
599 {
600     float64 RddV;
601     arch_fpop_start(env);
602     RddV = uint32_to_float64(RsV, &env->fp_status);
603     arch_fpop_end(env);
604     return RddV;
605 }
606 
607 float32 HELPER(conv_w2sf)(CPUHexagonState *env, int32_t RsV)
608 {
609     float32 RdV;
610     arch_fpop_start(env);
611     RdV = int32_to_float32(RsV, &env->fp_status);
612     arch_fpop_end(env);
613     return RdV;
614 }
615 
616 float64 HELPER(conv_w2df)(CPUHexagonState *env, int32_t RsV)
617 {
618     float64 RddV;
619     arch_fpop_start(env);
620     RddV = int32_to_float64(RsV, &env->fp_status);
621     arch_fpop_end(env);
622     return RddV;
623 }
624 
625 float32 HELPER(conv_ud2sf)(CPUHexagonState *env, int64_t RssV)
626 {
627     float32 RdV;
628     arch_fpop_start(env);
629     RdV = uint64_to_float32(RssV, &env->fp_status);
630     arch_fpop_end(env);
631     return RdV;
632 }
633 
634 float64 HELPER(conv_ud2df)(CPUHexagonState *env, int64_t RssV)
635 {
636     float64 RddV;
637     arch_fpop_start(env);
638     RddV = uint64_to_float64(RssV, &env->fp_status);
639     arch_fpop_end(env);
640     return RddV;
641 }
642 
643 float32 HELPER(conv_d2sf)(CPUHexagonState *env, int64_t RssV)
644 {
645     float32 RdV;
646     arch_fpop_start(env);
647     RdV = int64_to_float32(RssV, &env->fp_status);
648     arch_fpop_end(env);
649     return RdV;
650 }
651 
652 float64 HELPER(conv_d2df)(CPUHexagonState *env, int64_t RssV)
653 {
654     float64 RddV;
655     arch_fpop_start(env);
656     RddV = int64_to_float64(RssV, &env->fp_status);
657     arch_fpop_end(env);
658     return RddV;
659 }
660 
661 uint32_t HELPER(conv_sf2uw)(CPUHexagonState *env, float32 RsV)
662 {
663     uint32_t RdV;
664     arch_fpop_start(env);
665     /* Hexagon checks the sign before rounding */
666     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
667         float_raise(float_flag_invalid, &env->fp_status);
668         RdV = 0;
669     } else {
670         RdV = float32_to_uint32(RsV, &env->fp_status);
671     }
672     arch_fpop_end(env);
673     return RdV;
674 }
675 
676 int32_t HELPER(conv_sf2w)(CPUHexagonState *env, float32 RsV)
677 {
678     int32_t RdV;
679     arch_fpop_start(env);
680     /* Hexagon returns -1 for NaN */
681     if (float32_is_any_nan(RsV)) {
682         float_raise(float_flag_invalid, &env->fp_status);
683         RdV = -1;
684     } else {
685         RdV = float32_to_int32(RsV, &env->fp_status);
686     }
687     arch_fpop_end(env);
688     return RdV;
689 }
690 
691 uint64_t HELPER(conv_sf2ud)(CPUHexagonState *env, float32 RsV)
692 {
693     uint64_t RddV;
694     arch_fpop_start(env);
695     /* Hexagon checks the sign before rounding */
696     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
697         float_raise(float_flag_invalid, &env->fp_status);
698         RddV = 0;
699     } else {
700         RddV = float32_to_uint64(RsV, &env->fp_status);
701     }
702     arch_fpop_end(env);
703     return RddV;
704 }
705 
706 int64_t HELPER(conv_sf2d)(CPUHexagonState *env, float32 RsV)
707 {
708     int64_t RddV;
709     arch_fpop_start(env);
710     /* Hexagon returns -1 for NaN */
711     if (float32_is_any_nan(RsV)) {
712         float_raise(float_flag_invalid, &env->fp_status);
713         RddV = -1;
714     } else {
715         RddV = float32_to_int64(RsV, &env->fp_status);
716     }
717     arch_fpop_end(env);
718     return RddV;
719 }
720 
721 uint32_t HELPER(conv_df2uw)(CPUHexagonState *env, float64 RssV)
722 {
723     uint32_t RdV;
724     arch_fpop_start(env);
725     /* Hexagon checks the sign before rounding */
726     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
727         float_raise(float_flag_invalid, &env->fp_status);
728         RdV = 0;
729     } else {
730         RdV = float64_to_uint32(RssV, &env->fp_status);
731     }
732     arch_fpop_end(env);
733     return RdV;
734 }
735 
736 int32_t HELPER(conv_df2w)(CPUHexagonState *env, float64 RssV)
737 {
738     int32_t RdV;
739     arch_fpop_start(env);
740     /* Hexagon returns -1 for NaN */
741     if (float64_is_any_nan(RssV)) {
742         float_raise(float_flag_invalid, &env->fp_status);
743         RdV = -1;
744     } else {
745         RdV = float64_to_int32(RssV, &env->fp_status);
746     }
747     arch_fpop_end(env);
748     return RdV;
749 }
750 
751 uint64_t HELPER(conv_df2ud)(CPUHexagonState *env, float64 RssV)
752 {
753     uint64_t RddV;
754     arch_fpop_start(env);
755     /* Hexagon checks the sign before rounding */
756     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
757         float_raise(float_flag_invalid, &env->fp_status);
758         RddV = 0;
759     } else {
760         RddV = float64_to_uint64(RssV, &env->fp_status);
761     }
762     arch_fpop_end(env);
763     return RddV;
764 }
765 
766 int64_t HELPER(conv_df2d)(CPUHexagonState *env, float64 RssV)
767 {
768     int64_t RddV;
769     arch_fpop_start(env);
770     /* Hexagon returns -1 for NaN */
771     if (float64_is_any_nan(RssV)) {
772         float_raise(float_flag_invalid, &env->fp_status);
773         RddV = -1;
774     } else {
775         RddV = float64_to_int64(RssV, &env->fp_status);
776     }
777     arch_fpop_end(env);
778     return RddV;
779 }
780 
781 uint32_t HELPER(conv_sf2uw_chop)(CPUHexagonState *env, float32 RsV)
782 {
783     uint32_t RdV;
784     arch_fpop_start(env);
785     /* Hexagon checks the sign before rounding */
786     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
787         float_raise(float_flag_invalid, &env->fp_status);
788         RdV = 0;
789     } else {
790         RdV = float32_to_uint32_round_to_zero(RsV, &env->fp_status);
791     }
792     arch_fpop_end(env);
793     return RdV;
794 }
795 
796 int32_t HELPER(conv_sf2w_chop)(CPUHexagonState *env, float32 RsV)
797 {
798     int32_t RdV;
799     arch_fpop_start(env);
800     /* Hexagon returns -1 for NaN */
801     if (float32_is_any_nan(RsV)) {
802         float_raise(float_flag_invalid, &env->fp_status);
803         RdV = -1;
804     } else {
805         RdV = float32_to_int32_round_to_zero(RsV, &env->fp_status);
806     }
807     arch_fpop_end(env);
808     return RdV;
809 }
810 
811 uint64_t HELPER(conv_sf2ud_chop)(CPUHexagonState *env, float32 RsV)
812 {
813     uint64_t RddV;
814     arch_fpop_start(env);
815     /* Hexagon checks the sign before rounding */
816     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
817         float_raise(float_flag_invalid, &env->fp_status);
818         RddV = 0;
819     } else {
820         RddV = float32_to_uint64_round_to_zero(RsV, &env->fp_status);
821     }
822     arch_fpop_end(env);
823     return RddV;
824 }
825 
826 int64_t HELPER(conv_sf2d_chop)(CPUHexagonState *env, float32 RsV)
827 {
828     int64_t RddV;
829     arch_fpop_start(env);
830     /* Hexagon returns -1 for NaN */
831     if (float32_is_any_nan(RsV)) {
832         float_raise(float_flag_invalid, &env->fp_status);
833         RddV = -1;
834     } else {
835         RddV = float32_to_int64_round_to_zero(RsV, &env->fp_status);
836     }
837     arch_fpop_end(env);
838     return RddV;
839 }
840 
841 uint32_t HELPER(conv_df2uw_chop)(CPUHexagonState *env, float64 RssV)
842 {
843     uint32_t RdV;
844     arch_fpop_start(env);
845     /* Hexagon checks the sign before rounding */
846     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
847         float_raise(float_flag_invalid, &env->fp_status);
848         RdV = 0;
849     } else {
850         RdV = float64_to_uint32_round_to_zero(RssV, &env->fp_status);
851     }
852     arch_fpop_end(env);
853     return RdV;
854 }
855 
856 int32_t HELPER(conv_df2w_chop)(CPUHexagonState *env, float64 RssV)
857 {
858     int32_t RdV;
859     arch_fpop_start(env);
860     /* Hexagon returns -1 for NaN */
861     if (float64_is_any_nan(RssV)) {
862         float_raise(float_flag_invalid, &env->fp_status);
863         RdV = -1;
864     } else {
865         RdV = float64_to_int32_round_to_zero(RssV, &env->fp_status);
866     }
867     arch_fpop_end(env);
868     return RdV;
869 }
870 
871 uint64_t HELPER(conv_df2ud_chop)(CPUHexagonState *env, float64 RssV)
872 {
873     uint64_t RddV;
874     arch_fpop_start(env);
875     /* Hexagon checks the sign before rounding */
876     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
877         float_raise(float_flag_invalid, &env->fp_status);
878         RddV = 0;
879     } else {
880         RddV = float64_to_uint64_round_to_zero(RssV, &env->fp_status);
881     }
882     arch_fpop_end(env);
883     return RddV;
884 }
885 
886 int64_t HELPER(conv_df2d_chop)(CPUHexagonState *env, float64 RssV)
887 {
888     int64_t RddV;
889     arch_fpop_start(env);
890     /* Hexagon returns -1 for NaN */
891     if (float64_is_any_nan(RssV)) {
892         float_raise(float_flag_invalid, &env->fp_status);
893         RddV = -1;
894     } else {
895         RddV = float64_to_int64_round_to_zero(RssV, &env->fp_status);
896     }
897     arch_fpop_end(env);
898     return RddV;
899 }
900 
901 float32 HELPER(sfadd)(CPUHexagonState *env, float32 RsV, float32 RtV)
902 {
903     float32 RdV;
904     arch_fpop_start(env);
905     RdV = float32_add(RsV, RtV, &env->fp_status);
906     arch_fpop_end(env);
907     return RdV;
908 }
909 
910 float32 HELPER(sfsub)(CPUHexagonState *env, float32 RsV, float32 RtV)
911 {
912     float32 RdV;
913     arch_fpop_start(env);
914     RdV = float32_sub(RsV, RtV, &env->fp_status);
915     arch_fpop_end(env);
916     return RdV;
917 }
918 
919 int32_t HELPER(sfcmpeq)(CPUHexagonState *env, float32 RsV, float32 RtV)
920 {
921     int32_t PdV;
922     arch_fpop_start(env);
923     PdV = f8BITSOF(float32_eq_quiet(RsV, RtV, &env->fp_status));
924     arch_fpop_end(env);
925     return PdV;
926 }
927 
928 int32_t HELPER(sfcmpgt)(CPUHexagonState *env, float32 RsV, float32 RtV)
929 {
930     int cmp;
931     int32_t PdV;
932     arch_fpop_start(env);
933     cmp = float32_compare_quiet(RsV, RtV, &env->fp_status);
934     PdV = f8BITSOF(cmp == float_relation_greater);
935     arch_fpop_end(env);
936     return PdV;
937 }
938 
939 int32_t HELPER(sfcmpge)(CPUHexagonState *env, float32 RsV, float32 RtV)
940 {
941     int cmp;
942     int32_t PdV;
943     arch_fpop_start(env);
944     cmp = float32_compare_quiet(RsV, RtV, &env->fp_status);
945     PdV = f8BITSOF(cmp == float_relation_greater ||
946                    cmp == float_relation_equal);
947     arch_fpop_end(env);
948     return PdV;
949 }
950 
951 int32_t HELPER(sfcmpuo)(CPUHexagonState *env, float32 RsV, float32 RtV)
952 {
953     int32_t PdV;
954     arch_fpop_start(env);
955     PdV = f8BITSOF(float32_unordered_quiet(RsV, RtV, &env->fp_status));
956     arch_fpop_end(env);
957     return PdV;
958 }
959 
960 float32 HELPER(sfmax)(CPUHexagonState *env, float32 RsV, float32 RtV)
961 {
962     float32 RdV;
963     arch_fpop_start(env);
964     RdV = float32_maximum_number(RsV, RtV, &env->fp_status);
965     arch_fpop_end(env);
966     return RdV;
967 }
968 
969 float32 HELPER(sfmin)(CPUHexagonState *env, float32 RsV, float32 RtV)
970 {
971     float32 RdV;
972     arch_fpop_start(env);
973     RdV = float32_minimum_number(RsV, RtV, &env->fp_status);
974     arch_fpop_end(env);
975     return RdV;
976 }
977 
978 int32_t HELPER(sfclass)(CPUHexagonState *env, float32 RsV, int32_t uiV)
979 {
980     int32_t PdV = 0;
981     arch_fpop_start(env);
982     if (fGETBIT(0, uiV) && float32_is_zero(RsV)) {
983         PdV = 0xff;
984     }
985     if (fGETBIT(1, uiV) && float32_is_normal(RsV)) {
986         PdV = 0xff;
987     }
988     if (fGETBIT(2, uiV) && float32_is_denormal(RsV)) {
989         PdV = 0xff;
990     }
991     if (fGETBIT(3, uiV) && float32_is_infinity(RsV)) {
992         PdV = 0xff;
993     }
994     if (fGETBIT(4, uiV) && float32_is_any_nan(RsV)) {
995         PdV = 0xff;
996     }
997     set_float_exception_flags(0, &env->fp_status);
998     arch_fpop_end(env);
999     return PdV;
1000 }
1001 
1002 float32 HELPER(sffixupn)(CPUHexagonState *env, float32 RsV, float32 RtV)
1003 {
1004     float32 RdV = 0;
1005     int adjust;
1006     arch_fpop_start(env);
1007     arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status);
1008     RdV = RsV;
1009     arch_fpop_end(env);
1010     return RdV;
1011 }
1012 
1013 float32 HELPER(sffixupd)(CPUHexagonState *env, float32 RsV, float32 RtV)
1014 {
1015     float32 RdV = 0;
1016     int adjust;
1017     arch_fpop_start(env);
1018     arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status);
1019     RdV = RtV;
1020     arch_fpop_end(env);
1021     return RdV;
1022 }
1023 
1024 float32 HELPER(sffixupr)(CPUHexagonState *env, float32 RsV)
1025 {
1026     float32 RdV = 0;
1027     int adjust;
1028     arch_fpop_start(env);
1029     arch_sf_invsqrt_common(&RsV, &RdV, &adjust, &env->fp_status);
1030     RdV = RsV;
1031     arch_fpop_end(env);
1032     return RdV;
1033 }
1034 
1035 float64 HELPER(dfadd)(CPUHexagonState *env, float64 RssV, float64 RttV)
1036 {
1037     float64 RddV;
1038     arch_fpop_start(env);
1039     RddV = float64_add(RssV, RttV, &env->fp_status);
1040     arch_fpop_end(env);
1041     return RddV;
1042 }
1043 
1044 float64 HELPER(dfsub)(CPUHexagonState *env, float64 RssV, float64 RttV)
1045 {
1046     float64 RddV;
1047     arch_fpop_start(env);
1048     RddV = float64_sub(RssV, RttV, &env->fp_status);
1049     arch_fpop_end(env);
1050     return RddV;
1051 }
1052 
1053 float64 HELPER(dfmax)(CPUHexagonState *env, float64 RssV, float64 RttV)
1054 {
1055     float64 RddV;
1056     arch_fpop_start(env);
1057     RddV = float64_maximum_number(RssV, RttV, &env->fp_status);
1058     arch_fpop_end(env);
1059     return RddV;
1060 }
1061 
1062 float64 HELPER(dfmin)(CPUHexagonState *env, float64 RssV, float64 RttV)
1063 {
1064     float64 RddV;
1065     arch_fpop_start(env);
1066     RddV = float64_minimum_number(RssV, RttV, &env->fp_status);
1067     arch_fpop_end(env);
1068     return RddV;
1069 }
1070 
1071 int32_t HELPER(dfcmpeq)(CPUHexagonState *env, float64 RssV, float64 RttV)
1072 {
1073     int32_t PdV;
1074     arch_fpop_start(env);
1075     PdV = f8BITSOF(float64_eq_quiet(RssV, RttV, &env->fp_status));
1076     arch_fpop_end(env);
1077     return PdV;
1078 }
1079 
1080 int32_t HELPER(dfcmpgt)(CPUHexagonState *env, float64 RssV, float64 RttV)
1081 {
1082     int cmp;
1083     int32_t PdV;
1084     arch_fpop_start(env);
1085     cmp = float64_compare_quiet(RssV, RttV, &env->fp_status);
1086     PdV = f8BITSOF(cmp == float_relation_greater);
1087     arch_fpop_end(env);
1088     return PdV;
1089 }
1090 
1091 int32_t HELPER(dfcmpge)(CPUHexagonState *env, float64 RssV, float64 RttV)
1092 {
1093     int cmp;
1094     int32_t PdV;
1095     arch_fpop_start(env);
1096     cmp = float64_compare_quiet(RssV, RttV, &env->fp_status);
1097     PdV = f8BITSOF(cmp == float_relation_greater ||
1098                    cmp == float_relation_equal);
1099     arch_fpop_end(env);
1100     return PdV;
1101 }
1102 
1103 int32_t HELPER(dfcmpuo)(CPUHexagonState *env, float64 RssV, float64 RttV)
1104 {
1105     int32_t PdV;
1106     arch_fpop_start(env);
1107     PdV = f8BITSOF(float64_unordered_quiet(RssV, RttV, &env->fp_status));
1108     arch_fpop_end(env);
1109     return PdV;
1110 }
1111 
1112 int32_t HELPER(dfclass)(CPUHexagonState *env, float64 RssV, int32_t uiV)
1113 {
1114     int32_t PdV = 0;
1115     arch_fpop_start(env);
1116     if (fGETBIT(0, uiV) && float64_is_zero(RssV)) {
1117         PdV = 0xff;
1118     }
1119     if (fGETBIT(1, uiV) && float64_is_normal(RssV)) {
1120         PdV = 0xff;
1121     }
1122     if (fGETBIT(2, uiV) && float64_is_denormal(RssV)) {
1123         PdV = 0xff;
1124     }
1125     if (fGETBIT(3, uiV) && float64_is_infinity(RssV)) {
1126         PdV = 0xff;
1127     }
1128     if (fGETBIT(4, uiV) && float64_is_any_nan(RssV)) {
1129         PdV = 0xff;
1130     }
1131     set_float_exception_flags(0, &env->fp_status);
1132     arch_fpop_end(env);
1133     return PdV;
1134 }
1135 
1136 float32 HELPER(sfmpy)(CPUHexagonState *env, float32 RsV, float32 RtV)
1137 {
1138     float32 RdV;
1139     arch_fpop_start(env);
1140     RdV = internal_mpyf(RsV, RtV, &env->fp_status);
1141     arch_fpop_end(env);
1142     return RdV;
1143 }
1144 
1145 float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
1146                       float32 RsV, float32 RtV)
1147 {
1148     arch_fpop_start(env);
1149     RxV = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
1150     arch_fpop_end(env);
1151     return RxV;
1152 }
1153 
1154 static bool is_zero_prod(float32 a, float32 b)
1155 {
1156     return ((float32_is_zero(a) && is_finite(b)) ||
1157             (float32_is_zero(b) && is_finite(a)));
1158 }
1159 
1160 static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
1161 {
1162     float32 ret = dst;
1163     if (float32_is_any_nan(x)) {
1164         if (extract32(x, 22, 1) == 0) {
1165             float_raise(float_flag_invalid, fp_status);
1166         }
1167         ret = make_float32(0xffffffff);    /* nan */
1168     }
1169     return ret;
1170 }
1171 
1172 float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
1173                          float32 RsV, float32 RtV, float32 PuV)
1174 {
1175     size4s_t tmp;
1176     arch_fpop_start(env);
1177     RxV = check_nan(RxV, RxV, &env->fp_status);
1178     RxV = check_nan(RxV, RsV, &env->fp_status);
1179     RxV = check_nan(RxV, RtV, &env->fp_status);
1180     tmp = internal_fmafx(RsV, RtV, RxV, fSXTN(8, 64, PuV), &env->fp_status);
1181     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1182         RxV = tmp;
1183     }
1184     arch_fpop_end(env);
1185     return RxV;
1186 }
1187 
1188 float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
1189                       float32 RsV, float32 RtV)
1190 {
1191     float32 neg_RsV;
1192     arch_fpop_start(env);
1193     neg_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
1194     RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status);
1195     arch_fpop_end(env);
1196     return RxV;
1197 }
1198 
1199 static bool is_inf_prod(int32_t a, int32_t b)
1200 {
1201     return (float32_is_infinity(a) && float32_is_infinity(b)) ||
1202            (float32_is_infinity(a) && is_finite(b) && !float32_is_zero(b)) ||
1203            (float32_is_infinity(b) && is_finite(a) && !float32_is_zero(a));
1204 }
1205 
1206 float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
1207                           float32 RsV, float32 RtV)
1208 {
1209     bool infinp;
1210     bool infminusinf;
1211     float32 tmp;
1212 
1213     arch_fpop_start(env);
1214     set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
1215     infminusinf = float32_is_infinity(RxV) &&
1216                   is_inf_prod(RsV, RtV) &&
1217                   (fGETBIT(31, RsV ^ RxV ^ RtV) != 0);
1218     infinp = float32_is_infinity(RxV) ||
1219              float32_is_infinity(RtV) ||
1220              float32_is_infinity(RsV);
1221     RxV = check_nan(RxV, RxV, &env->fp_status);
1222     RxV = check_nan(RxV, RsV, &env->fp_status);
1223     RxV = check_nan(RxV, RtV, &env->fp_status);
1224     tmp = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
1225     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1226         RxV = tmp;
1227     }
1228     set_float_exception_flags(0, &env->fp_status);
1229     if (float32_is_infinity(RxV) && !infinp) {
1230         RxV = RxV - 1;
1231     }
1232     if (infminusinf) {
1233         RxV = 0;
1234     }
1235     arch_fpop_end(env);
1236     return RxV;
1237 }
1238 
1239 float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV,
1240                           float32 RsV, float32 RtV)
1241 {
1242     bool infinp;
1243     bool infminusinf;
1244     float32 tmp;
1245 
1246     arch_fpop_start(env);
1247     set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
1248     infminusinf = float32_is_infinity(RxV) &&
1249                   is_inf_prod(RsV, RtV) &&
1250                   (fGETBIT(31, RsV ^ RxV ^ RtV) == 0);
1251     infinp = float32_is_infinity(RxV) ||
1252              float32_is_infinity(RtV) ||
1253              float32_is_infinity(RsV);
1254     RxV = check_nan(RxV, RxV, &env->fp_status);
1255     RxV = check_nan(RxV, RsV, &env->fp_status);
1256     RxV = check_nan(RxV, RtV, &env->fp_status);
1257     float32 minus_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
1258     tmp = internal_fmafx(minus_RsV, RtV, RxV, 0, &env->fp_status);
1259     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1260         RxV = tmp;
1261     }
1262     set_float_exception_flags(0, &env->fp_status);
1263     if (float32_is_infinity(RxV) && !infinp) {
1264         RxV = RxV - 1;
1265     }
1266     if (infminusinf) {
1267         RxV = 0;
1268     }
1269     arch_fpop_end(env);
1270     return RxV;
1271 }
1272 
1273 float64 HELPER(dfmpyfix)(CPUHexagonState *env, float64 RssV, float64 RttV)
1274 {
1275     int64_t RddV;
1276     arch_fpop_start(env);
1277     if (float64_is_denormal(RssV) &&
1278         (float64_getexp(RttV) >= 512) &&
1279         float64_is_normal(RttV)) {
1280         RddV = float64_mul(RssV, make_float64(0x4330000000000000),
1281                            &env->fp_status);
1282     } else if (float64_is_denormal(RttV) &&
1283                (float64_getexp(RssV) >= 512) &&
1284                float64_is_normal(RssV)) {
1285         RddV = float64_mul(RssV, make_float64(0x3cb0000000000000),
1286                            &env->fp_status);
1287     } else {
1288         RddV = RssV;
1289     }
1290     arch_fpop_end(env);
1291     return RddV;
1292 }
1293 
1294 float64 HELPER(dfmpyhh)(CPUHexagonState *env, float64 RxxV,
1295                         float64 RssV, float64 RttV)
1296 {
1297     arch_fpop_start(env);
1298     RxxV = internal_mpyhh(RssV, RttV, RxxV, &env->fp_status);
1299     arch_fpop_end(env);
1300     return RxxV;
1301 }
1302 
1303 /* Histogram instructions */
1304 
1305 void HELPER(vhist)(CPUHexagonState *env)
1306 {
1307     MMVector *input = &env->tmp_VRegs[0];
1308 
1309     for (int lane = 0; lane < 8; lane++) {
1310         for (int i = 0; i < sizeof(MMVector) / 8; ++i) {
1311             unsigned char value = input->ub[(sizeof(MMVector) / 8) * lane + i];
1312             unsigned char regno = value >> 3;
1313             unsigned char element = value & 7;
1314 
1315             env->VRegs[regno].uh[(sizeof(MMVector) / 16) * lane + element]++;
1316         }
1317     }
1318 }
1319 
1320 void HELPER(vhistq)(CPUHexagonState *env)
1321 {
1322     MMVector *input = &env->tmp_VRegs[0];
1323 
1324     for (int lane = 0; lane < 8; lane++) {
1325         for (int i = 0; i < sizeof(MMVector) / 8; ++i) {
1326             unsigned char value = input->ub[(sizeof(MMVector) / 8) * lane + i];
1327             unsigned char regno = value >> 3;
1328             unsigned char element = value & 7;
1329 
1330             if (fGETQBIT(env->qtmp, sizeof(MMVector) / 8 * lane + i)) {
1331                 env->VRegs[regno].uh[
1332                     (sizeof(MMVector) / 16) * lane + element]++;
1333             }
1334         }
1335     }
1336 }
1337 
1338 void HELPER(vwhist256)(CPUHexagonState *env)
1339 {
1340     MMVector *input = &env->tmp_VRegs[0];
1341 
1342     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1343         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1344         unsigned int weight = fGETUBYTE(1, input->h[i]);
1345         unsigned int vindex = (bucket >> 3) & 0x1F;
1346         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1347 
1348         env->VRegs[vindex].uh[elindex] =
1349             env->VRegs[vindex].uh[elindex] + weight;
1350     }
1351 }
1352 
1353 void HELPER(vwhist256q)(CPUHexagonState *env)
1354 {
1355     MMVector *input = &env->tmp_VRegs[0];
1356 
1357     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1358         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1359         unsigned int weight = fGETUBYTE(1, input->h[i]);
1360         unsigned int vindex = (bucket >> 3) & 0x1F;
1361         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1362 
1363         if (fGETQBIT(env->qtmp, 2 * i)) {
1364             env->VRegs[vindex].uh[elindex] =
1365                 env->VRegs[vindex].uh[elindex] + weight;
1366         }
1367     }
1368 }
1369 
1370 void HELPER(vwhist256_sat)(CPUHexagonState *env)
1371 {
1372     MMVector *input = &env->tmp_VRegs[0];
1373 
1374     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1375         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1376         unsigned int weight = fGETUBYTE(1, input->h[i]);
1377         unsigned int vindex = (bucket >> 3) & 0x1F;
1378         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1379 
1380         env->VRegs[vindex].uh[elindex] =
1381             fVSATUH(env->VRegs[vindex].uh[elindex] + weight);
1382     }
1383 }
1384 
1385 void HELPER(vwhist256q_sat)(CPUHexagonState *env)
1386 {
1387     MMVector *input = &env->tmp_VRegs[0];
1388 
1389     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1390         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1391         unsigned int weight = fGETUBYTE(1, input->h[i]);
1392         unsigned int vindex = (bucket >> 3) & 0x1F;
1393         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1394 
1395         if (fGETQBIT(env->qtmp, 2 * i)) {
1396             env->VRegs[vindex].uh[elindex] =
1397                 fVSATUH(env->VRegs[vindex].uh[elindex] + weight);
1398         }
1399     }
1400 }
1401 
1402 void HELPER(vwhist128)(CPUHexagonState *env)
1403 {
1404     MMVector *input = &env->tmp_VRegs[0];
1405 
1406     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1407         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1408         unsigned int weight = fGETUBYTE(1, input->h[i]);
1409         unsigned int vindex = (bucket >> 3) & 0x1F;
1410         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1411 
1412         env->VRegs[vindex].uw[elindex] =
1413             env->VRegs[vindex].uw[elindex] + weight;
1414     }
1415 }
1416 
1417 void HELPER(vwhist128q)(CPUHexagonState *env)
1418 {
1419     MMVector *input = &env->tmp_VRegs[0];
1420 
1421     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1422         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1423         unsigned int weight = fGETUBYTE(1, input->h[i]);
1424         unsigned int vindex = (bucket >> 3) & 0x1F;
1425         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1426 
1427         if (fGETQBIT(env->qtmp, 2 * i)) {
1428             env->VRegs[vindex].uw[elindex] =
1429                 env->VRegs[vindex].uw[elindex] + weight;
1430         }
1431     }
1432 }
1433 
1434 void HELPER(vwhist128m)(CPUHexagonState *env, int32_t uiV)
1435 {
1436     MMVector *input = &env->tmp_VRegs[0];
1437 
1438     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1439         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1440         unsigned int weight = fGETUBYTE(1, input->h[i]);
1441         unsigned int vindex = (bucket >> 3) & 0x1F;
1442         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1443 
1444         if ((bucket & 1) == uiV) {
1445             env->VRegs[vindex].uw[elindex] =
1446                 env->VRegs[vindex].uw[elindex] + weight;
1447         }
1448     }
1449 }
1450 
1451 void HELPER(vwhist128qm)(CPUHexagonState *env, int32_t uiV)
1452 {
1453     MMVector *input = &env->tmp_VRegs[0];
1454 
1455     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1456         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1457         unsigned int weight = fGETUBYTE(1, input->h[i]);
1458         unsigned int vindex = (bucket >> 3) & 0x1F;
1459         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1460 
1461         if (((bucket & 1) == uiV) && fGETQBIT(env->qtmp, 2 * i)) {
1462             env->VRegs[vindex].uw[elindex] =
1463                 env->VRegs[vindex].uw[elindex] + weight;
1464         }
1465     }
1466 }
1467 
1468 static void cancel_slot(CPUHexagonState *env, uint32_t slot)
1469 {
1470     HEX_DEBUG_LOG("Slot %d cancelled\n", slot);
1471     env->slot_cancelled |= (1 << slot);
1472 }
1473 
/*
 * Macros that the generated helper functions may reference:
 *   warn()         expands to nothing
 *   fatal()        expands to g_assert_not_reached(); the macro itself
 *                  supplies the trailing semicolon
 *   BOGUS_HELPER() prints an error line naming the helper tag
 */
#define warn(...) /* Nothing */
#define fatal(...) g_assert_not_reached();

#define BOGUS_HELPER(tag) printf("ERROR: bogus helper: " #tag "\n")
1480 
1481 #include "helper_funcs_generated.c.inc"
1482