xref: /openbmc/qemu/target/hexagon/translate.c (revision f944890dfd4222f091cea8680281e0bf7114f721)
1 /*
2  *  Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #define QEMU_GENERATE
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "tcg/tcg-op.h"
22 #include "tcg/tcg-op-gvec.h"
23 #include "exec/helper-gen.h"
24 #include "exec/helper-proto.h"
25 #include "exec/translation-block.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/log.h"
28 #include "internal.h"
29 #include "attribs.h"
30 #include "insn.h"
31 #include "decode.h"
32 #include "translate.h"
33 #include "genptr.h"
34 #include "printinsn.h"
35 
36 #define HELPER_H "helper.h"
37 #include "exec/helper-info.c.inc"
38 #undef  HELPER_H
39 
40 #include "analyze_funcs_generated.c.inc"
41 
/*
 * Signature of a per-opcode analysis hook.  analyze_packet() points
 * ctx->insn at the instruction before invoking the hook.
 */
typedef void (*AnalyzeInsn)(DisasContext *ctx);
/*
 * Analysis hooks indexed by opcode.  OPCODE(X) is defined only for the
 * duration of the include below and turns an entry into the designated
 * initializer "[X] = analyze_X"; any opcode without an entry is left NULL,
 * which analyze_packet() checks for before calling.
 */
static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = {
#define OPCODE(X)    [X] = analyze_##X
#include "opcodes_def_generated.h.inc"
#undef OPCODE
};
48 
/*
 * TCG value handles used by the Hexagon code generators.  They are only
 * defined here; where they are bound to CPU state is not visible in this
 * part of the file.
 */
/* General registers, indexed with HEX_REG_* constants (e.g. HEX_REG_PC) */
TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
/* Predicate registers, indexed by predicate number */
TCGv hex_pred[NUM_PREGS];
/* process_store() tests bit slot_num of this value to skip a store */
TCGv hex_slot_cancelled;
TCGv hex_new_value_usr;
TCGv hex_reg_written[TOTAL_PER_THREAD_REGS];
/*
 * Pending scalar store for each slot; process_store() reads the address
 * and the 32-bit or 64-bit value when it emits the store.
 */
TCGv hex_store_addr[STORES_MAX];
TCGv hex_store_width[STORES_MAX];
TCGv hex_store_val32[STORES_MAX];
TCGv_i64 hex_store_val64[STORES_MAX];
TCGv hex_llsc_addr;
TCGv hex_llsc_val;
TCGv_i64 hex_llsc_val_i64;
TCGv hex_vstore_addr[VSTORES_MAX];
TCGv hex_vstore_size[VSTORES_MAX];
TCGv hex_vstore_pending[VSTORES_MAX];
64 
/* Printable names of the predicate registers, indexed by predicate number */
static const char * const hexagon_prednames[] = {
  "p0", "p1", "p2", "p3"
};
68 
69 intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
70                           int num, bool alloc_ok)
71 {
72     intptr_t offset;
73 
74     if (!ctx->need_commit) {
75         return offsetof(CPUHexagonState, VRegs[regnum]);
76     }
77 
78     /* See if it is already allocated */
79     for (int i = 0; i < ctx->future_vregs_idx; i++) {
80         if (ctx->future_vregs_num[i] == regnum) {
81             return offsetof(CPUHexagonState, future_VRegs[i]);
82         }
83     }
84 
85     g_assert(alloc_ok);
86     offset = offsetof(CPUHexagonState, future_VRegs[ctx->future_vregs_idx]);
87     for (int i = 0; i < num; i++) {
88         ctx->future_vregs_num[ctx->future_vregs_idx + i] = regnum++;
89     }
90     ctx->future_vregs_idx += num;
91     g_assert(ctx->future_vregs_idx <= VECTOR_TEMPS_MAX);
92     return offset;
93 }
94 
95 intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
96                           int num, bool alloc_ok)
97 {
98     intptr_t offset;
99 
100     /* See if it is already allocated */
101     for (int i = 0; i < ctx->tmp_vregs_idx; i++) {
102         if (ctx->tmp_vregs_num[i] == regnum) {
103             return offsetof(CPUHexagonState, tmp_VRegs[i]);
104         }
105     }
106 
107     g_assert(alloc_ok);
108     offset = offsetof(CPUHexagonState, tmp_VRegs[ctx->tmp_vregs_idx]);
109     for (int i = 0; i < num; i++) {
110         ctx->tmp_vregs_num[ctx->tmp_vregs_idx + i] = regnum++;
111     }
112     ctx->tmp_vregs_idx += num;
113     g_assert(ctx->tmp_vregs_idx <= VECTOR_TEMPS_MAX);
114     return offset;
115 }
116 
117 static void gen_exception_raw(int excp)
118 {
119     gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp));
120 }
121 
122 static void gen_exec_counters(DisasContext *ctx)
123 {
124     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT],
125                     hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets);
126     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT],
127                     hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns);
128     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT],
129                     hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns);
130 }
131 
132 static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
133 {
134     return translator_use_goto_tb(&ctx->base, dest);
135 }
136 
137 static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest, bool
138                         move_to_pc)
139 {
140     if (use_goto_tb(ctx, dest)) {
141         tcg_gen_goto_tb(idx);
142         if (move_to_pc) {
143             tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
144         }
145         tcg_gen_exit_tb(ctx->base.tb, idx);
146     } else {
147         if (move_to_pc) {
148             tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
149         }
150         tcg_gen_lookup_and_goto_ptr();
151     }
152 }
153 
154 static void gen_end_tb(DisasContext *ctx)
155 {
156     Packet *pkt = ctx->pkt;
157 
158     gen_exec_counters(ctx);
159 
160     if (ctx->branch_cond != TCG_COND_NEVER) {
161         if (ctx->branch_cond != TCG_COND_ALWAYS) {
162             TCGLabel *skip = gen_new_label();
163             tcg_gen_brcondi_tl(ctx->branch_cond, ctx->branch_taken, 0, skip);
164             gen_goto_tb(ctx, 0, ctx->branch_dest, true);
165             gen_set_label(skip);
166             gen_goto_tb(ctx, 1, ctx->next_PC, false);
167         } else {
168             gen_goto_tb(ctx, 0, ctx->branch_dest, true);
169         }
170     } else if (ctx->is_tight_loop &&
171                pkt->insn[pkt->num_insns - 1].opcode == J2_endloop0) {
172         /*
173          * When we're in a tight loop, we defer the endloop0 processing
174          * to take advantage of direct block chaining
175          */
176         TCGLabel *skip = gen_new_label();
177         tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, skip);
178         tcg_gen_subi_tl(hex_gpr[HEX_REG_LC0], hex_gpr[HEX_REG_LC0], 1);
179         gen_goto_tb(ctx, 0, ctx->base.tb->pc, true);
180         gen_set_label(skip);
181         gen_goto_tb(ctx, 1, ctx->next_PC, false);
182     } else {
183         tcg_gen_lookup_and_goto_ptr();
184     }
185 
186     ctx->base.is_jmp = DISAS_NORETURN;
187 }
188 
189 static void gen_exception_end_tb(DisasContext *ctx, int excp)
190 {
191     gen_exec_counters(ctx);
192     tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->next_PC);
193     gen_exception_raw(excp);
194     ctx->base.is_jmp = DISAS_NORETURN;
195 
196 }
197 
198 #define PACKET_BUFFER_LEN              1028
199 static void print_pkt(Packet *pkt)
200 {
201     GString *buf = g_string_sized_new(PACKET_BUFFER_LEN);
202     snprint_a_pkt_debug(buf, pkt);
203     HEX_DEBUG_LOG("%s", buf->str);
204     g_string_free(buf, true);
205 }
/*
 * Call print_pkt(pkt), but only when HEX_DEBUG is non-zero.
 * The do { } while (0) wrapper makes the macro usable as a single
 * statement.
 */
#define HEX_DEBUG_PRINT_PKT(pkt) \
    do { \
        if (HEX_DEBUG) { \
            print_pkt(pkt); \
        } \
    } while (0)
212 
213 static int read_packet_words(CPUHexagonState *env, DisasContext *ctx,
214                              uint32_t words[])
215 {
216     bool found_end = false;
217     int nwords, max_words;
218 
219     memset(words, 0, PACKET_WORDS_MAX * sizeof(uint32_t));
220     for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) {
221         words[nwords] =
222             translator_ldl(env, &ctx->base,
223                            ctx->base.pc_next + nwords * sizeof(uint32_t));
224         found_end = is_packet_end(words[nwords]);
225     }
226     if (!found_end) {
227         /* Read too many words without finding the end */
228         return 0;
229     }
230 
231     /* Check for page boundary crossing */
232     max_words = -(ctx->base.pc_next | TARGET_PAGE_MASK) / sizeof(uint32_t);
233     if (nwords > max_words) {
234         /* We can only cross a page boundary at the beginning of a TB */
235         g_assert(ctx->base.num_insns == 1);
236     }
237 
238     HEX_DEBUG_LOG("decode_packet: pc = 0x%" VADDR_PRIx "\n",
239                   ctx->base.pc_next);
240     HEX_DEBUG_LOG("    words = { ");
241     for (int i = 0; i < nwords; i++) {
242         HEX_DEBUG_LOG("0x%x, ", words[i]);
243     }
244     HEX_DEBUG_LOG("}\n");
245 
246     return nwords;
247 }
248 
249 static bool check_for_attrib(Packet *pkt, int attrib)
250 {
251     for (int i = 0; i < pkt->num_insns; i++) {
252         if (GET_ATTRIB(pkt->insn[i].opcode, attrib)) {
253             return true;
254         }
255     }
256     return false;
257 }
258 
259 static bool need_slot_cancelled(Packet *pkt)
260 {
261     /* We only need slot_cancelled for conditional store instructions */
262     for (int i = 0; i < pkt->num_insns; i++) {
263         uint16_t opcode = pkt->insn[i].opcode;
264         if (GET_ATTRIB(opcode, A_CONDEXEC) &&
265             GET_ATTRIB(opcode, A_SCALAR_STORE)) {
266             return true;
267         }
268     }
269     return false;
270 }
271 
272 static bool need_next_PC(DisasContext *ctx)
273 {
274     Packet *pkt = ctx->pkt;
275 
276     /* Check for conditional control flow or HW loop end */
277     for (int i = 0; i < pkt->num_insns; i++) {
278         uint16_t opcode = pkt->insn[i].opcode;
279         if (GET_ATTRIB(opcode, A_CONDEXEC) && GET_ATTRIB(opcode, A_COF)) {
280             return true;
281         }
282         if (GET_ATTRIB(opcode, A_HWLOOP0_END) ||
283             GET_ATTRIB(opcode, A_HWLOOP1_END)) {
284             return true;
285         }
286     }
287     return false;
288 }
289 
290 /*
291  * The opcode_analyze functions mark most of the writes in a packet
292  * However, there are some implicit writes marked as attributes
293  * of the applicable instructions.
294  */
295 static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum)
296 {
297     uint16_t opcode = ctx->insn->opcode;
298     if (GET_ATTRIB(opcode, attrib)) {
299         /*
300          * USR is used to set overflow and FP exceptions,
301          * so treat it as conditional
302          */
303         bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) ||
304                              rnum == HEX_REG_USR;
305 
306         /* LC0/LC1 is conditionally written by endloop instructions */
307         if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) &&
308             (opcode == J2_endloop0 ||
309              opcode == J2_endloop1 ||
310              opcode == J2_endloop01)) {
311             is_predicated = true;
312         }
313 
314         ctx_log_reg_write(ctx, rnum, is_predicated);
315     }
316 }
317 
318 static void mark_implicit_reg_writes(DisasContext *ctx)
319 {
320     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_FP,  HEX_REG_FP);
321     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SP,  HEX_REG_SP);
322     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LR,  HEX_REG_LR);
323     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC0, HEX_REG_LC0);
324     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0);
325     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1);
326     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1);
327     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_USR, HEX_REG_USR);
328     mark_implicit_reg_write(ctx, A_FPOP, HEX_REG_USR);
329 }
330 
331 static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum)
332 {
333     if (GET_ATTRIB(ctx->insn->opcode, attrib)) {
334         ctx_log_pred_write(ctx, pnum);
335     }
336 }
337 
338 static void mark_implicit_pred_writes(DisasContext *ctx)
339 {
340     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P0, 0);
341     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P1, 1);
342     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P2, 2);
343     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3);
344 }
345 
346 static bool pkt_raises_exception(Packet *pkt)
347 {
348     if (check_for_attrib(pkt, A_LOAD) ||
349         check_for_attrib(pkt, A_STORE)) {
350         return true;
351     }
352     return false;
353 }
354 
355 static bool need_commit(DisasContext *ctx)
356 {
357     Packet *pkt = ctx->pkt;
358 
359     /*
360      * If the short-circuit property is set to false, we'll always do the commit
361      */
362     if (!ctx->short_circuit) {
363         return true;
364     }
365 
366     if (pkt_raises_exception(pkt)) {
367         return true;
368     }
369 
370     /* Registers with immutability flags require new_value */
371     for (int i = 0; i < ctx->reg_log_idx; i++) {
372         int rnum = ctx->reg_log[i];
373         if (reg_immut_masks[rnum]) {
374             return true;
375         }
376     }
377 
378     /* Floating point instructions are hard-coded to use new_value */
379     if (check_for_attrib(pkt, A_FPOP)) {
380         return true;
381     }
382 
383     if (ctx->read_after_write || ctx->has_hvx_overlap) {
384         return true;
385     }
386 
387     return false;
388 }
389 
390 static void mark_implicit_pred_read(DisasContext *ctx, int attrib, int pnum)
391 {
392     if (GET_ATTRIB(ctx->insn->opcode, attrib)) {
393         ctx_log_pred_read(ctx, pnum);
394     }
395 }
396 
397 static void mark_implicit_pred_reads(DisasContext *ctx)
398 {
399     mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P0, 0);
400     mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P1, 1);
401     mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 2);
402     mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 3);
403 }
404 
405 static void analyze_packet(DisasContext *ctx)
406 {
407     Packet *pkt = ctx->pkt;
408     ctx->read_after_write = false;
409     ctx->has_hvx_overlap = false;
410     for (int i = 0; i < pkt->num_insns; i++) {
411         Insn *insn = &pkt->insn[i];
412         ctx->insn = insn;
413         if (opcode_analyze[insn->opcode]) {
414             opcode_analyze[insn->opcode](ctx);
415         }
416         mark_implicit_reg_writes(ctx);
417         mark_implicit_pred_writes(ctx);
418         mark_implicit_pred_reads(ctx);
419     }
420 
421     ctx->need_commit = need_commit(ctx);
422 }
423 
/*
 * Prepare translation of the packet in ctx->pkt.
 *
 * Resets the per-packet state in ctx, runs analyze_packet(), and then
 * emits the TCG ops that must execute before the packet's instructions:
 * clearing hex_slot_cancelled when needed, setting up branch_taken and
 * PC for packets with a change of flow, and preloading the result
 * locations of conditionally written registers with their current
 * values.
 */
static void gen_start_packet(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;
    /* Address of the packet that follows this one */
    target_ulong next_PC = ctx->base.pc_next + pkt->encod_pkt_size_in_bytes;
    int i;

    /* Clear out the disassembly context */
    ctx->next_PC = next_PC;
    ctx->reg_log_idx = 0;
    bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
    bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
    ctx->preg_log_idx = 0;
    bitmap_zero(ctx->pregs_written, NUM_PREGS);
    ctx->future_vregs_idx = 0;
    ctx->tmp_vregs_idx = 0;
    ctx->vreg_log_idx = 0;
    bitmap_zero(ctx->vregs_written, NUM_VREGS);
    bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS);
    bitmap_zero(ctx->vregs_updated, NUM_VREGS);
    bitmap_zero(ctx->vregs_select, NUM_VREGS);
    bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS);
    bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS);
    bitmap_zero(ctx->qregs_written, NUM_QREGS);
    ctx->qreg_log_idx = 0;
    /* A width of 0 means "not known at translation time" in process_store */
    for (i = 0; i < STORES_MAX; i++) {
        ctx->store_width[i] = 0;
    }
    ctx->s1_store_processed = false;
    ctx->pre_commit = true;
    for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
        ctx->new_value[i] = NULL;
    }
    for (i = 0; i < NUM_PREGS; i++) {
        ctx->new_pred_value[i] = NULL;
    }

    /* Fills in the logs/bitmaps reset above and sets ctx->need_commit */
    analyze_packet(ctx);

    /*
     * pregs_written is used both in the analyze phase as well as the code
     * gen phase, so clear it again.
     */
    bitmap_zero(ctx->pregs_written, NUM_PREGS);

    if (HEX_DEBUG) {
        /* Handy place to set a breakpoint before the packet executes */
        gen_helper_debug_start_packet(tcg_env);
    }

    /* Initialize the runtime state for packet semantics */
    if (need_slot_cancelled(pkt)) {
        tcg_gen_movi_tl(hex_slot_cancelled, 0);
    }
    ctx->branch_taken = NULL;
    if (pkt->pkt_has_cof) {
        ctx->branch_taken = tcg_temp_new();
        /* branch_taken is only zeroed here for packets with multiple COFs */
        if (pkt->pkt_has_multi_cof) {
            tcg_gen_movi_tl(ctx->branch_taken, 0);
        }
        if (need_next_PC(ctx)) {
            tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], next_PC);
        }
    }
    if (HEX_DEBUG) {
        ctx->pred_written = tcg_temp_new();
        tcg_gen_movi_tl(ctx->pred_written, 0);
    }

    /* Preload the predicated registers into get_result_gpr(ctx, i) */
    if (ctx->need_commit &&
        !bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) {
        i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
        while (i < TOTAL_PER_THREAD_REGS) {
            tcg_gen_mov_tl(get_result_gpr(ctx, i), hex_gpr[i]);
            i = find_next_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS,
                              i + 1);
        }
    }

    /*
     * Preload the predicated pred registers into ctx->new_pred_value[pred_num]
     * Only endloop instructions conditionally write to pred registers
     */
    if (ctx->need_commit && pkt->pkt_has_endloop) {
        for (i = 0; i < ctx->preg_log_idx; i++) {
            int pred_num = ctx->preg_log[i];
            ctx->new_pred_value[pred_num] = tcg_temp_new();
            tcg_gen_mov_tl(ctx->new_pred_value[pred_num], hex_pred[pred_num]);
        }
    }

    /* Preload the predicated HVX registers into future_VRegs and tmp_VRegs */
    if (!bitmap_empty(ctx->predicated_future_vregs, NUM_VREGS)) {
        i = find_first_bit(ctx->predicated_future_vregs, NUM_VREGS);
        while (i < NUM_VREGS) {
            /* alloc_ok is true: this may be the first use of register i */
            const intptr_t VdV_off =
                ctx_future_vreg_off(ctx, i, 1, true);
            intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]);
            tcg_gen_gvec_mov(MO_64, VdV_off,
                             src_off,
                             sizeof(MMVector),
                             sizeof(MMVector));
            i = find_next_bit(ctx->predicated_future_vregs, NUM_VREGS, i + 1);
        }
    }
    if (!bitmap_empty(ctx->predicated_tmp_vregs, NUM_VREGS)) {
        i = find_first_bit(ctx->predicated_tmp_vregs, NUM_VREGS);
        while (i < NUM_VREGS) {
            /* alloc_ok is true: this may be the first use of register i */
            const intptr_t VdV_off =
                ctx_tmp_vreg_off(ctx, i, 1, true);
            intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]);
            tcg_gen_gvec_mov(MO_64, VdV_off,
                             src_off,
                             sizeof(MMVector),
                             sizeof(MMVector));
            i = find_next_bit(ctx->predicated_tmp_vregs, NUM_VREGS, i + 1);
        }
    }
}
543 
544 bool is_gather_store_insn(DisasContext *ctx)
545 {
546     Packet *pkt = ctx->pkt;
547     Insn *insn = ctx->insn;
548     if (GET_ATTRIB(insn->opcode, A_CVI_NEW) &&
549         insn->new_value_producer_slot == 1) {
550         /* Look for gather instruction */
551         for (int i = 0; i < pkt->num_insns; i++) {
552             Insn *in = &pkt->insn[i];
553             if (GET_ATTRIB(in->opcode, A_CVI_GATHER) && in->slot == 1) {
554                 return true;
555             }
556         }
557     }
558     return false;
559 }
560 
561 static void mark_store_width(DisasContext *ctx)
562 {
563     uint16_t opcode = ctx->insn->opcode;
564     uint32_t slot = ctx->insn->slot;
565     uint8_t width = 0;
566 
567     if (GET_ATTRIB(opcode, A_SCALAR_STORE)) {
568         if (GET_ATTRIB(opcode, A_MEMSIZE_0B)) {
569             return;
570         }
571         if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) {
572             width |= 1;
573         }
574         if (GET_ATTRIB(opcode, A_MEMSIZE_2B)) {
575             width |= 2;
576         }
577         if (GET_ATTRIB(opcode, A_MEMSIZE_4B)) {
578             width |= 4;
579         }
580         if (GET_ATTRIB(opcode, A_MEMSIZE_8B)) {
581             width |= 8;
582         }
583         tcg_debug_assert(is_power_of_2(width));
584         ctx->store_width[slot] = width;
585     }
586 }
587 
588 static void gen_insn(DisasContext *ctx)
589 {
590     if (ctx->insn->generate) {
591         ctx->insn->generate(ctx);
592         mark_store_width(ctx);
593     } else {
594         gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE);
595     }
596 }
597 
598 /*
599  * Helpers for generating the packet commit
600  */
601 static void gen_reg_writes(DisasContext *ctx)
602 {
603     int i;
604 
605     /* Early exit if not needed */
606     if (!ctx->need_commit) {
607         return;
608     }
609 
610     for (i = 0; i < ctx->reg_log_idx; i++) {
611         int reg_num = ctx->reg_log[i];
612 
613         tcg_gen_mov_tl(hex_gpr[reg_num], get_result_gpr(ctx, reg_num));
614 
615         /*
616          * ctx->is_tight_loop is set when SA0 points to the beginning of the TB.
617          * If we write to SA0, we have to turn off tight loop handling.
618          */
619         if (reg_num == HEX_REG_SA0) {
620             ctx->is_tight_loop = false;
621         }
622     }
623 }
624 
625 static void gen_pred_writes(DisasContext *ctx)
626 {
627     /* Early exit if not needed or the log is empty */
628     if (!ctx->need_commit || !ctx->preg_log_idx) {
629         return;
630     }
631 
632     for (int i = 0; i < ctx->preg_log_idx; i++) {
633         int pred_num = ctx->preg_log[i];
634         tcg_gen_mov_tl(hex_pred[pred_num], ctx->new_pred_value[pred_num]);
635     }
636 }
637 
638 static void gen_check_store_width(DisasContext *ctx, int slot_num)
639 {
640     if (HEX_DEBUG) {
641         TCGv slot = tcg_constant_tl(slot_num);
642         TCGv check = tcg_constant_tl(ctx->store_width[slot_num]);
643         gen_helper_debug_check_store_width(tcg_env, slot, check);
644     }
645 }
646 
647 static bool slot_is_predicated(Packet *pkt, int slot_num)
648 {
649     for (int i = 0; i < pkt->num_insns; i++) {
650         if (pkt->insn[i].slot == slot_num) {
651             return GET_ATTRIB(pkt->insn[i].opcode, A_CONDEXEC);
652         }
653     }
654     /* If we get to here, we didn't find an instruction in the requested slot */
655     g_assert_not_reached();
656 }
657 
658 void process_store(DisasContext *ctx, int slot_num)
659 {
660     bool is_predicated = slot_is_predicated(ctx->pkt, slot_num);
661     TCGLabel *label_end = NULL;
662 
663     /*
664      * We may have already processed this store
665      * See CHECK_NOSHUF in macros.h
666      */
667     if (slot_num == 1 && ctx->s1_store_processed) {
668         return;
669     }
670     ctx->s1_store_processed = true;
671 
672     if (is_predicated) {
673         TCGv cancelled = tcg_temp_new();
674         label_end = gen_new_label();
675 
676         /* Don't do anything if the slot was cancelled */
677         tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1);
678         tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end);
679     }
680     {
681         TCGv address = tcg_temp_new();
682         tcg_gen_mov_tl(address, hex_store_addr[slot_num]);
683 
684         /*
685          * If we know the width from the DisasContext, we can
686          * generate much cleaner code.
687          * Unfortunately, not all instructions execute the fSTORE
688          * macro during code generation.  Anything that uses the
689          * generic helper will have this problem.  Instructions
690          * that use fWRAP to generate proper TCG code will be OK.
691          */
692         switch (ctx->store_width[slot_num]) {
693         case 1:
694             gen_check_store_width(ctx, slot_num);
695             tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
696                                hex_store_addr[slot_num],
697                                ctx->mem_idx, MO_UB);
698             break;
699         case 2:
700             gen_check_store_width(ctx, slot_num);
701             tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
702                                hex_store_addr[slot_num],
703                                ctx->mem_idx, MO_TEUW);
704             break;
705         case 4:
706             gen_check_store_width(ctx, slot_num);
707             tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
708                                hex_store_addr[slot_num],
709                                ctx->mem_idx, MO_TEUL);
710             break;
711         case 8:
712             gen_check_store_width(ctx, slot_num);
713             tcg_gen_qemu_st_i64(hex_store_val64[slot_num],
714                                 hex_store_addr[slot_num],
715                                 ctx->mem_idx, MO_TEUQ);
716             break;
717         default:
718             {
719                 /*
720                  * If we get to here, we don't know the width at
721                  * TCG generation time, we'll use a helper to
722                  * avoid branching based on the width at runtime.
723                  */
724                 TCGv slot = tcg_constant_tl(slot_num);
725                 gen_helper_commit_store(tcg_env, slot);
726             }
727         }
728     }
729     if (is_predicated) {
730         gen_set_label(label_end);
731     }
732 }
733 
734 static void process_store_log(DisasContext *ctx)
735 {
736     /*
737      *  When a packet has two stores, the hardware processes
738      *  slot 1 and then slot 0.  This will be important when
739      *  the memory accesses overlap.
740      */
741     Packet *pkt = ctx->pkt;
742     if (pkt->pkt_has_store_s1) {
743         g_assert(!pkt->pkt_has_dczeroa);
744         process_store(ctx, 1);
745     }
746     if (pkt->pkt_has_store_s0) {
747         g_assert(!pkt->pkt_has_dczeroa);
748         process_store(ctx, 0);
749     }
750 }
751 
752 /* Zero out a 32-bit cache line */
753 static void process_dczeroa(DisasContext *ctx)
754 {
755     if (ctx->pkt->pkt_has_dczeroa) {
756         /* Store 32 bytes of zero starting at (addr & ~0x1f) */
757         TCGv addr = tcg_temp_new();
758         TCGv_i64 zero = tcg_constant_i64(0);
759 
760         tcg_gen_andi_tl(addr, ctx->dczero_addr, ~0x1f);
761         tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
762         tcg_gen_addi_tl(addr, addr, 8);
763         tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
764         tcg_gen_addi_tl(addr, addr, 8);
765         tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
766         tcg_gen_addi_tl(addr, addr, 8);
767         tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
768     }
769 }
770 
771 static bool pkt_has_hvx_store(Packet *pkt)
772 {
773     int i;
774     for (i = 0; i < pkt->num_insns; i++) {
775         int opcode = pkt->insn[i].opcode;
776         if (GET_ATTRIB(opcode, A_CVI) && GET_ATTRIB(opcode, A_STORE)) {
777             return true;
778         }
779     }
780     return false;
781 }
782 
783 static void gen_commit_hvx(DisasContext *ctx)
784 {
785     int i;
786 
787     /* Early exit if not needed */
788     if (!ctx->need_commit) {
789         g_assert(!pkt_has_hvx_store(ctx->pkt));
790         return;
791     }
792 
793     /*
794      *    for (i = 0; i < ctx->vreg_log_idx; i++) {
795      *        int rnum = ctx->vreg_log[i];
796      *        env->VRegs[rnum] = env->future_VRegs[rnum];
797      *    }
798      */
799     for (i = 0; i < ctx->vreg_log_idx; i++) {
800         int rnum = ctx->vreg_log[i];
801         intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]);
802         intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false);
803         size_t size = sizeof(MMVector);
804 
805         tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
806     }
807 
808     /*
809      *    for (i = 0; i < ctx->qreg_log_idx; i++) {
810      *        int rnum = ctx->qreg_log[i];
811      *        env->QRegs[rnum] = env->future_QRegs[rnum];
812      *    }
813      */
814     for (i = 0; i < ctx->qreg_log_idx; i++) {
815         int rnum = ctx->qreg_log[i];
816         intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]);
817         intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]);
818         size_t size = sizeof(MMQReg);
819 
820         tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
821     }
822 
823     if (pkt_has_hvx_store(ctx->pkt)) {
824         gen_helper_commit_hvx_stores(tcg_env);
825     }
826 }
827 
828 static void update_exec_counters(DisasContext *ctx)
829 {
830     Packet *pkt = ctx->pkt;
831     int num_insns = pkt->num_insns;
832     int num_real_insns = 0;
833     int num_hvx_insns = 0;
834 
835     for (int i = 0; i < num_insns; i++) {
836         if (!pkt->insn[i].is_endloop &&
837             !pkt->insn[i].part1 &&
838             !GET_ATTRIB(pkt->insn[i].opcode, A_IT_NOP)) {
839             num_real_insns++;
840         }
841         if (GET_ATTRIB(pkt->insn[i].opcode, A_CVI)) {
842             num_hvx_insns++;
843         }
844     }
845 
846     ctx->num_packets++;
847     ctx->num_insns += num_real_insns;
848     ctx->num_hvx_insns += num_hvx_insns;
849 }
850 
/*
 * Emit the commit sequence for the current packet: probe or process the
 * stores, write back logged registers and predicates, commit HVX state,
 * update the execution counters, run the deferred vhist instruction if
 * the packet has one, and end the TB when the packet has a change of
 * flow.
 */
static void gen_commit_packet(DisasContext *ctx)
{
    /*
     * If there is more than one store in a packet, make sure they are all OK
     * before proceeding with the rest of the packet commit.
     *
     * dczeroa has to be the only store operation in the packet, so we go
     * ahead and process that first.
     *
     * When there is an HVX store, there can also be a scalar store in either
     * slot 0 or slot 1, so we create a mask for the helper to indicate what
     * work to do.
     *
     * When there are two scalar stores, we probe the one in slot 0.
     *
     * Note that we don't call the probe helper for packets with only one
     * store.  Therefore, we call process_store_log before anything else
     * involved in committing the packet.
     */
    Packet *pkt = ctx->pkt;
    bool has_store_s0 = pkt->pkt_has_store_s0;
    /* A slot 1 store already emitted by process_store() is not counted */
    bool has_store_s1 = (pkt->pkt_has_store_s1 && !ctx->s1_store_processed);
    bool has_hvx_store = pkt_has_hvx_store(pkt);
    if (pkt->pkt_has_dczeroa) {
        /*
         * The dczeroa will be the store in slot 0, check that we don't have
         * a store in slot 1 or an HVX store.
         */
        g_assert(!has_store_s1 && !has_hvx_store);
        process_dczeroa(ctx);
    } else if (has_hvx_store) {
        if (!has_store_s0 && !has_store_s1) {
            /* Only HVX stores: the helper just needs the MMU index */
            TCGv mem_idx = tcg_constant_tl(ctx->mem_idx);
            gen_helper_probe_hvx_stores(tcg_env, mem_idx);
        } else {
            /* Scalar and HVX stores: describe the work in a bit mask */
            int mask = 0;

            if (has_store_s0) {
                mask =
                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0, 1);
            }
            if (has_store_s1) {
                mask =
                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1, 1);
            }
            /* Always true on this branch of the enclosing else-if */
            if (has_hvx_store) {
                mask =
                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
                               HAS_HVX_STORES, 1);
            }
            if (has_store_s0 && slot_is_predicated(pkt, 0)) {
                mask =
                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
                               S0_IS_PRED, 1);
            }
            if (has_store_s1 && slot_is_predicated(pkt, 1)) {
                mask =
                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
                               S1_IS_PRED, 1);
            }
            mask = FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, MMU_IDX,
                              ctx->mem_idx);
            gen_helper_probe_pkt_scalar_hvx_stores(tcg_env,
                                                   tcg_constant_tl(mask));
        }
    } else if (has_store_s0 && has_store_s1) {
        /*
         * process_store_log will execute the slot 1 store first,
         * so we only have to probe the store in slot 0
         */
        int args = 0;
        args =
            FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, ctx->mem_idx);
        if (slot_is_predicated(pkt, 0)) {
            args =
                FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED, 1);
        }
        TCGv args_tcgv = tcg_constant_tl(args);
        gen_helper_probe_pkt_scalar_store_s0(tcg_env, args_tcgv);
    }

    /* Stores first, then register/predicate write-back, then HVX state */
    process_store_log(ctx);

    gen_reg_writes(ctx);
    gen_pred_writes(ctx);
    if (pkt->pkt_has_hvx) {
        gen_commit_hvx(ctx);
    }
    update_exec_counters(ctx);
    if (HEX_DEBUG) {
        TCGv has_st0 =
            tcg_constant_tl(pkt->pkt_has_store_s0 && !pkt->pkt_has_dczeroa);
        TCGv has_st1 =
            tcg_constant_tl(pkt->pkt_has_store_s1 && !pkt->pkt_has_dczeroa);

        /* Handy place to set a breakpoint at the end of execution */
        gen_helper_debug_commit_end(tcg_env, tcg_constant_tl(ctx->pkt->pc),
                                    ctx->pred_written, has_st0, has_st1);
    }

    /* The vhist instruction, if any, is generated after the commit */
    if (pkt->vhist_insn != NULL) {
        ctx->pre_commit = false;
        ctx->insn = pkt->vhist_insn;
        pkt->vhist_insn->generate(ctx);
    }

    if (pkt->pkt_has_cof) {
        gen_end_tb(ctx);
    }
}
961 
962 static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx)
963 {
964     uint32_t words[PACKET_WORDS_MAX];
965     int nwords;
966     Packet pkt;
967     int i;
968 
969     nwords = read_packet_words(env, ctx, words);
970     if (!nwords) {
971         gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET);
972         return;
973     }
974 
975     ctx->pkt = &pkt;
976     if (decode_packet(ctx, nwords, words, &pkt, false) > 0) {
977         pkt.pc = ctx->base.pc_next;
978         HEX_DEBUG_PRINT_PKT(&pkt);
979         gen_start_packet(ctx);
980         for (i = 0; i < pkt.num_insns; i++) {
981             ctx->insn = &pkt.insn[i];
982             gen_insn(ctx);
983         }
984         gen_commit_packet(ctx);
985         ctx->base.pc_next += pkt.encod_pkt_size_in_bytes;
986     } else {
987         gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET);
988     }
989 }
990 
991 static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
992                                           CPUState *cs)
993 {
994     DisasContext *ctx = container_of(dcbase, DisasContext, base);
995     HexagonCPU *hex_cpu = env_archcpu(cpu_env(cs));
996     uint32_t hex_flags = dcbase->tb->flags;
997 
998     ctx->mem_idx = MMU_USER_IDX;
999     ctx->num_packets = 0;
1000     ctx->num_insns = 0;
1001     ctx->num_hvx_insns = 0;
1002     ctx->branch_cond = TCG_COND_NEVER;
1003     ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP);
1004     ctx->short_circuit = hex_cpu->short_circuit;
1005 }
1006 
/*
 * TranslatorOps.tb_start hook.
 *
 * Intentionally empty: this front end emits nothing at the start of a TB.
 */
static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}
1010 
1011 static void hexagon_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
1012 {
1013     DisasContext *ctx = container_of(dcbase, DisasContext, base);
1014 
1015     tcg_gen_insn_start(ctx->base.pc_next);
1016 }
1017 
1018 static bool pkt_crosses_page(CPUHexagonState *env, DisasContext *ctx)
1019 {
1020     target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
1021     bool found_end = false;
1022     int nwords;
1023 
1024     for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) {
1025         uint32_t word = translator_ldl(env, &ctx->base,
1026                             ctx->base.pc_next + nwords * sizeof(uint32_t));
1027         found_end = is_packet_end(word);
1028     }
1029     uint32_t next_ptr =  ctx->base.pc_next + nwords * sizeof(uint32_t);
1030     return found_end && next_ptr - page_start >= TARGET_PAGE_SIZE;
1031 }
1032 
1033 static void hexagon_tr_translate_packet(DisasContextBase *dcbase, CPUState *cpu)
1034 {
1035     DisasContext *ctx = container_of(dcbase, DisasContext, base);
1036     CPUHexagonState *env = cpu_env(cpu);
1037 
1038     decode_and_translate_packet(env, ctx);
1039 
1040     if (ctx->base.is_jmp == DISAS_NEXT) {
1041         target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
1042         target_ulong bytes_max = PACKET_WORDS_MAX * sizeof(target_ulong);
1043 
1044         if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE ||
1045             (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE - bytes_max &&
1046              pkt_crosses_page(env, ctx))) {
1047             ctx->base.is_jmp = DISAS_TOO_MANY;
1048         }
1049 
1050         /*
1051          * The CPU log is used to compare against LLDB single stepping,
1052          * so end the TLB after every packet.
1053          */
1054         HexagonCPU *hex_cpu = env_archcpu(env);
1055         if (hex_cpu->lldb_compat && qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
1056             ctx->base.is_jmp = DISAS_TOO_MANY;
1057         }
1058     }
1059 }
1060 
1061 static void hexagon_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
1062 {
1063     DisasContext *ctx = container_of(dcbase, DisasContext, base);
1064 
1065     switch (ctx->base.is_jmp) {
1066     case DISAS_TOO_MANY:
1067         gen_exec_counters(ctx);
1068         tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next);
1069         tcg_gen_exit_tb(NULL, 0);
1070         break;
1071     case DISAS_NORETURN:
1072         break;
1073     default:
1074         g_assert_not_reached();
1075     }
1076 }
1077 
1078 static const TranslatorOps hexagon_tr_ops = {
1079     .init_disas_context = hexagon_tr_init_disas_context,
1080     .tb_start           = hexagon_tr_tb_start,
1081     .insn_start         = hexagon_tr_insn_start,
1082     .translate_insn     = hexagon_tr_translate_packet,
1083     .tb_stop            = hexagon_tr_tb_stop,
1084 };
1085 
1086 void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
1087                            vaddr pc, void *host_pc)
1088 {
1089     DisasContext ctx;
1090 
1091     translator_loop(cs, tb, max_insns, pc, host_pc,
1092                     &hexagon_tr_ops, &ctx.base);
1093 }
1094 
/*
 * Backing storage for the names of the TCG globals created in
 * hexagon_translate_init().  Each name is formatted with snprintf() into
 * one NAME_LEN-byte row and then passed to tcg_global_mem_new*().
 */
#define NAME_LEN               64
/* One name per per-thread register (only filled in when HEX_DEBUG is set) */
static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
/* One name per scalar store log entry */
static char store_addr_names[STORES_MAX][NAME_LEN];
static char store_width_names[STORES_MAX][NAME_LEN];
static char store_val32_names[STORES_MAX][NAME_LEN];
static char store_val64_names[STORES_MAX][NAME_LEN];
/* One name per vector store entry */
static char vstore_addr_names[VSTORES_MAX][NAME_LEN];
static char vstore_size_names[VSTORES_MAX][NAME_LEN];
static char vstore_pending_names[VSTORES_MAX][NAME_LEN];
1104 
1105 void hexagon_translate_init(void)
1106 {
1107     int i;
1108 
1109     opcode_init();
1110 
1111     for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
1112         hex_gpr[i] = tcg_global_mem_new(tcg_env,
1113             offsetof(CPUHexagonState, gpr[i]),
1114             hexagon_regnames[i]);
1115 
1116         if (HEX_DEBUG) {
1117             snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s",
1118                      hexagon_regnames[i]);
1119             hex_reg_written[i] = tcg_global_mem_new(tcg_env,
1120                 offsetof(CPUHexagonState, reg_written[i]),
1121                 reg_written_names[i]);
1122         }
1123     }
1124     hex_new_value_usr = tcg_global_mem_new(tcg_env,
1125         offsetof(CPUHexagonState, new_value_usr), "new_value_usr");
1126 
1127     for (i = 0; i < NUM_PREGS; i++) {
1128         hex_pred[i] = tcg_global_mem_new(tcg_env,
1129             offsetof(CPUHexagonState, pred[i]),
1130             hexagon_prednames[i]);
1131     }
1132     hex_slot_cancelled = tcg_global_mem_new(tcg_env,
1133         offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled");
1134     hex_llsc_addr = tcg_global_mem_new(tcg_env,
1135         offsetof(CPUHexagonState, llsc_addr), "llsc_addr");
1136     hex_llsc_val = tcg_global_mem_new(tcg_env,
1137         offsetof(CPUHexagonState, llsc_val), "llsc_val");
1138     hex_llsc_val_i64 = tcg_global_mem_new_i64(tcg_env,
1139         offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64");
1140     for (i = 0; i < STORES_MAX; i++) {
1141         snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i);
1142         hex_store_addr[i] = tcg_global_mem_new(tcg_env,
1143             offsetof(CPUHexagonState, mem_log_stores[i].va),
1144             store_addr_names[i]);
1145 
1146         snprintf(store_width_names[i], NAME_LEN, "store_width_%d", i);
1147         hex_store_width[i] = tcg_global_mem_new(tcg_env,
1148             offsetof(CPUHexagonState, mem_log_stores[i].width),
1149             store_width_names[i]);
1150 
1151         snprintf(store_val32_names[i], NAME_LEN, "store_val32_%d", i);
1152         hex_store_val32[i] = tcg_global_mem_new(tcg_env,
1153             offsetof(CPUHexagonState, mem_log_stores[i].data32),
1154             store_val32_names[i]);
1155 
1156         snprintf(store_val64_names[i], NAME_LEN, "store_val64_%d", i);
1157         hex_store_val64[i] = tcg_global_mem_new_i64(tcg_env,
1158             offsetof(CPUHexagonState, mem_log_stores[i].data64),
1159             store_val64_names[i]);
1160     }
1161     for (i = 0; i < VSTORES_MAX; i++) {
1162         snprintf(vstore_addr_names[i], NAME_LEN, "vstore_addr_%d", i);
1163         hex_vstore_addr[i] = tcg_global_mem_new(tcg_env,
1164             offsetof(CPUHexagonState, vstore[i].va),
1165             vstore_addr_names[i]);
1166 
1167         snprintf(vstore_size_names[i], NAME_LEN, "vstore_size_%d", i);
1168         hex_vstore_size[i] = tcg_global_mem_new(tcg_env,
1169             offsetof(CPUHexagonState, vstore[i].size),
1170             vstore_size_names[i]);
1171 
1172         snprintf(vstore_pending_names[i], NAME_LEN, "vstore_pending_%d", i);
1173         hex_vstore_pending[i] = tcg_global_mem_new(tcg_env,
1174             offsetof(CPUHexagonState, vstore_pending[i]),
1175             vstore_pending_names[i]);
1176     }
1177 }
1178