xref: /openbmc/qemu/target/hexagon/translate.c (revision 51e47cf8)
1 /*
2  *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #define QEMU_GENERATE
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "tcg/tcg-op.h"
22 #include "tcg/tcg-op-gvec.h"
23 #include "exec/cpu_ldst.h"
24 #include "exec/log.h"
25 #include "internal.h"
26 #include "attribs.h"
27 #include "insn.h"
28 #include "decode.h"
29 #include "translate.h"
30 #include "printinsn.h"
31 
32 #include "analyze_funcs_generated.c.inc"
33 
34 typedef void (*AnalyzeInsn)(DisasContext *ctx);
35 static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = {
36 #define OPCODE(X)    [X] = analyze_##X
37 #include "opcodes_def_generated.h.inc"
38 #undef OPCODE
39 };
40 
41 TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
42 TCGv hex_pred[NUM_PREGS];
43 TCGv hex_this_PC;
44 TCGv hex_slot_cancelled;
45 TCGv hex_branch_taken;
46 TCGv hex_new_value[TOTAL_PER_THREAD_REGS];
47 TCGv hex_reg_written[TOTAL_PER_THREAD_REGS];
48 TCGv hex_new_pred_value[NUM_PREGS];
49 TCGv hex_pred_written;
50 TCGv hex_store_addr[STORES_MAX];
51 TCGv hex_store_width[STORES_MAX];
52 TCGv hex_store_val32[STORES_MAX];
53 TCGv_i64 hex_store_val64[STORES_MAX];
54 TCGv hex_pkt_has_store_s1;
55 TCGv hex_dczero_addr;
56 TCGv hex_llsc_addr;
57 TCGv hex_llsc_val;
58 TCGv_i64 hex_llsc_val_i64;
59 TCGv hex_vstore_addr[VSTORES_MAX];
60 TCGv hex_vstore_size[VSTORES_MAX];
61 TCGv hex_vstore_pending[VSTORES_MAX];
62 
/* Printable names of the predicate registers, indexed by register number */
static const char * const hexagon_prednames[] = {
    [0] = "p0",
    [1] = "p1",
    [2] = "p2",
    [3] = "p3",
};
66 
67 intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
68                           int num, bool alloc_ok)
69 {
70     intptr_t offset;
71 
72     /* See if it is already allocated */
73     for (int i = 0; i < ctx->future_vregs_idx; i++) {
74         if (ctx->future_vregs_num[i] == regnum) {
75             return offsetof(CPUHexagonState, future_VRegs[i]);
76         }
77     }
78 
79     g_assert(alloc_ok);
80     offset = offsetof(CPUHexagonState, future_VRegs[ctx->future_vregs_idx]);
81     for (int i = 0; i < num; i++) {
82         ctx->future_vregs_num[ctx->future_vregs_idx + i] = regnum++;
83     }
84     ctx->future_vregs_idx += num;
85     g_assert(ctx->future_vregs_idx <= VECTOR_TEMPS_MAX);
86     return offset;
87 }
88 
89 intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
90                           int num, bool alloc_ok)
91 {
92     intptr_t offset;
93 
94     /* See if it is already allocated */
95     for (int i = 0; i < ctx->tmp_vregs_idx; i++) {
96         if (ctx->tmp_vregs_num[i] == regnum) {
97             return offsetof(CPUHexagonState, tmp_VRegs[i]);
98         }
99     }
100 
101     g_assert(alloc_ok);
102     offset = offsetof(CPUHexagonState, tmp_VRegs[ctx->tmp_vregs_idx]);
103     for (int i = 0; i < num; i++) {
104         ctx->tmp_vregs_num[ctx->tmp_vregs_idx + i] = regnum++;
105     }
106     ctx->tmp_vregs_idx += num;
107     g_assert(ctx->tmp_vregs_idx <= VECTOR_TEMPS_MAX);
108     return offset;
109 }
110 
111 static void gen_exception_raw(int excp)
112 {
113     gen_helper_raise_exception(cpu_env, tcg_constant_i32(excp));
114 }
115 
116 static void gen_exec_counters(DisasContext *ctx)
117 {
118     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT],
119                     hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets);
120     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT],
121                     hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns);
122     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT],
123                     hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns);
124 }
125 
126 static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
127 {
128     return translator_use_goto_tb(&ctx->base, dest);
129 }
130 
131 static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest, bool
132                         move_to_pc)
133 {
134     if (use_goto_tb(ctx, dest)) {
135         tcg_gen_goto_tb(idx);
136         if (move_to_pc) {
137             tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
138         }
139         tcg_gen_exit_tb(ctx->base.tb, idx);
140     } else {
141         if (move_to_pc) {
142             tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
143         }
144         tcg_gen_lookup_and_goto_ptr();
145     }
146 }
147 
148 static void gen_end_tb(DisasContext *ctx)
149 {
150     Packet *pkt = ctx->pkt;
151 
152     gen_exec_counters(ctx);
153 
154     if (ctx->branch_cond != TCG_COND_NEVER) {
155         if (ctx->branch_cond != TCG_COND_ALWAYS) {
156             TCGLabel *skip = gen_new_label();
157             tcg_gen_brcondi_tl(ctx->branch_cond, hex_branch_taken, 0, skip);
158             gen_goto_tb(ctx, 0, ctx->branch_dest, true);
159             gen_set_label(skip);
160             gen_goto_tb(ctx, 1, ctx->next_PC, false);
161         } else {
162             gen_goto_tb(ctx, 0, ctx->branch_dest, true);
163         }
164     } else if (ctx->is_tight_loop &&
165                pkt->insn[pkt->num_insns - 1].opcode == J2_endloop0) {
166         /*
167          * When we're in a tight loop, we defer the endloop0 processing
168          * to take advantage of direct block chaining
169          */
170         TCGLabel *skip = gen_new_label();
171         tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, skip);
172         tcg_gen_subi_tl(hex_gpr[HEX_REG_LC0], hex_gpr[HEX_REG_LC0], 1);
173         gen_goto_tb(ctx, 0, ctx->base.tb->pc, true);
174         gen_set_label(skip);
175         gen_goto_tb(ctx, 1, ctx->next_PC, false);
176     } else {
177         tcg_gen_lookup_and_goto_ptr();
178     }
179 
180     ctx->base.is_jmp = DISAS_NORETURN;
181 }
182 
183 static void gen_exception_end_tb(DisasContext *ctx, int excp)
184 {
185     gen_exec_counters(ctx);
186     tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->next_PC);
187     gen_exception_raw(excp);
188     ctx->base.is_jmp = DISAS_NORETURN;
189 
190 }
191 
192 #define PACKET_BUFFER_LEN              1028
193 static void print_pkt(Packet *pkt)
194 {
195     GString *buf = g_string_sized_new(PACKET_BUFFER_LEN);
196     snprint_a_pkt_debug(buf, pkt);
197     HEX_DEBUG_LOG("%s", buf->str);
198     g_string_free(buf, true);
199 }
200 #define HEX_DEBUG_PRINT_PKT(pkt) \
201     do { \
202         if (HEX_DEBUG) { \
203             print_pkt(pkt); \
204         } \
205     } while (0)
206 
207 static int read_packet_words(CPUHexagonState *env, DisasContext *ctx,
208                              uint32_t words[])
209 {
210     bool found_end = false;
211     int nwords, max_words;
212 
213     memset(words, 0, PACKET_WORDS_MAX * sizeof(uint32_t));
214     for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) {
215         words[nwords] =
216             translator_ldl(env, &ctx->base,
217                            ctx->base.pc_next + nwords * sizeof(uint32_t));
218         found_end = is_packet_end(words[nwords]);
219     }
220     if (!found_end) {
221         /* Read too many words without finding the end */
222         return 0;
223     }
224 
225     /* Check for page boundary crossing */
226     max_words = -(ctx->base.pc_next | TARGET_PAGE_MASK) / sizeof(uint32_t);
227     if (nwords > max_words) {
228         /* We can only cross a page boundary at the beginning of a TB */
229         g_assert(ctx->base.num_insns == 1);
230     }
231 
232     HEX_DEBUG_LOG("decode_packet: pc = 0x%x\n", ctx->base.pc_next);
233     HEX_DEBUG_LOG("    words = { ");
234     for (int i = 0; i < nwords; i++) {
235         HEX_DEBUG_LOG("0x%x, ", words[i]);
236     }
237     HEX_DEBUG_LOG("}\n");
238 
239     return nwords;
240 }
241 
242 static bool check_for_attrib(Packet *pkt, int attrib)
243 {
244     for (int i = 0; i < pkt->num_insns; i++) {
245         if (GET_ATTRIB(pkt->insn[i].opcode, attrib)) {
246             return true;
247         }
248     }
249     return false;
250 }
251 
252 static bool need_slot_cancelled(Packet *pkt)
253 {
254     /* We only need slot_cancelled for conditional store instructions */
255     for (int i = 0; i < pkt->num_insns; i++) {
256         uint16_t opcode = pkt->insn[i].opcode;
257         if (GET_ATTRIB(opcode, A_CONDEXEC) &&
258             GET_ATTRIB(opcode, A_SCALAR_STORE)) {
259             return true;
260         }
261     }
262     return false;
263 }
264 
265 static bool need_pred_written(Packet *pkt)
266 {
267     return check_for_attrib(pkt, A_WRITES_PRED_REG);
268 }
269 
270 static bool need_next_PC(DisasContext *ctx)
271 {
272     Packet *pkt = ctx->pkt;
273 
274     /* Check for conditional control flow or HW loop end */
275     for (int i = 0; i < pkt->num_insns; i++) {
276         uint16_t opcode = pkt->insn[i].opcode;
277         if (GET_ATTRIB(opcode, A_CONDEXEC) && GET_ATTRIB(opcode, A_COF)) {
278             return true;
279         }
280         if (GET_ATTRIB(opcode, A_HWLOOP0_END) ||
281             GET_ATTRIB(opcode, A_HWLOOP1_END)) {
282             return true;
283         }
284     }
285     return false;
286 }
287 
288 /*
289  * The opcode_analyze functions mark most of the writes in a packet
290  * However, there are some implicit writes marked as attributes
291  * of the applicable instructions.
292  */
293 static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum)
294 {
295     uint16_t opcode = ctx->insn->opcode;
296     if (GET_ATTRIB(opcode, attrib)) {
297         /*
298          * USR is used to set overflow and FP exceptions,
299          * so treat it as conditional
300          */
301         bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) ||
302                              rnum == HEX_REG_USR;
303 
304         /* LC0/LC1 is conditionally written by endloop instructions */
305         if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) &&
306             (opcode == J2_endloop0 ||
307              opcode == J2_endloop1 ||
308              opcode == J2_endloop01)) {
309             is_predicated = true;
310         }
311 
312         ctx_log_reg_write(ctx, rnum, is_predicated);
313     }
314 }
315 
316 static void mark_implicit_reg_writes(DisasContext *ctx)
317 {
318     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_FP,  HEX_REG_FP);
319     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SP,  HEX_REG_SP);
320     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LR,  HEX_REG_LR);
321     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC0, HEX_REG_LC0);
322     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0);
323     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1);
324     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1);
325     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_USR, HEX_REG_USR);
326     mark_implicit_reg_write(ctx, A_FPOP, HEX_REG_USR);
327 }
328 
329 static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum)
330 {
331     if (GET_ATTRIB(ctx->insn->opcode, attrib)) {
332         ctx_log_pred_write(ctx, pnum);
333     }
334 }
335 
336 static void mark_implicit_pred_writes(DisasContext *ctx)
337 {
338     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P0, 0);
339     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P1, 1);
340     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P2, 2);
341     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3);
342 }
343 
344 static void analyze_packet(DisasContext *ctx)
345 {
346     Packet *pkt = ctx->pkt;
347     ctx->need_pkt_has_store_s1 = false;
348     for (int i = 0; i < pkt->num_insns; i++) {
349         Insn *insn = &pkt->insn[i];
350         ctx->insn = insn;
351         if (opcode_analyze[insn->opcode]) {
352             opcode_analyze[insn->opcode](ctx);
353         }
354         mark_implicit_reg_writes(ctx);
355         mark_implicit_pred_writes(ctx);
356     }
357 }
358 
359 static void gen_start_packet(DisasContext *ctx)
360 {
361     Packet *pkt = ctx->pkt;
362     target_ulong next_PC = ctx->base.pc_next + pkt->encod_pkt_size_in_bytes;
363     int i;
364 
365     /* Clear out the disassembly context */
366     ctx->next_PC = next_PC;
367     ctx->reg_log_idx = 0;
368     bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
369     bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
370     ctx->preg_log_idx = 0;
371     bitmap_zero(ctx->pregs_written, NUM_PREGS);
372     ctx->future_vregs_idx = 0;
373     ctx->tmp_vregs_idx = 0;
374     ctx->vreg_log_idx = 0;
375     bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS);
376     bitmap_zero(ctx->vregs_updated, NUM_VREGS);
377     bitmap_zero(ctx->vregs_select, NUM_VREGS);
378     bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS);
379     bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS);
380     ctx->qreg_log_idx = 0;
381     for (i = 0; i < STORES_MAX; i++) {
382         ctx->store_width[i] = 0;
383     }
384     ctx->s1_store_processed = false;
385     ctx->pre_commit = true;
386 
387     analyze_packet(ctx);
388 
389     if (ctx->need_pkt_has_store_s1) {
390         tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1);
391     }
392 
393     /*
394      * pregs_written is used both in the analyze phase as well as the code
395      * gen phase, so clear it again.
396      */
397     bitmap_zero(ctx->pregs_written, NUM_PREGS);
398 
399     if (HEX_DEBUG) {
400         /* Handy place to set a breakpoint before the packet executes */
401         gen_helper_debug_start_packet(cpu_env);
402         tcg_gen_movi_tl(hex_this_PC, ctx->base.pc_next);
403     }
404 
405     /* Initialize the runtime state for packet semantics */
406     if (need_slot_cancelled(pkt)) {
407         tcg_gen_movi_tl(hex_slot_cancelled, 0);
408     }
409     if (pkt->pkt_has_cof) {
410         if (pkt->pkt_has_multi_cof) {
411             tcg_gen_movi_tl(hex_branch_taken, 0);
412         }
413         if (need_next_PC(ctx)) {
414             tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], next_PC);
415         }
416     }
417     if (need_pred_written(pkt)) {
418         tcg_gen_movi_tl(hex_pred_written, 0);
419     }
420 
421     /* Preload the predicated registers into hex_new_value[i] */
422     if (!bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) {
423         int i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
424         while (i < TOTAL_PER_THREAD_REGS) {
425             tcg_gen_mov_tl(hex_new_value[i], hex_gpr[i]);
426             i = find_next_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS,
427                               i + 1);
428         }
429     }
430 
431     /* Preload the predicated HVX registers into future_VRegs and tmp_VRegs */
432     if (!bitmap_empty(ctx->predicated_future_vregs, NUM_VREGS)) {
433         int i = find_first_bit(ctx->predicated_future_vregs, NUM_VREGS);
434         while (i < NUM_VREGS) {
435             const intptr_t VdV_off =
436                 ctx_future_vreg_off(ctx, i, 1, true);
437             intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]);
438             tcg_gen_gvec_mov(MO_64, VdV_off,
439                              src_off,
440                              sizeof(MMVector),
441                              sizeof(MMVector));
442             i = find_next_bit(ctx->predicated_future_vregs, NUM_VREGS, i + 1);
443         }
444     }
445     if (!bitmap_empty(ctx->predicated_tmp_vregs, NUM_VREGS)) {
446         int i = find_first_bit(ctx->predicated_tmp_vregs, NUM_VREGS);
447         while (i < NUM_VREGS) {
448             const intptr_t VdV_off =
449                 ctx_tmp_vreg_off(ctx, i, 1, true);
450             intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]);
451             tcg_gen_gvec_mov(MO_64, VdV_off,
452                              src_off,
453                              sizeof(MMVector),
454                              sizeof(MMVector));
455             i = find_next_bit(ctx->predicated_tmp_vregs, NUM_VREGS, i + 1);
456         }
457     }
458 }
459 
460 bool is_gather_store_insn(DisasContext *ctx)
461 {
462     Packet *pkt = ctx->pkt;
463     Insn *insn = ctx->insn;
464     if (GET_ATTRIB(insn->opcode, A_CVI_NEW) &&
465         insn->new_value_producer_slot == 1) {
466         /* Look for gather instruction */
467         for (int i = 0; i < pkt->num_insns; i++) {
468             Insn *in = &pkt->insn[i];
469             if (GET_ATTRIB(in->opcode, A_CVI_GATHER) && in->slot == 1) {
470                 return true;
471             }
472         }
473     }
474     return false;
475 }
476 
477 static void mark_store_width(DisasContext *ctx)
478 {
479     uint16_t opcode = ctx->insn->opcode;
480     uint32_t slot = ctx->insn->slot;
481     uint8_t width = 0;
482 
483     if (GET_ATTRIB(opcode, A_SCALAR_STORE)) {
484         if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) {
485             width |= 1;
486         }
487         if (GET_ATTRIB(opcode, A_MEMSIZE_2B)) {
488             width |= 2;
489         }
490         if (GET_ATTRIB(opcode, A_MEMSIZE_4B)) {
491             width |= 4;
492         }
493         if (GET_ATTRIB(opcode, A_MEMSIZE_8B)) {
494             width |= 8;
495         }
496         tcg_debug_assert(is_power_of_2(width));
497         ctx->store_width[slot] = width;
498     }
499 }
500 
501 static void gen_insn(DisasContext *ctx)
502 {
503     if (ctx->insn->generate) {
504         ctx->insn->generate(ctx);
505         mark_store_width(ctx);
506     } else {
507         gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE);
508     }
509 }
510 
511 /*
512  * Helpers for generating the packet commit
513  */
514 static void gen_reg_writes(DisasContext *ctx)
515 {
516     int i;
517 
518     for (i = 0; i < ctx->reg_log_idx; i++) {
519         int reg_num = ctx->reg_log[i];
520 
521         tcg_gen_mov_tl(hex_gpr[reg_num], hex_new_value[reg_num]);
522 
523         /*
524          * ctx->is_tight_loop is set when SA0 points to the beginning of the TB.
525          * If we write to SA0, we have to turn off tight loop handling.
526          */
527         if (reg_num == HEX_REG_SA0) {
528             ctx->is_tight_loop = false;
529         }
530     }
531 }
532 
533 static void gen_pred_writes(DisasContext *ctx)
534 {
535     int i;
536 
537     /* Early exit if the log is empty */
538     if (!ctx->preg_log_idx) {
539         return;
540     }
541 
542     /*
543      * Only endloop instructions will conditionally
544      * write a predicate.  If there are no endloop
545      * instructions, we can use the non-conditional
546      * write of the predicates.
547      */
548     if (ctx->pkt->pkt_has_endloop) {
549         TCGv zero = tcg_constant_tl(0);
550         TCGv pred_written = tcg_temp_new();
551         for (i = 0; i < ctx->preg_log_idx; i++) {
552             int pred_num = ctx->preg_log[i];
553 
554             tcg_gen_andi_tl(pred_written, hex_pred_written, 1 << pred_num);
555             tcg_gen_movcond_tl(TCG_COND_NE, hex_pred[pred_num],
556                                pred_written, zero,
557                                hex_new_pred_value[pred_num],
558                                hex_pred[pred_num]);
559         }
560     } else {
561         for (i = 0; i < ctx->preg_log_idx; i++) {
562             int pred_num = ctx->preg_log[i];
563             tcg_gen_mov_tl(hex_pred[pred_num], hex_new_pred_value[pred_num]);
564             if (HEX_DEBUG) {
565                 /* Do this so HELPER(debug_commit_end) will know */
566                 tcg_gen_ori_tl(hex_pred_written, hex_pred_written,
567                                1 << pred_num);
568             }
569         }
570     }
571 }
572 
573 static void gen_check_store_width(DisasContext *ctx, int slot_num)
574 {
575     if (HEX_DEBUG) {
576         TCGv slot = tcg_constant_tl(slot_num);
577         TCGv check = tcg_constant_tl(ctx->store_width[slot_num]);
578         gen_helper_debug_check_store_width(cpu_env, slot, check);
579     }
580 }
581 
582 static bool slot_is_predicated(Packet *pkt, int slot_num)
583 {
584     for (int i = 0; i < pkt->num_insns; i++) {
585         if (pkt->insn[i].slot == slot_num) {
586             return GET_ATTRIB(pkt->insn[i].opcode, A_CONDEXEC);
587         }
588     }
589     /* If we get to here, we didn't find an instruction in the requested slot */
590     g_assert_not_reached();
591 }
592 
593 void process_store(DisasContext *ctx, int slot_num)
594 {
595     bool is_predicated = slot_is_predicated(ctx->pkt, slot_num);
596     TCGLabel *label_end = NULL;
597 
598     /*
599      * We may have already processed this store
600      * See CHECK_NOSHUF in macros.h
601      */
602     if (slot_num == 1 && ctx->s1_store_processed) {
603         return;
604     }
605     ctx->s1_store_processed = true;
606 
607     if (is_predicated) {
608         TCGv cancelled = tcg_temp_new();
609         label_end = gen_new_label();
610 
611         /* Don't do anything if the slot was cancelled */
612         tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1);
613         tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end);
614     }
615     {
616         TCGv address = tcg_temp_new();
617         tcg_gen_mov_tl(address, hex_store_addr[slot_num]);
618 
619         /*
620          * If we know the width from the DisasContext, we can
621          * generate much cleaner code.
622          * Unfortunately, not all instructions execute the fSTORE
623          * macro during code generation.  Anything that uses the
624          * generic helper will have this problem.  Instructions
625          * that use fWRAP to generate proper TCG code will be OK.
626          */
627         switch (ctx->store_width[slot_num]) {
628         case 1:
629             gen_check_store_width(ctx, slot_num);
630             tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
631                                hex_store_addr[slot_num],
632                                ctx->mem_idx, MO_UB);
633             break;
634         case 2:
635             gen_check_store_width(ctx, slot_num);
636             tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
637                                hex_store_addr[slot_num],
638                                ctx->mem_idx, MO_TEUW);
639             break;
640         case 4:
641             gen_check_store_width(ctx, slot_num);
642             tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
643                                hex_store_addr[slot_num],
644                                ctx->mem_idx, MO_TEUL);
645             break;
646         case 8:
647             gen_check_store_width(ctx, slot_num);
648             tcg_gen_qemu_st_i64(hex_store_val64[slot_num],
649                                 hex_store_addr[slot_num],
650                                 ctx->mem_idx, MO_TEUQ);
651             break;
652         default:
653             {
654                 /*
655                  * If we get to here, we don't know the width at
656                  * TCG generation time, we'll use a helper to
657                  * avoid branching based on the width at runtime.
658                  */
659                 TCGv slot = tcg_constant_tl(slot_num);
660                 gen_helper_commit_store(cpu_env, slot);
661             }
662         }
663     }
664     if (is_predicated) {
665         gen_set_label(label_end);
666     }
667 }
668 
669 static void process_store_log(DisasContext *ctx)
670 {
671     /*
672      *  When a packet has two stores, the hardware processes
673      *  slot 1 and then slot 0.  This will be important when
674      *  the memory accesses overlap.
675      */
676     Packet *pkt = ctx->pkt;
677     if (pkt->pkt_has_store_s1) {
678         g_assert(!pkt->pkt_has_dczeroa);
679         process_store(ctx, 1);
680     }
681     if (pkt->pkt_has_store_s0) {
682         g_assert(!pkt->pkt_has_dczeroa);
683         process_store(ctx, 0);
684     }
685 }
686 
687 /* Zero out a 32-bit cache line */
688 static void process_dczeroa(DisasContext *ctx)
689 {
690     if (ctx->pkt->pkt_has_dczeroa) {
691         /* Store 32 bytes of zero starting at (addr & ~0x1f) */
692         TCGv addr = tcg_temp_new();
693         TCGv_i64 zero = tcg_constant_i64(0);
694 
695         tcg_gen_andi_tl(addr, hex_dczero_addr, ~0x1f);
696         tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
697         tcg_gen_addi_tl(addr, addr, 8);
698         tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
699         tcg_gen_addi_tl(addr, addr, 8);
700         tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
701         tcg_gen_addi_tl(addr, addr, 8);
702         tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
703     }
704 }
705 
706 static bool pkt_has_hvx_store(Packet *pkt)
707 {
708     int i;
709     for (i = 0; i < pkt->num_insns; i++) {
710         int opcode = pkt->insn[i].opcode;
711         if (GET_ATTRIB(opcode, A_CVI) && GET_ATTRIB(opcode, A_STORE)) {
712             return true;
713         }
714     }
715     return false;
716 }
717 
718 static void gen_commit_hvx(DisasContext *ctx)
719 {
720     int i;
721 
722     /*
723      *    for (i = 0; i < ctx->vreg_log_idx; i++) {
724      *        int rnum = ctx->vreg_log[i];
725      *        env->VRegs[rnum] = env->future_VRegs[rnum];
726      *    }
727      */
728     for (i = 0; i < ctx->vreg_log_idx; i++) {
729         int rnum = ctx->vreg_log[i];
730         intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]);
731         intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false);
732         size_t size = sizeof(MMVector);
733 
734         tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
735     }
736 
737     /*
738      *    for (i = 0; i < ctx->qreg_log_idx; i++) {
739      *        int rnum = ctx->qreg_log[i];
740      *        env->QRegs[rnum] = env->future_QRegs[rnum];
741      *    }
742      */
743     for (i = 0; i < ctx->qreg_log_idx; i++) {
744         int rnum = ctx->qreg_log[i];
745         intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]);
746         intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]);
747         size_t size = sizeof(MMQReg);
748 
749         tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
750     }
751 
752     if (pkt_has_hvx_store(ctx->pkt)) {
753         gen_helper_commit_hvx_stores(cpu_env);
754     }
755 }
756 
757 static void update_exec_counters(DisasContext *ctx)
758 {
759     Packet *pkt = ctx->pkt;
760     int num_insns = pkt->num_insns;
761     int num_real_insns = 0;
762     int num_hvx_insns = 0;
763 
764     for (int i = 0; i < num_insns; i++) {
765         if (!pkt->insn[i].is_endloop &&
766             !pkt->insn[i].part1 &&
767             !GET_ATTRIB(pkt->insn[i].opcode, A_IT_NOP)) {
768             num_real_insns++;
769         }
770         if (GET_ATTRIB(pkt->insn[i].opcode, A_CVI)) {
771             num_hvx_insns++;
772         }
773     }
774 
775     ctx->num_packets++;
776     ctx->num_insns += num_real_insns;
777     ctx->num_hvx_insns += num_hvx_insns;
778 }
779 
780 static void gen_commit_packet(DisasContext *ctx)
781 {
782     /*
783      * If there is more than one store in a packet, make sure they are all OK
784      * before proceeding with the rest of the packet commit.
785      *
786      * dczeroa has to be the only store operation in the packet, so we go
787      * ahead and process that first.
788      *
789      * When there is an HVX store, there can also be a scalar store in either
790      * slot 0 or slot1, so we create a mask for the helper to indicate what
791      * work to do.
792      *
793      * When there are two scalar stores, we probe the one in slot 0.
794      *
795      * Note that we don't call the probe helper for packets with only one
796      * store.  Therefore, we call process_store_log before anything else
797      * involved in committing the packet.
798      */
799     Packet *pkt = ctx->pkt;
800     bool has_store_s0 = pkt->pkt_has_store_s0;
801     bool has_store_s1 = (pkt->pkt_has_store_s1 && !ctx->s1_store_processed);
802     bool has_hvx_store = pkt_has_hvx_store(pkt);
803     if (pkt->pkt_has_dczeroa) {
804         /*
805          * The dczeroa will be the store in slot 0, check that we don't have
806          * a store in slot 1 or an HVX store.
807          */
808         g_assert(!has_store_s1 && !has_hvx_store);
809         process_dczeroa(ctx);
810     } else if (has_hvx_store) {
811         if (!has_store_s0 && !has_store_s1) {
812             TCGv mem_idx = tcg_constant_tl(ctx->mem_idx);
813             gen_helper_probe_hvx_stores(cpu_env, mem_idx);
814         } else {
815             int mask = 0;
816 
817             if (has_store_s0) {
818                 mask =
819                     FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0, 1);
820             }
821             if (has_store_s1) {
822                 mask =
823                     FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1, 1);
824             }
825             if (has_hvx_store) {
826                 mask =
827                     FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
828                                HAS_HVX_STORES, 1);
829             }
830             if (has_store_s0 && slot_is_predicated(pkt, 0)) {
831                 mask =
832                     FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
833                                S0_IS_PRED, 1);
834             }
835             if (has_store_s1 && slot_is_predicated(pkt, 1)) {
836                 mask =
837                     FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
838                                S1_IS_PRED, 1);
839             }
840             mask = FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, MMU_IDX,
841                               ctx->mem_idx);
842             gen_helper_probe_pkt_scalar_hvx_stores(cpu_env,
843                                                    tcg_constant_tl(mask));
844         }
845     } else if (has_store_s0 && has_store_s1) {
846         /*
847          * process_store_log will execute the slot 1 store first,
848          * so we only have to probe the store in slot 0
849          */
850         int args = 0;
851         args =
852             FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, ctx->mem_idx);
853         if (slot_is_predicated(pkt, 0)) {
854             args =
855                 FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED, 1);
856         }
857         TCGv args_tcgv = tcg_constant_tl(args);
858         gen_helper_probe_pkt_scalar_store_s0(cpu_env, args_tcgv);
859     }
860 
861     process_store_log(ctx);
862 
863     gen_reg_writes(ctx);
864     gen_pred_writes(ctx);
865     if (pkt->pkt_has_hvx) {
866         gen_commit_hvx(ctx);
867     }
868     update_exec_counters(ctx);
869     if (HEX_DEBUG) {
870         TCGv has_st0 =
871             tcg_constant_tl(pkt->pkt_has_store_s0 && !pkt->pkt_has_dczeroa);
872         TCGv has_st1 =
873             tcg_constant_tl(pkt->pkt_has_store_s1 && !pkt->pkt_has_dczeroa);
874 
875         /* Handy place to set a breakpoint at the end of execution */
876         gen_helper_debug_commit_end(cpu_env, has_st0, has_st1);
877     }
878 
879     if (pkt->vhist_insn != NULL) {
880         ctx->pre_commit = false;
881         ctx->insn = pkt->vhist_insn;
882         pkt->vhist_insn->generate(ctx);
883     }
884 
885     if (pkt->pkt_has_cof) {
886         gen_end_tb(ctx);
887     }
888 }
889 
890 static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx)
891 {
892     uint32_t words[PACKET_WORDS_MAX];
893     int nwords;
894     Packet pkt;
895     int i;
896 
897     nwords = read_packet_words(env, ctx, words);
898     if (!nwords) {
899         gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET);
900         return;
901     }
902 
903     if (decode_packet(nwords, words, &pkt, false) > 0) {
904         pkt.pc = ctx->base.pc_next;
905         HEX_DEBUG_PRINT_PKT(&pkt);
906         ctx->pkt = &pkt;
907         gen_start_packet(ctx);
908         for (i = 0; i < pkt.num_insns; i++) {
909             ctx->insn = &pkt.insn[i];
910             gen_insn(ctx);
911         }
912         gen_commit_packet(ctx);
913         ctx->base.pc_next += pkt.encod_pkt_size_in_bytes;
914     } else {
915         gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET);
916     }
917 }
918 
919 static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
920                                           CPUState *cs)
921 {
922     DisasContext *ctx = container_of(dcbase, DisasContext, base);
923     uint32_t hex_flags = dcbase->tb->flags;
924 
925     ctx->mem_idx = MMU_USER_IDX;
926     ctx->num_packets = 0;
927     ctx->num_insns = 0;
928     ctx->num_hvx_insns = 0;
929     ctx->branch_cond = TCG_COND_NEVER;
930     ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP);
931 }
932 
933 static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
934 {
935 }
936 
937 static void hexagon_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
938 {
939     DisasContext *ctx = container_of(dcbase, DisasContext, base);
940 
941     tcg_gen_insn_start(ctx->base.pc_next);
942 }
943 
944 static bool pkt_crosses_page(CPUHexagonState *env, DisasContext *ctx)
945 {
946     target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
947     bool found_end = false;
948     int nwords;
949 
950     for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) {
951         uint32_t word = cpu_ldl_code(env,
952                             ctx->base.pc_next + nwords * sizeof(uint32_t));
953         found_end = is_packet_end(word);
954     }
955     uint32_t next_ptr =  ctx->base.pc_next + nwords * sizeof(uint32_t);
956     return found_end && next_ptr - page_start >= TARGET_PAGE_SIZE;
957 }
958 
959 static void hexagon_tr_translate_packet(DisasContextBase *dcbase, CPUState *cpu)
960 {
961     DisasContext *ctx = container_of(dcbase, DisasContext, base);
962     CPUHexagonState *env = cpu->env_ptr;
963 
964     decode_and_translate_packet(env, ctx);
965 
966     if (ctx->base.is_jmp == DISAS_NEXT) {
967         target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
968         target_ulong bytes_max = PACKET_WORDS_MAX * sizeof(target_ulong);
969 
970         if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE ||
971             (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE - bytes_max &&
972              pkt_crosses_page(env, ctx))) {
973             ctx->base.is_jmp = DISAS_TOO_MANY;
974         }
975 
976         /*
977          * The CPU log is used to compare against LLDB single stepping,
978          * so end the TLB after every packet.
979          */
980         HexagonCPU *hex_cpu = env_archcpu(env);
981         if (hex_cpu->lldb_compat && qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
982             ctx->base.is_jmp = DISAS_TOO_MANY;
983         }
984     }
985 }
986 
987 static void hexagon_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
988 {
989     DisasContext *ctx = container_of(dcbase, DisasContext, base);
990 
991     switch (ctx->base.is_jmp) {
992     case DISAS_TOO_MANY:
993         gen_exec_counters(ctx);
994         tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next);
995         tcg_gen_exit_tb(NULL, 0);
996         break;
997     case DISAS_NORETURN:
998         break;
999     default:
1000         g_assert_not_reached();
1001     }
1002 }
1003 
1004 static void hexagon_tr_disas_log(const DisasContextBase *dcbase,
1005                                  CPUState *cpu, FILE *logfile)
1006 {
1007     fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first));
1008     target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size);
1009 }
1010 
1011 
1012 static const TranslatorOps hexagon_tr_ops = {
1013     .init_disas_context = hexagon_tr_init_disas_context,
1014     .tb_start           = hexagon_tr_tb_start,
1015     .insn_start         = hexagon_tr_insn_start,
1016     .translate_insn     = hexagon_tr_translate_packet,
1017     .tb_stop            = hexagon_tr_tb_stop,
1018     .disas_log          = hexagon_tr_disas_log,
1019 };
1020 
1021 void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
1022                            target_ulong pc, void *host_pc)
1023 {
1024     DisasContext ctx;
1025 
1026     translator_loop(cs, tb, max_insns, pc, host_pc,
1027                     &hexagon_tr_ops, &ctx.base);
1028 }
1029 
1030 #define NAME_LEN               64
1031 static char new_value_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
1032 static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
1033 static char new_pred_value_names[NUM_PREGS][NAME_LEN];
1034 static char store_addr_names[STORES_MAX][NAME_LEN];
1035 static char store_width_names[STORES_MAX][NAME_LEN];
1036 static char store_val32_names[STORES_MAX][NAME_LEN];
1037 static char store_val64_names[STORES_MAX][NAME_LEN];
1038 static char vstore_addr_names[VSTORES_MAX][NAME_LEN];
1039 static char vstore_size_names[VSTORES_MAX][NAME_LEN];
1040 static char vstore_pending_names[VSTORES_MAX][NAME_LEN];
1041 
1042 void hexagon_translate_init(void)
1043 {
1044     int i;
1045 
1046     opcode_init();
1047 
1048     for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
1049         hex_gpr[i] = tcg_global_mem_new(cpu_env,
1050             offsetof(CPUHexagonState, gpr[i]),
1051             hexagon_regnames[i]);
1052 
1053         snprintf(new_value_names[i], NAME_LEN, "new_%s", hexagon_regnames[i]);
1054         hex_new_value[i] = tcg_global_mem_new(cpu_env,
1055             offsetof(CPUHexagonState, new_value[i]),
1056             new_value_names[i]);
1057 
1058         if (HEX_DEBUG) {
1059             snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s",
1060                      hexagon_regnames[i]);
1061             hex_reg_written[i] = tcg_global_mem_new(cpu_env,
1062                 offsetof(CPUHexagonState, reg_written[i]),
1063                 reg_written_names[i]);
1064         }
1065     }
1066     for (i = 0; i < NUM_PREGS; i++) {
1067         hex_pred[i] = tcg_global_mem_new(cpu_env,
1068             offsetof(CPUHexagonState, pred[i]),
1069             hexagon_prednames[i]);
1070 
1071         snprintf(new_pred_value_names[i], NAME_LEN, "new_pred_%s",
1072                  hexagon_prednames[i]);
1073         hex_new_pred_value[i] = tcg_global_mem_new(cpu_env,
1074             offsetof(CPUHexagonState, new_pred_value[i]),
1075             new_pred_value_names[i]);
1076     }
1077     hex_pred_written = tcg_global_mem_new(cpu_env,
1078         offsetof(CPUHexagonState, pred_written), "pred_written");
1079     hex_this_PC = tcg_global_mem_new(cpu_env,
1080         offsetof(CPUHexagonState, this_PC), "this_PC");
1081     hex_slot_cancelled = tcg_global_mem_new(cpu_env,
1082         offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled");
1083     hex_branch_taken = tcg_global_mem_new(cpu_env,
1084         offsetof(CPUHexagonState, branch_taken), "branch_taken");
1085     hex_pkt_has_store_s1 = tcg_global_mem_new(cpu_env,
1086         offsetof(CPUHexagonState, pkt_has_store_s1), "pkt_has_store_s1");
1087     hex_dczero_addr = tcg_global_mem_new(cpu_env,
1088         offsetof(CPUHexagonState, dczero_addr), "dczero_addr");
1089     hex_llsc_addr = tcg_global_mem_new(cpu_env,
1090         offsetof(CPUHexagonState, llsc_addr), "llsc_addr");
1091     hex_llsc_val = tcg_global_mem_new(cpu_env,
1092         offsetof(CPUHexagonState, llsc_val), "llsc_val");
1093     hex_llsc_val_i64 = tcg_global_mem_new_i64(cpu_env,
1094         offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64");
1095     for (i = 0; i < STORES_MAX; i++) {
1096         snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i);
1097         hex_store_addr[i] = tcg_global_mem_new(cpu_env,
1098             offsetof(CPUHexagonState, mem_log_stores[i].va),
1099             store_addr_names[i]);
1100 
1101         snprintf(store_width_names[i], NAME_LEN, "store_width_%d", i);
1102         hex_store_width[i] = tcg_global_mem_new(cpu_env,
1103             offsetof(CPUHexagonState, mem_log_stores[i].width),
1104             store_width_names[i]);
1105 
1106         snprintf(store_val32_names[i], NAME_LEN, "store_val32_%d", i);
1107         hex_store_val32[i] = tcg_global_mem_new(cpu_env,
1108             offsetof(CPUHexagonState, mem_log_stores[i].data32),
1109             store_val32_names[i]);
1110 
1111         snprintf(store_val64_names[i], NAME_LEN, "store_val64_%d", i);
1112         hex_store_val64[i] = tcg_global_mem_new_i64(cpu_env,
1113             offsetof(CPUHexagonState, mem_log_stores[i].data64),
1114             store_val64_names[i]);
1115     }
1116     for (int i = 0; i < VSTORES_MAX; i++) {
1117         snprintf(vstore_addr_names[i], NAME_LEN, "vstore_addr_%d", i);
1118         hex_vstore_addr[i] = tcg_global_mem_new(cpu_env,
1119             offsetof(CPUHexagonState, vstore[i].va),
1120             vstore_addr_names[i]);
1121 
1122         snprintf(vstore_size_names[i], NAME_LEN, "vstore_size_%d", i);
1123         hex_vstore_size[i] = tcg_global_mem_new(cpu_env,
1124             offsetof(CPUHexagonState, vstore[i].size),
1125             vstore_size_names[i]);
1126 
1127         snprintf(vstore_pending_names[i], NAME_LEN, "vstore_pending_%d", i);
1128         hex_vstore_pending[i] = tcg_global_mem_new(cpu_env,
1129             offsetof(CPUHexagonState, vstore_pending[i]),
1130             vstore_pending_names[i]);
1131     }
1132 }
1133