xref: /openbmc/qemu/target/hexagon/translate.c (revision 1770b2f2d3d6fe8f1e2d61692692264cac44340d)
1 /*
2  *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #define QEMU_GENERATE
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "tcg/tcg-op.h"
22 #include "tcg/tcg-op-gvec.h"
23 #include "exec/cpu_ldst.h"
24 #include "exec/log.h"
25 #include "internal.h"
26 #include "attribs.h"
27 #include "insn.h"
28 #include "decode.h"
29 #include "translate.h"
30 #include "printinsn.h"
31 
/*
 * TCG values used by the code generators in this file.
 * NOTE(review): where these are created and what state they are bound to
 * is not visible here -- confirm against the translator init code.
 */
/* General registers, indexed by HEX_REG_* */
TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
/* Predicate registers */
TCGv hex_pred[NUM_PREGS];
TCGv hex_this_PC;
/* Bit slot_num is tested before a predicated store is committed */
TCGv hex_slot_cancelled;
TCGv hex_branch_taken;
/* Pending register values, copied into hex_gpr[] at packet commit */
TCGv hex_new_value[TOTAL_PER_THREAD_REGS];
TCGv hex_reg_written[TOTAL_PER_THREAD_REGS];
/* Pending predicate values, copied into hex_pred[] at packet commit */
TCGv hex_new_pred_value[NUM_PREGS];
/* Bit pred_num is tested when predicates are committed conditionally */
TCGv hex_pred_written;
/* Per-slot address and value of a scalar store awaiting commit */
TCGv hex_store_addr[STORES_MAX];
TCGv hex_store_width[STORES_MAX];
TCGv hex_store_val32[STORES_MAX];
TCGv_i64 hex_store_val64[STORES_MAX];
TCGv hex_pkt_has_store_s1;
/* Address used when committing a dczeroa */
TCGv hex_dczero_addr;
TCGv hex_llsc_addr;
TCGv hex_llsc_val;
TCGv_i64 hex_llsc_val_i64;
/* Bit rnum is tested when predicated HVX registers are committed */
TCGv hex_VRegs_updated;
TCGv hex_QRegs_updated;
TCGv hex_vstore_addr[VSTORES_MAX];
TCGv hex_vstore_size[VSTORES_MAX];
TCGv hex_vstore_pending[VSTORES_MAX];
55 
/* Printable names of the predicate registers; entry i is "p<i>" */
static const char * const hexagon_prednames[] = {
    [0] = "p0",
    [1] = "p1",
    [2] = "p2",
    [3] = "p3",
};
59 
60 intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
61                           int num, bool alloc_ok)
62 {
63     intptr_t offset;
64 
65     /* See if it is already allocated */
66     for (int i = 0; i < ctx->future_vregs_idx; i++) {
67         if (ctx->future_vregs_num[i] == regnum) {
68             return offsetof(CPUHexagonState, future_VRegs[i]);
69         }
70     }
71 
72     g_assert(alloc_ok);
73     offset = offsetof(CPUHexagonState, future_VRegs[ctx->future_vregs_idx]);
74     for (int i = 0; i < num; i++) {
75         ctx->future_vregs_num[ctx->future_vregs_idx + i] = regnum++;
76     }
77     ctx->future_vregs_idx += num;
78     g_assert(ctx->future_vregs_idx <= VECTOR_TEMPS_MAX);
79     return offset;
80 }
81 
82 intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
83                           int num, bool alloc_ok)
84 {
85     intptr_t offset;
86 
87     /* See if it is already allocated */
88     for (int i = 0; i < ctx->tmp_vregs_idx; i++) {
89         if (ctx->tmp_vregs_num[i] == regnum) {
90             return offsetof(CPUHexagonState, tmp_VRegs[i]);
91         }
92     }
93 
94     g_assert(alloc_ok);
95     offset = offsetof(CPUHexagonState, tmp_VRegs[ctx->tmp_vregs_idx]);
96     for (int i = 0; i < num; i++) {
97         ctx->tmp_vregs_num[ctx->tmp_vregs_idx + i] = regnum++;
98     }
99     ctx->tmp_vregs_idx += num;
100     g_assert(ctx->tmp_vregs_idx <= VECTOR_TEMPS_MAX);
101     return offset;
102 }
103 
104 static void gen_exception_raw(int excp)
105 {
106     gen_helper_raise_exception(cpu_env, tcg_constant_i32(excp));
107 }
108 
109 static void gen_exec_counters(DisasContext *ctx)
110 {
111     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT],
112                     hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets);
113     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT],
114                     hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns);
115     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT],
116                     hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns);
117 }
118 
119 static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
120 {
121     return translator_use_goto_tb(&ctx->base, dest);
122 }
123 
124 static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest)
125 {
126     if (use_goto_tb(ctx, dest)) {
127         tcg_gen_goto_tb(idx);
128         tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
129         tcg_gen_exit_tb(ctx->base.tb, idx);
130     } else {
131         tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
132         tcg_gen_lookup_and_goto_ptr();
133     }
134 }
135 
136 static void gen_end_tb(DisasContext *ctx)
137 {
138     Packet *pkt = ctx->pkt;
139 
140     gen_exec_counters(ctx);
141 
142     if (ctx->branch_cond != TCG_COND_NEVER) {
143         if (ctx->branch_cond != TCG_COND_ALWAYS) {
144             TCGLabel *skip = gen_new_label();
145             tcg_gen_brcondi_tl(ctx->branch_cond, hex_branch_taken, 0, skip);
146             gen_goto_tb(ctx, 0, ctx->branch_dest);
147             gen_set_label(skip);
148             gen_goto_tb(ctx, 1, ctx->next_PC);
149         } else {
150             gen_goto_tb(ctx, 0, ctx->branch_dest);
151         }
152     } else if (ctx->is_tight_loop &&
153                pkt->insn[pkt->num_insns - 1].opcode == J2_endloop0) {
154         /*
155          * When we're in a tight loop, we defer the endloop0 processing
156          * to take advantage of direct block chaining
157          */
158         TCGLabel *skip = gen_new_label();
159         tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, skip);
160         tcg_gen_subi_tl(hex_gpr[HEX_REG_LC0], hex_gpr[HEX_REG_LC0], 1);
161         gen_goto_tb(ctx, 0, ctx->base.tb->pc);
162         gen_set_label(skip);
163         gen_goto_tb(ctx, 1, ctx->next_PC);
164     } else {
165         tcg_gen_lookup_and_goto_ptr();
166     }
167 
168     ctx->base.is_jmp = DISAS_NORETURN;
169 }
170 
171 static void gen_exception_end_tb(DisasContext *ctx, int excp)
172 {
173     gen_exec_counters(ctx);
174     tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->next_PC);
175     gen_exception_raw(excp);
176     ctx->base.is_jmp = DISAS_NORETURN;
177 
178 }
179 
180 #define PACKET_BUFFER_LEN              1028
181 static void print_pkt(Packet *pkt)
182 {
183     GString *buf = g_string_sized_new(PACKET_BUFFER_LEN);
184     snprint_a_pkt_debug(buf, pkt);
185     HEX_DEBUG_LOG("%s", buf->str);
186     g_string_free(buf, true);
187 }
188 #define HEX_DEBUG_PRINT_PKT(pkt) \
189     do { \
190         if (HEX_DEBUG) { \
191             print_pkt(pkt); \
192         } \
193     } while (0)
194 
195 static int read_packet_words(CPUHexagonState *env, DisasContext *ctx,
196                              uint32_t words[])
197 {
198     bool found_end = false;
199     int nwords, max_words;
200 
201     memset(words, 0, PACKET_WORDS_MAX * sizeof(uint32_t));
202     for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) {
203         words[nwords] =
204             translator_ldl(env, &ctx->base,
205                            ctx->base.pc_next + nwords * sizeof(uint32_t));
206         found_end = is_packet_end(words[nwords]);
207     }
208     if (!found_end) {
209         /* Read too many words without finding the end */
210         return 0;
211     }
212 
213     /* Check for page boundary crossing */
214     max_words = -(ctx->base.pc_next | TARGET_PAGE_MASK) / sizeof(uint32_t);
215     if (nwords > max_words) {
216         /* We can only cross a page boundary at the beginning of a TB */
217         g_assert(ctx->base.num_insns == 1);
218     }
219 
220     HEX_DEBUG_LOG("decode_packet: pc = 0x%x\n", ctx->base.pc_next);
221     HEX_DEBUG_LOG("    words = { ");
222     for (int i = 0; i < nwords; i++) {
223         HEX_DEBUG_LOG("0x%x, ", words[i]);
224     }
225     HEX_DEBUG_LOG("}\n");
226 
227     return nwords;
228 }
229 
230 static bool check_for_attrib(Packet *pkt, int attrib)
231 {
232     for (int i = 0; i < pkt->num_insns; i++) {
233         if (GET_ATTRIB(pkt->insn[i].opcode, attrib)) {
234             return true;
235         }
236     }
237     return false;
238 }
239 
240 static bool need_slot_cancelled(Packet *pkt)
241 {
242     return check_for_attrib(pkt, A_CONDEXEC);
243 }
244 
245 static bool need_pred_written(Packet *pkt)
246 {
247     return check_for_attrib(pkt, A_WRITES_PRED_REG);
248 }
249 
250 static bool need_next_PC(DisasContext *ctx)
251 {
252     Packet *pkt = ctx->pkt;
253 
254     /* Check for conditional control flow or HW loop end */
255     for (int i = 0; i < pkt->num_insns; i++) {
256         uint16_t opcode = pkt->insn[i].opcode;
257         if (GET_ATTRIB(opcode, A_CONDEXEC) && GET_ATTRIB(opcode, A_COF)) {
258             return true;
259         }
260         if (GET_ATTRIB(opcode, A_HWLOOP0_END) ||
261             GET_ATTRIB(opcode, A_HWLOOP1_END)) {
262             return true;
263         }
264     }
265     return false;
266 }
267 
268 static void gen_start_packet(DisasContext *ctx)
269 {
270     Packet *pkt = ctx->pkt;
271     target_ulong next_PC = ctx->base.pc_next + pkt->encod_pkt_size_in_bytes;
272     int i;
273 
274     /* Clear out the disassembly context */
275     ctx->next_PC = next_PC;
276     ctx->reg_log_idx = 0;
277     bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
278     ctx->preg_log_idx = 0;
279     bitmap_zero(ctx->pregs_written, NUM_PREGS);
280     ctx->future_vregs_idx = 0;
281     ctx->tmp_vregs_idx = 0;
282     ctx->vreg_log_idx = 0;
283     bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS);
284     bitmap_zero(ctx->vregs_updated, NUM_VREGS);
285     bitmap_zero(ctx->vregs_select, NUM_VREGS);
286     ctx->qreg_log_idx = 0;
287     for (i = 0; i < STORES_MAX; i++) {
288         ctx->store_width[i] = 0;
289     }
290     tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1);
291     ctx->s1_store_processed = false;
292     ctx->pre_commit = true;
293 
294     if (HEX_DEBUG) {
295         /* Handy place to set a breakpoint before the packet executes */
296         gen_helper_debug_start_packet(cpu_env);
297         tcg_gen_movi_tl(hex_this_PC, ctx->base.pc_next);
298     }
299 
300     /* Initialize the runtime state for packet semantics */
301     if (need_slot_cancelled(pkt)) {
302         tcg_gen_movi_tl(hex_slot_cancelled, 0);
303     }
304     if (pkt->pkt_has_cof) {
305         if (pkt->pkt_has_multi_cof) {
306             tcg_gen_movi_tl(hex_branch_taken, 0);
307         }
308         if (need_next_PC(ctx)) {
309             tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], next_PC);
310         }
311     }
312     if (need_pred_written(pkt)) {
313         tcg_gen_movi_tl(hex_pred_written, 0);
314     }
315 
316     if (pkt->pkt_has_hvx) {
317         tcg_gen_movi_tl(hex_VRegs_updated, 0);
318         tcg_gen_movi_tl(hex_QRegs_updated, 0);
319     }
320 }
321 
322 bool is_gather_store_insn(DisasContext *ctx)
323 {
324     Packet *pkt = ctx->pkt;
325     Insn *insn = ctx->insn;
326     if (GET_ATTRIB(insn->opcode, A_CVI_NEW) &&
327         insn->new_value_producer_slot == 1) {
328         /* Look for gather instruction */
329         for (int i = 0; i < pkt->num_insns; i++) {
330             Insn *in = &pkt->insn[i];
331             if (GET_ATTRIB(in->opcode, A_CVI_GATHER) && in->slot == 1) {
332                 return true;
333             }
334         }
335     }
336     return false;
337 }
338 
339 /*
340  * The LOG_*_WRITE macros mark most of the writes in a packet
341  * However, there are some implicit writes marked as attributes
342  * of the applicable instructions.
343  */
344 static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum)
345 {
346     uint16_t opcode = ctx->insn->opcode;
347     if (GET_ATTRIB(opcode, attrib)) {
348         /*
349          * USR is used to set overflow and FP exceptions,
350          * so treat it as conditional
351          */
352         bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) ||
353                              rnum == HEX_REG_USR;
354 
355         /* LC0/LC1 is conditionally written by endloop instructions */
356         if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) &&
357             (opcode == J2_endloop0 ||
358              opcode == J2_endloop1 ||
359              opcode == J2_endloop01)) {
360             is_predicated = true;
361         }
362 
363         if (is_predicated && !is_preloaded(ctx, rnum)) {
364             tcg_gen_mov_tl(hex_new_value[rnum], hex_gpr[rnum]);
365         }
366 
367         ctx_log_reg_write(ctx, rnum);
368     }
369 }
370 
371 static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum)
372 {
373     if (GET_ATTRIB(ctx->insn->opcode, attrib)) {
374         ctx_log_pred_write(ctx, pnum);
375     }
376 }
377 
378 static void mark_implicit_reg_writes(DisasContext *ctx)
379 {
380     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_FP,  HEX_REG_FP);
381     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SP,  HEX_REG_SP);
382     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LR,  HEX_REG_LR);
383     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC0, HEX_REG_LC0);
384     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0);
385     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1);
386     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1);
387     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_USR, HEX_REG_USR);
388     mark_implicit_reg_write(ctx, A_FPOP, HEX_REG_USR);
389 }
390 
391 static void mark_implicit_pred_writes(DisasContext *ctx)
392 {
393     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P0, 0);
394     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P1, 1);
395     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P2, 2);
396     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3);
397 }
398 
399 static void mark_store_width(DisasContext *ctx)
400 {
401     uint16_t opcode = ctx->insn->opcode;
402     uint32_t slot = ctx->insn->slot;
403     uint8_t width = 0;
404 
405     if (GET_ATTRIB(opcode, A_SCALAR_STORE)) {
406         if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) {
407             width |= 1;
408         }
409         if (GET_ATTRIB(opcode, A_MEMSIZE_2B)) {
410             width |= 2;
411         }
412         if (GET_ATTRIB(opcode, A_MEMSIZE_4B)) {
413             width |= 4;
414         }
415         if (GET_ATTRIB(opcode, A_MEMSIZE_8B)) {
416             width |= 8;
417         }
418         tcg_debug_assert(is_power_of_2(width));
419         ctx->store_width[slot] = width;
420     }
421 }
422 
423 static void gen_insn(DisasContext *ctx)
424 {
425     if (ctx->insn->generate) {
426         mark_implicit_reg_writes(ctx);
427         ctx->insn->generate(ctx);
428         mark_implicit_pred_writes(ctx);
429         mark_store_width(ctx);
430     } else {
431         gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE);
432     }
433 }
434 
435 /*
436  * Helpers for generating the packet commit
437  */
438 static void gen_reg_writes(DisasContext *ctx)
439 {
440     int i;
441 
442     for (i = 0; i < ctx->reg_log_idx; i++) {
443         int reg_num = ctx->reg_log[i];
444 
445         tcg_gen_mov_tl(hex_gpr[reg_num], hex_new_value[reg_num]);
446 
447         /*
448          * ctx->is_tight_loop is set when SA0 points to the beginning of the TB.
449          * If we write to SA0, we have to turn off tight loop handling.
450          */
451         if (reg_num == HEX_REG_SA0) {
452             ctx->is_tight_loop = false;
453         }
454     }
455 }
456 
457 static void gen_pred_writes(DisasContext *ctx)
458 {
459     int i;
460 
461     /* Early exit if the log is empty */
462     if (!ctx->preg_log_idx) {
463         return;
464     }
465 
466     /*
467      * Only endloop instructions will conditionally
468      * write a predicate.  If there are no endloop
469      * instructions, we can use the non-conditional
470      * write of the predicates.
471      */
472     if (ctx->pkt->pkt_has_endloop) {
473         TCGv zero = tcg_constant_tl(0);
474         TCGv pred_written = tcg_temp_new();
475         for (i = 0; i < ctx->preg_log_idx; i++) {
476             int pred_num = ctx->preg_log[i];
477 
478             tcg_gen_andi_tl(pred_written, hex_pred_written, 1 << pred_num);
479             tcg_gen_movcond_tl(TCG_COND_NE, hex_pred[pred_num],
480                                pred_written, zero,
481                                hex_new_pred_value[pred_num],
482                                hex_pred[pred_num]);
483         }
484         tcg_temp_free(pred_written);
485     } else {
486         for (i = 0; i < ctx->preg_log_idx; i++) {
487             int pred_num = ctx->preg_log[i];
488             tcg_gen_mov_tl(hex_pred[pred_num], hex_new_pred_value[pred_num]);
489             if (HEX_DEBUG) {
490                 /* Do this so HELPER(debug_commit_end) will know */
491                 tcg_gen_ori_tl(hex_pred_written, hex_pred_written,
492                                1 << pred_num);
493             }
494         }
495     }
496 }
497 
498 static void gen_check_store_width(DisasContext *ctx, int slot_num)
499 {
500     if (HEX_DEBUG) {
501         TCGv slot = tcg_constant_tl(slot_num);
502         TCGv check = tcg_constant_tl(ctx->store_width[slot_num]);
503         gen_helper_debug_check_store_width(cpu_env, slot, check);
504     }
505 }
506 
507 static bool slot_is_predicated(Packet *pkt, int slot_num)
508 {
509     for (int i = 0; i < pkt->num_insns; i++) {
510         if (pkt->insn[i].slot == slot_num) {
511             return GET_ATTRIB(pkt->insn[i].opcode, A_CONDEXEC);
512         }
513     }
514     /* If we get to here, we didn't find an instruction in the requested slot */
515     g_assert_not_reached();
516 }
517 
518 void process_store(DisasContext *ctx, int slot_num)
519 {
520     bool is_predicated = slot_is_predicated(ctx->pkt, slot_num);
521     TCGLabel *label_end = NULL;
522 
523     /*
524      * We may have already processed this store
525      * See CHECK_NOSHUF in macros.h
526      */
527     if (slot_num == 1 && ctx->s1_store_processed) {
528         return;
529     }
530     ctx->s1_store_processed = true;
531 
532     if (is_predicated) {
533         TCGv cancelled = tcg_temp_new();
534         label_end = gen_new_label();
535 
536         /* Don't do anything if the slot was cancelled */
537         tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1);
538         tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end);
539         tcg_temp_free(cancelled);
540     }
541     {
542         TCGv address = tcg_temp_local_new();
543         tcg_gen_mov_tl(address, hex_store_addr[slot_num]);
544 
545         /*
546          * If we know the width from the DisasContext, we can
547          * generate much cleaner code.
548          * Unfortunately, not all instructions execute the fSTORE
549          * macro during code generation.  Anything that uses the
550          * generic helper will have this problem.  Instructions
551          * that use fWRAP to generate proper TCG code will be OK.
552          */
553         switch (ctx->store_width[slot_num]) {
554         case 1:
555             gen_check_store_width(ctx, slot_num);
556             tcg_gen_qemu_st8(hex_store_val32[slot_num],
557                              hex_store_addr[slot_num],
558                              ctx->mem_idx);
559             break;
560         case 2:
561             gen_check_store_width(ctx, slot_num);
562             tcg_gen_qemu_st16(hex_store_val32[slot_num],
563                               hex_store_addr[slot_num],
564                               ctx->mem_idx);
565             break;
566         case 4:
567             gen_check_store_width(ctx, slot_num);
568             tcg_gen_qemu_st32(hex_store_val32[slot_num],
569                               hex_store_addr[slot_num],
570                               ctx->mem_idx);
571             break;
572         case 8:
573             gen_check_store_width(ctx, slot_num);
574             tcg_gen_qemu_st64(hex_store_val64[slot_num],
575                               hex_store_addr[slot_num],
576                               ctx->mem_idx);
577             break;
578         default:
579             {
580                 /*
581                  * If we get to here, we don't know the width at
582                  * TCG generation time, we'll use a helper to
583                  * avoid branching based on the width at runtime.
584                  */
585                 TCGv slot = tcg_constant_tl(slot_num);
586                 gen_helper_commit_store(cpu_env, slot);
587             }
588         }
589         tcg_temp_free(address);
590     }
591     if (is_predicated) {
592         gen_set_label(label_end);
593     }
594 }
595 
596 static void process_store_log(DisasContext *ctx)
597 {
598     /*
599      *  When a packet has two stores, the hardware processes
600      *  slot 1 and then slot 0.  This will be important when
601      *  the memory accesses overlap.
602      */
603     Packet *pkt = ctx->pkt;
604     if (pkt->pkt_has_store_s1) {
605         g_assert(!pkt->pkt_has_dczeroa);
606         process_store(ctx, 1);
607     }
608     if (pkt->pkt_has_store_s0) {
609         g_assert(!pkt->pkt_has_dczeroa);
610         process_store(ctx, 0);
611     }
612 }
613 
614 /* Zero out a 32-bit cache line */
615 static void process_dczeroa(DisasContext *ctx)
616 {
617     if (ctx->pkt->pkt_has_dczeroa) {
618         /* Store 32 bytes of zero starting at (addr & ~0x1f) */
619         TCGv addr = tcg_temp_new();
620         TCGv_i64 zero = tcg_constant_i64(0);
621 
622         tcg_gen_andi_tl(addr, hex_dczero_addr, ~0x1f);
623         tcg_gen_qemu_st64(zero, addr, ctx->mem_idx);
624         tcg_gen_addi_tl(addr, addr, 8);
625         tcg_gen_qemu_st64(zero, addr, ctx->mem_idx);
626         tcg_gen_addi_tl(addr, addr, 8);
627         tcg_gen_qemu_st64(zero, addr, ctx->mem_idx);
628         tcg_gen_addi_tl(addr, addr, 8);
629         tcg_gen_qemu_st64(zero, addr, ctx->mem_idx);
630 
631         tcg_temp_free(addr);
632     }
633 }
634 
635 static bool pkt_has_hvx_store(Packet *pkt)
636 {
637     int i;
638     for (i = 0; i < pkt->num_insns; i++) {
639         int opcode = pkt->insn[i].opcode;
640         if (GET_ATTRIB(opcode, A_CVI) && GET_ATTRIB(opcode, A_STORE)) {
641             return true;
642         }
643     }
644     return false;
645 }
646 
647 static void gen_commit_hvx(DisasContext *ctx)
648 {
649     int i;
650 
651     /*
652      *    for (i = 0; i < ctx->vreg_log_idx; i++) {
653      *        int rnum = ctx->vreg_log[i];
654      *        if (ctx->vreg_is_predicated[i]) {
655      *            if (env->VRegs_updated & (1 << rnum)) {
656      *                env->VRegs[rnum] = env->future_VRegs[rnum];
657      *            }
658      *        } else {
659      *            env->VRegs[rnum] = env->future_VRegs[rnum];
660      *        }
661      *    }
662      */
663     for (i = 0; i < ctx->vreg_log_idx; i++) {
664         int rnum = ctx->vreg_log[i];
665         bool is_predicated = ctx->vreg_is_predicated[i];
666         intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]);
667         intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false);
668         size_t size = sizeof(MMVector);
669 
670         if (is_predicated) {
671             TCGv cmp = tcg_temp_new();
672             TCGLabel *label_skip = gen_new_label();
673 
674             tcg_gen_andi_tl(cmp, hex_VRegs_updated, 1 << rnum);
675             tcg_gen_brcondi_tl(TCG_COND_EQ, cmp, 0, label_skip);
676             tcg_temp_free(cmp);
677             tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
678             gen_set_label(label_skip);
679         } else {
680             tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
681         }
682     }
683 
684     /*
685      *    for (i = 0; i < ctx->qreg_log_idx; i++) {
686      *        int rnum = ctx->qreg_log[i];
687      *        if (ctx->qreg_is_predicated[i]) {
688      *            if (env->QRegs_updated) & (1 << rnum)) {
689      *                env->QRegs[rnum] = env->future_QRegs[rnum];
690      *            }
691      *        } else {
692      *            env->QRegs[rnum] = env->future_QRegs[rnum];
693      *        }
694      *    }
695      */
696     for (i = 0; i < ctx->qreg_log_idx; i++) {
697         int rnum = ctx->qreg_log[i];
698         bool is_predicated = ctx->qreg_is_predicated[i];
699         intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]);
700         intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]);
701         size_t size = sizeof(MMQReg);
702 
703         if (is_predicated) {
704             TCGv cmp = tcg_temp_new();
705             TCGLabel *label_skip = gen_new_label();
706 
707             tcg_gen_andi_tl(cmp, hex_QRegs_updated, 1 << rnum);
708             tcg_gen_brcondi_tl(TCG_COND_EQ, cmp, 0, label_skip);
709             tcg_temp_free(cmp);
710             tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
711             gen_set_label(label_skip);
712         } else {
713             tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
714         }
715     }
716 
717     if (pkt_has_hvx_store(ctx->pkt)) {
718         gen_helper_commit_hvx_stores(cpu_env);
719     }
720 }
721 
722 static void update_exec_counters(DisasContext *ctx)
723 {
724     Packet *pkt = ctx->pkt;
725     int num_insns = pkt->num_insns;
726     int num_real_insns = 0;
727     int num_hvx_insns = 0;
728 
729     for (int i = 0; i < num_insns; i++) {
730         if (!pkt->insn[i].is_endloop &&
731             !pkt->insn[i].part1 &&
732             !GET_ATTRIB(pkt->insn[i].opcode, A_IT_NOP)) {
733             num_real_insns++;
734         }
735         if (GET_ATTRIB(pkt->insn[i].opcode, A_CVI)) {
736             num_hvx_insns++;
737         }
738     }
739 
740     ctx->num_packets++;
741     ctx->num_insns += num_real_insns;
742     ctx->num_hvx_insns += num_hvx_insns;
743 }
744 
745 static void gen_commit_packet(DisasContext *ctx)
746 {
747     /*
748      * If there is more than one store in a packet, make sure they are all OK
749      * before proceeding with the rest of the packet commit.
750      *
751      * dczeroa has to be the only store operation in the packet, so we go
752      * ahead and process that first.
753      *
754      * When there is an HVX store, there can also be a scalar store in either
755      * slot 0 or slot1, so we create a mask for the helper to indicate what
756      * work to do.
757      *
758      * When there are two scalar stores, we probe the one in slot 0.
759      *
760      * Note that we don't call the probe helper for packets with only one
761      * store.  Therefore, we call process_store_log before anything else
762      * involved in committing the packet.
763      */
764     Packet *pkt = ctx->pkt;
765     bool has_store_s0 = pkt->pkt_has_store_s0;
766     bool has_store_s1 = (pkt->pkt_has_store_s1 && !ctx->s1_store_processed);
767     bool has_hvx_store = pkt_has_hvx_store(pkt);
768     if (pkt->pkt_has_dczeroa) {
769         /*
770          * The dczeroa will be the store in slot 0, check that we don't have
771          * a store in slot 1 or an HVX store.
772          */
773         g_assert(!has_store_s1 && !has_hvx_store);
774         process_dczeroa(ctx);
775     } else if (has_hvx_store) {
776         TCGv mem_idx = tcg_constant_tl(ctx->mem_idx);
777 
778         if (!has_store_s0 && !has_store_s1) {
779             gen_helper_probe_hvx_stores(cpu_env, mem_idx);
780         } else {
781             int mask = 0;
782             TCGv mask_tcgv;
783 
784             if (has_store_s0) {
785                 mask |= (1 << 0);
786             }
787             if (has_store_s1) {
788                 mask |= (1 << 1);
789             }
790             if (has_hvx_store) {
791                 mask |= (1 << 2);
792             }
793             mask_tcgv = tcg_constant_tl(mask);
794             gen_helper_probe_pkt_scalar_hvx_stores(cpu_env, mask_tcgv, mem_idx);
795         }
796     } else if (has_store_s0 && has_store_s1) {
797         /*
798          * process_store_log will execute the slot 1 store first,
799          * so we only have to probe the store in slot 0
800          */
801         TCGv mem_idx = tcg_constant_tl(ctx->mem_idx);
802         gen_helper_probe_pkt_scalar_store_s0(cpu_env, mem_idx);
803     }
804 
805     process_store_log(ctx);
806 
807     gen_reg_writes(ctx);
808     gen_pred_writes(ctx);
809     if (pkt->pkt_has_hvx) {
810         gen_commit_hvx(ctx);
811     }
812     update_exec_counters(ctx);
813     if (HEX_DEBUG) {
814         TCGv has_st0 =
815             tcg_constant_tl(pkt->pkt_has_store_s0 && !pkt->pkt_has_dczeroa);
816         TCGv has_st1 =
817             tcg_constant_tl(pkt->pkt_has_store_s1 && !pkt->pkt_has_dczeroa);
818 
819         /* Handy place to set a breakpoint at the end of execution */
820         gen_helper_debug_commit_end(cpu_env, has_st0, has_st1);
821     }
822 
823     if (pkt->vhist_insn != NULL) {
824         ctx->pre_commit = false;
825         ctx->insn = pkt->vhist_insn;
826         pkt->vhist_insn->generate(ctx);
827     }
828 
829     if (pkt->pkt_has_cof) {
830         gen_end_tb(ctx);
831     }
832 }
833 
834 static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx)
835 {
836     uint32_t words[PACKET_WORDS_MAX];
837     int nwords;
838     Packet pkt;
839     int i;
840 
841     nwords = read_packet_words(env, ctx, words);
842     if (!nwords) {
843         gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET);
844         return;
845     }
846 
847     if (decode_packet(nwords, words, &pkt, false) > 0) {
848         pkt.pc = ctx->base.pc_next;
849         HEX_DEBUG_PRINT_PKT(&pkt);
850         ctx->pkt = &pkt;
851         gen_start_packet(ctx);
852         for (i = 0; i < pkt.num_insns; i++) {
853             ctx->insn = &pkt.insn[i];
854             gen_insn(ctx);
855         }
856         gen_commit_packet(ctx);
857         ctx->base.pc_next += pkt.encod_pkt_size_in_bytes;
858     } else {
859         gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET);
860     }
861 }
862 
863 static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
864                                           CPUState *cs)
865 {
866     DisasContext *ctx = container_of(dcbase, DisasContext, base);
867     uint32_t hex_flags = dcbase->tb->flags;
868 
869     ctx->mem_idx = MMU_USER_IDX;
870     ctx->num_packets = 0;
871     ctx->num_insns = 0;
872     ctx->num_hvx_insns = 0;
873     ctx->branch_cond = TCG_COND_NEVER;
874     ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP);
875 }
876 
/* TranslatorOps hook for the start of a TB; Hexagon has nothing to do here */
static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}
880 
881 static void hexagon_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
882 {
883     DisasContext *ctx = container_of(dcbase, DisasContext, base);
884 
885     tcg_gen_insn_start(ctx->base.pc_next);
886 }
887 
888 static bool pkt_crosses_page(CPUHexagonState *env, DisasContext *ctx)
889 {
890     target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
891     bool found_end = false;
892     int nwords;
893 
894     for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) {
895         uint32_t word = cpu_ldl_code(env,
896                             ctx->base.pc_next + nwords * sizeof(uint32_t));
897         found_end = is_packet_end(word);
898     }
899     uint32_t next_ptr =  ctx->base.pc_next + nwords * sizeof(uint32_t);
900     return found_end && next_ptr - page_start >= TARGET_PAGE_SIZE;
901 }
902 
903 static void hexagon_tr_translate_packet(DisasContextBase *dcbase, CPUState *cpu)
904 {
905     DisasContext *ctx = container_of(dcbase, DisasContext, base);
906     CPUHexagonState *env = cpu->env_ptr;
907 
908     decode_and_translate_packet(env, ctx);
909 
910     if (ctx->base.is_jmp == DISAS_NEXT) {
911         target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
912         target_ulong bytes_max = PACKET_WORDS_MAX * sizeof(target_ulong);
913 
914         if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE ||
915             (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE - bytes_max &&
916              pkt_crosses_page(env, ctx))) {
917             ctx->base.is_jmp = DISAS_TOO_MANY;
918         }
919 
920         /*
921          * The CPU log is used to compare against LLDB single stepping,
922          * so end the TLB after every packet.
923          */
924         HexagonCPU *hex_cpu = env_archcpu(env);
925         if (hex_cpu->lldb_compat && qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
926             ctx->base.is_jmp = DISAS_TOO_MANY;
927         }
928     }
929 }
930 
931 static void hexagon_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
932 {
933     DisasContext *ctx = container_of(dcbase, DisasContext, base);
934 
935     switch (ctx->base.is_jmp) {
936     case DISAS_TOO_MANY:
937         gen_exec_counters(ctx);
938         tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next);
939         tcg_gen_exit_tb(NULL, 0);
940         break;
941     case DISAS_NORETURN:
942         break;
943     default:
944         g_assert_not_reached();
945     }
946 }
947 
948 static void hexagon_tr_disas_log(const DisasContextBase *dcbase,
949                                  CPUState *cpu, FILE *logfile)
950 {
951     fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first));
952     target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size);
953 }
954 
955 
956 static const TranslatorOps hexagon_tr_ops = {
957     .init_disas_context = hexagon_tr_init_disas_context,
958     .tb_start           = hexagon_tr_tb_start,
959     .insn_start         = hexagon_tr_insn_start,
960     .translate_insn     = hexagon_tr_translate_packet,
961     .tb_stop            = hexagon_tr_tb_stop,
962     .disas_log          = hexagon_tr_disas_log,
963 };
964 
965 void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
966                            target_ulong pc, void *host_pc)
967 {
968     DisasContext ctx;
969 
970     translator_loop(cs, tb, max_insns, pc, host_pc,
971                     &hexagon_tr_ops, &ctx.base);
972 }
973 
974 #define NAME_LEN               64
975 static char new_value_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
976 static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
977 static char new_pred_value_names[NUM_PREGS][NAME_LEN];
978 static char store_addr_names[STORES_MAX][NAME_LEN];
979 static char store_width_names[STORES_MAX][NAME_LEN];
980 static char store_val32_names[STORES_MAX][NAME_LEN];
981 static char store_val64_names[STORES_MAX][NAME_LEN];
982 static char vstore_addr_names[VSTORES_MAX][NAME_LEN];
983 static char vstore_size_names[VSTORES_MAX][NAME_LEN];
984 static char vstore_pending_names[VSTORES_MAX][NAME_LEN];
985 
986 void hexagon_translate_init(void)
987 {
988     int i;
989 
990     opcode_init();
991 
992     for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
993         hex_gpr[i] = tcg_global_mem_new(cpu_env,
994             offsetof(CPUHexagonState, gpr[i]),
995             hexagon_regnames[i]);
996 
997         snprintf(new_value_names[i], NAME_LEN, "new_%s", hexagon_regnames[i]);
998         hex_new_value[i] = tcg_global_mem_new(cpu_env,
999             offsetof(CPUHexagonState, new_value[i]),
1000             new_value_names[i]);
1001 
1002         if (HEX_DEBUG) {
1003             snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s",
1004                      hexagon_regnames[i]);
1005             hex_reg_written[i] = tcg_global_mem_new(cpu_env,
1006                 offsetof(CPUHexagonState, reg_written[i]),
1007                 reg_written_names[i]);
1008         }
1009     }
1010     for (i = 0; i < NUM_PREGS; i++) {
1011         hex_pred[i] = tcg_global_mem_new(cpu_env,
1012             offsetof(CPUHexagonState, pred[i]),
1013             hexagon_prednames[i]);
1014 
1015         snprintf(new_pred_value_names[i], NAME_LEN, "new_pred_%s",
1016                  hexagon_prednames[i]);
1017         hex_new_pred_value[i] = tcg_global_mem_new(cpu_env,
1018             offsetof(CPUHexagonState, new_pred_value[i]),
1019             new_pred_value_names[i]);
1020     }
1021     hex_pred_written = tcg_global_mem_new(cpu_env,
1022         offsetof(CPUHexagonState, pred_written), "pred_written");
1023     hex_this_PC = tcg_global_mem_new(cpu_env,
1024         offsetof(CPUHexagonState, this_PC), "this_PC");
1025     hex_slot_cancelled = tcg_global_mem_new(cpu_env,
1026         offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled");
1027     hex_branch_taken = tcg_global_mem_new(cpu_env,
1028         offsetof(CPUHexagonState, branch_taken), "branch_taken");
1029     hex_pkt_has_store_s1 = tcg_global_mem_new(cpu_env,
1030         offsetof(CPUHexagonState, pkt_has_store_s1), "pkt_has_store_s1");
1031     hex_dczero_addr = tcg_global_mem_new(cpu_env,
1032         offsetof(CPUHexagonState, dczero_addr), "dczero_addr");
1033     hex_llsc_addr = tcg_global_mem_new(cpu_env,
1034         offsetof(CPUHexagonState, llsc_addr), "llsc_addr");
1035     hex_llsc_val = tcg_global_mem_new(cpu_env,
1036         offsetof(CPUHexagonState, llsc_val), "llsc_val");
1037     hex_llsc_val_i64 = tcg_global_mem_new_i64(cpu_env,
1038         offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64");
1039     hex_VRegs_updated = tcg_global_mem_new(cpu_env,
1040         offsetof(CPUHexagonState, VRegs_updated), "VRegs_updated");
1041     hex_QRegs_updated = tcg_global_mem_new(cpu_env,
1042         offsetof(CPUHexagonState, QRegs_updated), "QRegs_updated");
1043     for (i = 0; i < STORES_MAX; i++) {
1044         snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i);
1045         hex_store_addr[i] = tcg_global_mem_new(cpu_env,
1046             offsetof(CPUHexagonState, mem_log_stores[i].va),
1047             store_addr_names[i]);
1048 
1049         snprintf(store_width_names[i], NAME_LEN, "store_width_%d", i);
1050         hex_store_width[i] = tcg_global_mem_new(cpu_env,
1051             offsetof(CPUHexagonState, mem_log_stores[i].width),
1052             store_width_names[i]);
1053 
1054         snprintf(store_val32_names[i], NAME_LEN, "store_val32_%d", i);
1055         hex_store_val32[i] = tcg_global_mem_new(cpu_env,
1056             offsetof(CPUHexagonState, mem_log_stores[i].data32),
1057             store_val32_names[i]);
1058 
1059         snprintf(store_val64_names[i], NAME_LEN, "store_val64_%d", i);
1060         hex_store_val64[i] = tcg_global_mem_new_i64(cpu_env,
1061             offsetof(CPUHexagonState, mem_log_stores[i].data64),
1062             store_val64_names[i]);
1063     }
1064     for (int i = 0; i < VSTORES_MAX; i++) {
1065         snprintf(vstore_addr_names[i], NAME_LEN, "vstore_addr_%d", i);
1066         hex_vstore_addr[i] = tcg_global_mem_new(cpu_env,
1067             offsetof(CPUHexagonState, vstore[i].va),
1068             vstore_addr_names[i]);
1069 
1070         snprintf(vstore_size_names[i], NAME_LEN, "vstore_size_%d", i);
1071         hex_vstore_size[i] = tcg_global_mem_new(cpu_env,
1072             offsetof(CPUHexagonState, vstore[i].size),
1073             vstore_size_names[i]);
1074 
1075         snprintf(vstore_pending_names[i], NAME_LEN, "vstore_pending_%d", i);
1076         hex_vstore_pending[i] = tcg_global_mem_new(cpu_env,
1077             offsetof(CPUHexagonState, vstore_pending[i]),
1078             vstore_pending_names[i]);
1079     }
1080 }
1081