xref: /openbmc/qemu/target/hexagon/translate.c (revision b9f0326b)
1 /*
2  *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #define QEMU_GENERATE
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "tcg/tcg-op.h"
22 #include "tcg/tcg-op-gvec.h"
23 #include "exec/cpu_ldst.h"
24 #include "exec/log.h"
25 #include "internal.h"
26 #include "attribs.h"
27 #include "insn.h"
28 #include "decode.h"
29 #include "translate.h"
30 #include "printinsn.h"
31 
32 #include "analyze_funcs_generated.c.inc"
33 
/*
 * Per-opcode analysis functions, generated from the instruction
 * definitions (analyze_funcs_generated.c.inc).  Each entry records the
 * registers/predicates an instruction reads and writes into the
 * DisasContext before any TCG code is generated for the packet.
 */
typedef void (*AnalyzeInsn)(DisasContext *ctx);
static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = {
#define OPCODE(X)    [X] = analyze_##X
#include "opcodes_def_generated.h.inc"
#undef OPCODE
};
40 
/* TCG globals backing the Hexagon CPU state */
TCGv hex_gpr[TOTAL_PER_THREAD_REGS];    /* live general-purpose registers */
TCGv hex_pred[NUM_PREGS];               /* live predicate registers p0-p3 */
TCGv hex_this_PC;                       /* PC of current packet (HEX_DEBUG only) */
TCGv hex_slot_cancelled;                /* bitmask of cancelled execution slots */
TCGv hex_branch_taken;                  /* set when a conditional branch fires */
TCGv hex_new_value[TOTAL_PER_THREAD_REGS];  /* staged values, committed at packet end */
TCGv hex_reg_written[TOTAL_PER_THREAD_REGS];  /* per-reg written flags; presumably debug-only — confirm against genptr */
TCGv hex_new_pred_value[NUM_PREGS];     /* staged predicate values */
TCGv hex_pred_written;                  /* predicate-written bitmask (HEX_DEBUG) */
/* Deferred scalar stores, processed after packet semantics (per slot) */
TCGv hex_store_addr[STORES_MAX];
TCGv hex_store_width[STORES_MAX];
TCGv hex_store_val32[STORES_MAX];
TCGv_i64 hex_store_val64[STORES_MAX];
TCGv hex_pkt_has_store_s1;              /* packet contains a slot-1 store */
TCGv hex_dczero_addr;                   /* address operand for dczeroa */
/* NOTE(review): llsc_* look like load-locked/store-conditional state — verify in op_helper */
TCGv hex_llsc_addr;
TCGv hex_llsc_val;
TCGv_i64 hex_llsc_val_i64;
/* Deferred HVX vector stores */
TCGv hex_vstore_addr[VSTORES_MAX];
TCGv hex_vstore_size[VSTORES_MAX];
TCGv hex_vstore_pending[VSTORES_MAX];
62 
/* Printable names for the four predicate registers */
static const char * const hexagon_prednames[] = {
  "p0", "p1", "p2", "p3"
};
66 
67 intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
68                           int num, bool alloc_ok)
69 {
70     intptr_t offset;
71 
72     /* See if it is already allocated */
73     for (int i = 0; i < ctx->future_vregs_idx; i++) {
74         if (ctx->future_vregs_num[i] == regnum) {
75             return offsetof(CPUHexagonState, future_VRegs[i]);
76         }
77     }
78 
79     g_assert(alloc_ok);
80     offset = offsetof(CPUHexagonState, future_VRegs[ctx->future_vregs_idx]);
81     for (int i = 0; i < num; i++) {
82         ctx->future_vregs_num[ctx->future_vregs_idx + i] = regnum++;
83     }
84     ctx->future_vregs_idx += num;
85     g_assert(ctx->future_vregs_idx <= VECTOR_TEMPS_MAX);
86     return offset;
87 }
88 
89 intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
90                           int num, bool alloc_ok)
91 {
92     intptr_t offset;
93 
94     /* See if it is already allocated */
95     for (int i = 0; i < ctx->tmp_vregs_idx; i++) {
96         if (ctx->tmp_vregs_num[i] == regnum) {
97             return offsetof(CPUHexagonState, tmp_VRegs[i]);
98         }
99     }
100 
101     g_assert(alloc_ok);
102     offset = offsetof(CPUHexagonState, tmp_VRegs[ctx->tmp_vregs_idx]);
103     for (int i = 0; i < num; i++) {
104         ctx->tmp_vregs_num[ctx->tmp_vregs_idx + i] = regnum++;
105     }
106     ctx->tmp_vregs_idx += num;
107     g_assert(ctx->tmp_vregs_idx <= VECTOR_TEMPS_MAX);
108     return offset;
109 }
110 
/* Emit a helper call that raises exception 'excp' at runtime */
static void gen_exception_raw(int excp)
{
    gen_helper_raise_exception(cpu_env, tcg_constant_i32(excp));
}
115 
/*
 * Fold the packet / instruction / HVX-instruction counts accumulated
 * for this TB into the QEMU pseudo registers.
 */
static void gen_exec_counters(DisasContext *ctx)
{
    tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT],
                    hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets);
    tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT],
                    hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns);
    tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT],
                    hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns);
}
125 
/* True when direct block chaining to 'dest' is permitted */
static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
{
    return translator_use_goto_tb(&ctx->base, dest);
}
130 
/*
 * Emit a jump to 'dest', using direct block chaining (goto_tb/exit_tb)
 * when allowed, otherwise an indirect TB lookup.  When move_to_pc is
 * set, the PC register is updated to 'dest'; in the chained case the
 * PC store is emitted between goto_tb and exit_tb.
 */
static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest, bool
                        move_to_pc)
{
    if (use_goto_tb(ctx, dest)) {
        tcg_gen_goto_tb(idx);
        if (move_to_pc) {
            tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
        }
        tcg_gen_exit_tb(ctx->base.tb, idx);
    } else {
        if (move_to_pc) {
            tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
        }
        tcg_gen_lookup_and_goto_ptr();
    }
}
147 
/*
 * End the translation block after a change-of-flow packet.  Flushes the
 * execution counters, then chains to the branch destination and/or the
 * fall-through address, and marks the TB as finished.
 */
static void gen_end_tb(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;

    gen_exec_counters(ctx);

    if (ctx->branch_cond != TCG_COND_NEVER) {
        if (ctx->branch_cond != TCG_COND_ALWAYS) {
            /* Conditional branch: test hex_branch_taken at runtime */
            TCGLabel *skip = gen_new_label();
            tcg_gen_brcondi_tl(ctx->branch_cond, hex_branch_taken, 0, skip);
            gen_goto_tb(ctx, 0, ctx->branch_dest, true);
            gen_set_label(skip);
            /* Branch not taken: chain to the fall-through packet */
            gen_goto_tb(ctx, 1, ctx->next_PC, false);
        } else {
            /* Unconditional branch with a known destination */
            gen_goto_tb(ctx, 0, ctx->branch_dest, true);
        }
    } else if (ctx->is_tight_loop &&
               pkt->insn[pkt->num_insns - 1].opcode == J2_endloop0) {
        /*
         * When we're in a tight loop, we defer the endloop0 processing
         * to take advantage of direct block chaining
         */
        TCGLabel *skip = gen_new_label();
        /* Loop is done when LC0 <= 1; otherwise decrement and chain back */
        tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, skip);
        tcg_gen_subi_tl(hex_gpr[HEX_REG_LC0], hex_gpr[HEX_REG_LC0], 1);
        gen_goto_tb(ctx, 0, ctx->base.tb->pc, true);
        gen_set_label(skip);
        gen_goto_tb(ctx, 1, ctx->next_PC, false);
    } else {
        /* Destination not known at translation time */
        tcg_gen_lookup_and_goto_ptr();
    }

    ctx->base.is_jmp = DISAS_NORETURN;
}
182 
183 static void gen_exception_end_tb(DisasContext *ctx, int excp)
184 {
185     gen_exec_counters(ctx);
186     tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->next_PC);
187     gen_exception_raw(excp);
188     ctx->base.is_jmp = DISAS_NORETURN;
189 
190 }
191 
#define PACKET_BUFFER_LEN              1028
/* Dump a disassembled packet to the debug log */
static void print_pkt(Packet *pkt)
{
    GString *buf = g_string_sized_new(PACKET_BUFFER_LEN);
    snprint_a_pkt_debug(buf, pkt);
    HEX_DEBUG_LOG("%s", buf->str);
    g_string_free(buf, true);
}
/* Compiles to nothing unless HEX_DEBUG is enabled */
#define HEX_DEBUG_PRINT_PKT(pkt) \
    do { \
        if (HEX_DEBUG) { \
            print_pkt(pkt); \
        } \
    } while (0)
206 
/*
 * Read the encoded words of the packet at ctx->base.pc_next into
 * 'words'.  Returns the number of words read, or 0 if no end-of-packet
 * marker was found within PACKET_WORDS_MAX words (invalid packet).
 */
static int read_packet_words(CPUHexagonState *env, DisasContext *ctx,
                             uint32_t words[])
{
    bool found_end = false;
    int nwords, max_words;

    memset(words, 0, PACKET_WORDS_MAX * sizeof(uint32_t));
    /* Keep reading words until the parse bits mark the packet end */
    for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) {
        words[nwords] =
            translator_ldl(env, &ctx->base,
                           ctx->base.pc_next + nwords * sizeof(uint32_t));
        found_end = is_packet_end(words[nwords]);
    }
    if (!found_end) {
        /* Read too many words without finding the end */
        return 0;
    }

    /* Check for page boundary crossing */
    max_words = -(ctx->base.pc_next | TARGET_PAGE_MASK) / sizeof(uint32_t);
    if (nwords > max_words) {
        /* We can only cross a page boundary at the beginning of a TB */
        g_assert(ctx->base.num_insns == 1);
    }

    HEX_DEBUG_LOG("decode_packet: pc = 0x%x\n", ctx->base.pc_next);
    HEX_DEBUG_LOG("    words = { ");
    for (int i = 0; i < nwords; i++) {
        HEX_DEBUG_LOG("0x%x, ", words[i]);
    }
    HEX_DEBUG_LOG("}\n");

    return nwords;
}
241 
242 static G_GNUC_UNUSED bool check_for_attrib(Packet *pkt, int attrib)
243 {
244     for (int i = 0; i < pkt->num_insns; i++) {
245         if (GET_ATTRIB(pkt->insn[i].opcode, attrib)) {
246             return true;
247         }
248     }
249     return false;
250 }
251 
252 static bool need_slot_cancelled(Packet *pkt)
253 {
254     /* We only need slot_cancelled for conditional store instructions */
255     for (int i = 0; i < pkt->num_insns; i++) {
256         uint16_t opcode = pkt->insn[i].opcode;
257         if (GET_ATTRIB(opcode, A_CONDEXEC) &&
258             GET_ATTRIB(opcode, A_SCALAR_STORE)) {
259             return true;
260         }
261     }
262     return false;
263 }
264 
265 static bool need_next_PC(DisasContext *ctx)
266 {
267     Packet *pkt = ctx->pkt;
268 
269     /* Check for conditional control flow or HW loop end */
270     for (int i = 0; i < pkt->num_insns; i++) {
271         uint16_t opcode = pkt->insn[i].opcode;
272         if (GET_ATTRIB(opcode, A_CONDEXEC) && GET_ATTRIB(opcode, A_COF)) {
273             return true;
274         }
275         if (GET_ATTRIB(opcode, A_HWLOOP0_END) ||
276             GET_ATTRIB(opcode, A_HWLOOP1_END)) {
277             return true;
278         }
279     }
280     return false;
281 }
282 
283 /*
284  * The opcode_analyze functions mark most of the writes in a packet
285  * However, there are some implicit writes marked as attributes
286  * of the applicable instructions.
287  */
288 static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum)
289 {
290     uint16_t opcode = ctx->insn->opcode;
291     if (GET_ATTRIB(opcode, attrib)) {
292         /*
293          * USR is used to set overflow and FP exceptions,
294          * so treat it as conditional
295          */
296         bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) ||
297                              rnum == HEX_REG_USR;
298 
299         /* LC0/LC1 is conditionally written by endloop instructions */
300         if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) &&
301             (opcode == J2_endloop0 ||
302              opcode == J2_endloop1 ||
303              opcode == J2_endloop01)) {
304             is_predicated = true;
305         }
306 
307         ctx_log_reg_write(ctx, rnum, is_predicated);
308     }
309 }
310 
311 static void mark_implicit_reg_writes(DisasContext *ctx)
312 {
313     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_FP,  HEX_REG_FP);
314     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SP,  HEX_REG_SP);
315     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LR,  HEX_REG_LR);
316     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC0, HEX_REG_LC0);
317     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0);
318     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1);
319     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1);
320     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_USR, HEX_REG_USR);
321     mark_implicit_reg_write(ctx, A_FPOP, HEX_REG_USR);
322 }
323 
/* Log an implicit write of predicate 'pnum' when the insn has 'attrib' */
static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum)
{
    if (GET_ATTRIB(ctx->insn->opcode, attrib)) {
        ctx_log_pred_write(ctx, pnum);
    }
}
330 
331 static void mark_implicit_pred_writes(DisasContext *ctx)
332 {
333     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P0, 0);
334     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P1, 1);
335     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P2, 2);
336     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3);
337 }
338 
/* Log an implicit read of predicate 'pnum' when the insn has 'attrib' */
static void mark_implicit_pred_read(DisasContext *ctx, int attrib, int pnum)
{
    if (GET_ATTRIB(ctx->insn->opcode, attrib)) {
        ctx_log_pred_read(ctx, pnum);
    }
}
345 
346 static void mark_implicit_pred_reads(DisasContext *ctx)
347 {
348     mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P0, 0);
349     mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P1, 1);
350     mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 2);
351     mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 3);
352 }
353 
/*
 * Run the per-opcode analysis function for every instruction in the
 * packet, then record the implicit register/predicate accesses that
 * are only expressed as instruction attributes.
 */
static void analyze_packet(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;
    ctx->need_pkt_has_store_s1 = false;
    for (int i = 0; i < pkt->num_insns; i++) {
        Insn *insn = &pkt->insn[i];
        ctx->insn = insn;
        /* Not every opcode has an analyze function */
        if (opcode_analyze[insn->opcode]) {
            opcode_analyze[insn->opcode](ctx);
        }
        mark_implicit_reg_writes(ctx);
        mark_implicit_pred_writes(ctx);
        mark_implicit_pred_reads(ctx);
    }
}
369 
/*
 * Reset the per-packet state in the DisasContext, analyze the packet,
 * and emit the TCG setup code the packet semantics rely on (runtime
 * flags and preloads of predicated destinations).
 */
static void gen_start_packet(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;
    target_ulong next_PC = ctx->base.pc_next + pkt->encod_pkt_size_in_bytes;
    int i;

    /* Clear out the disassembly context */
    ctx->next_PC = next_PC;
    ctx->reg_log_idx = 0;
    bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
    bitmap_zero(ctx->regs_read, TOTAL_PER_THREAD_REGS);
    bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
    ctx->preg_log_idx = 0;
    bitmap_zero(ctx->pregs_written, NUM_PREGS);
    bitmap_zero(ctx->pregs_read, NUM_PREGS);
    ctx->future_vregs_idx = 0;
    ctx->tmp_vregs_idx = 0;
    ctx->vreg_log_idx = 0;
    bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS);
    bitmap_zero(ctx->vregs_updated, NUM_VREGS);
    bitmap_zero(ctx->vregs_select, NUM_VREGS);
    bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS);
    bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS);
    bitmap_zero(ctx->vregs_read, NUM_VREGS);
    bitmap_zero(ctx->qregs_read, NUM_QREGS);
    ctx->qreg_log_idx = 0;
    for (i = 0; i < STORES_MAX; i++) {
        ctx->store_width[i] = 0;
    }
    ctx->s1_store_processed = false;
    ctx->pre_commit = true;

    /* Record the packet's reads/writes before generating any code */
    analyze_packet(ctx);

    if (ctx->need_pkt_has_store_s1) {
        tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1);
    }

    /*
     * pregs_written is used both in the analyze phase as well as the code
     * gen phase, so clear it again.
     */
    bitmap_zero(ctx->pregs_written, NUM_PREGS);

    if (HEX_DEBUG) {
        /* Handy place to set a breakpoint before the packet executes */
        gen_helper_debug_start_packet(cpu_env);
        tcg_gen_movi_tl(hex_this_PC, ctx->base.pc_next);
    }

    /* Initialize the runtime state for packet semantics */
    if (need_slot_cancelled(pkt)) {
        tcg_gen_movi_tl(hex_slot_cancelled, 0);
    }
    if (pkt->pkt_has_cof) {
        if (pkt->pkt_has_multi_cof) {
            tcg_gen_movi_tl(hex_branch_taken, 0);
        }
        if (need_next_PC(ctx)) {
            tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], next_PC);
        }
    }
    if (HEX_DEBUG) {
        tcg_gen_movi_tl(hex_pred_written, 0);
    }

    /* Preload the predicated registers into hex_new_value[i] */
    if (!bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) {
        int i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
        while (i < TOTAL_PER_THREAD_REGS) {
            tcg_gen_mov_tl(hex_new_value[i], hex_gpr[i]);
            i = find_next_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS,
                              i + 1);
        }
    }

    /*
     * Preload the predicated pred registers into hex_new_pred_value[pred_num]
     * Only endloop instructions conditionally write to pred registers
     */
    if (pkt->pkt_has_endloop) {
        for (int i = 0; i < ctx->preg_log_idx; i++) {
            int pred_num = ctx->preg_log[i];
            tcg_gen_mov_tl(hex_new_pred_value[pred_num], hex_pred[pred_num]);
        }
    }

    /* Preload the predicated HVX registers into future_VRegs and tmp_VRegs */
    if (!bitmap_empty(ctx->predicated_future_vregs, NUM_VREGS)) {
        int i = find_first_bit(ctx->predicated_future_vregs, NUM_VREGS);
        while (i < NUM_VREGS) {
            const intptr_t VdV_off =
                ctx_future_vreg_off(ctx, i, 1, true);
            intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]);
            tcg_gen_gvec_mov(MO_64, VdV_off,
                             src_off,
                             sizeof(MMVector),
                             sizeof(MMVector));
            i = find_next_bit(ctx->predicated_future_vregs, NUM_VREGS, i + 1);
        }
    }
    if (!bitmap_empty(ctx->predicated_tmp_vregs, NUM_VREGS)) {
        int i = find_first_bit(ctx->predicated_tmp_vregs, NUM_VREGS);
        while (i < NUM_VREGS) {
            const intptr_t VdV_off =
                ctx_tmp_vreg_off(ctx, i, 1, true);
            intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]);
            tcg_gen_gvec_mov(MO_64, VdV_off,
                             src_off,
                             sizeof(MMVector),
                             sizeof(MMVector));
            i = find_next_bit(ctx->predicated_tmp_vregs, NUM_VREGS, i + 1);
        }
    }
}
485 
486 bool is_gather_store_insn(DisasContext *ctx)
487 {
488     Packet *pkt = ctx->pkt;
489     Insn *insn = ctx->insn;
490     if (GET_ATTRIB(insn->opcode, A_CVI_NEW) &&
491         insn->new_value_producer_slot == 1) {
492         /* Look for gather instruction */
493         for (int i = 0; i < pkt->num_insns; i++) {
494             Insn *in = &pkt->insn[i];
495             if (GET_ATTRIB(in->opcode, A_CVI_GATHER) && in->slot == 1) {
496                 return true;
497             }
498         }
499     }
500     return false;
501 }
502 
503 static void mark_store_width(DisasContext *ctx)
504 {
505     uint16_t opcode = ctx->insn->opcode;
506     uint32_t slot = ctx->insn->slot;
507     uint8_t width = 0;
508 
509     if (GET_ATTRIB(opcode, A_SCALAR_STORE)) {
510         if (GET_ATTRIB(opcode, A_MEMSIZE_0B)) {
511             return;
512         }
513         if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) {
514             width |= 1;
515         }
516         if (GET_ATTRIB(opcode, A_MEMSIZE_2B)) {
517             width |= 2;
518         }
519         if (GET_ATTRIB(opcode, A_MEMSIZE_4B)) {
520             width |= 4;
521         }
522         if (GET_ATTRIB(opcode, A_MEMSIZE_8B)) {
523             width |= 8;
524         }
525         tcg_debug_assert(is_power_of_2(width));
526         ctx->store_width[slot] = width;
527     }
528 }
529 
/*
 * Generate TCG code for the current instruction, or raise an invalid
 * opcode exception when the insn has no generator.
 */
static void gen_insn(DisasContext *ctx)
{
    if (ctx->insn->generate) {
        ctx->insn->generate(ctx);
        mark_store_width(ctx);
    } else {
        gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE);
    }
}
539 
540 /*
541  * Helpers for generating the packet commit
542  */
543 static void gen_reg_writes(DisasContext *ctx)
544 {
545     int i;
546 
547     for (i = 0; i < ctx->reg_log_idx; i++) {
548         int reg_num = ctx->reg_log[i];
549 
550         tcg_gen_mov_tl(hex_gpr[reg_num], hex_new_value[reg_num]);
551 
552         /*
553          * ctx->is_tight_loop is set when SA0 points to the beginning of the TB.
554          * If we write to SA0, we have to turn off tight loop handling.
555          */
556         if (reg_num == HEX_REG_SA0) {
557             ctx->is_tight_loop = false;
558         }
559     }
560 }
561 
562 static void gen_pred_writes(DisasContext *ctx)
563 {
564     /* Early exit if the log is empty */
565     if (!ctx->preg_log_idx) {
566         return;
567     }
568 
569     for (int i = 0; i < ctx->preg_log_idx; i++) {
570         int pred_num = ctx->preg_log[i];
571         tcg_gen_mov_tl(hex_pred[pred_num], hex_new_pred_value[pred_num]);
572     }
573 }
574 
/*
 * In HEX_DEBUG builds, verify at runtime that the store width recorded
 * at translation time matches the one the semantics wrote.
 */
static void gen_check_store_width(DisasContext *ctx, int slot_num)
{
    if (HEX_DEBUG) {
        TCGv slot = tcg_constant_tl(slot_num);
        TCGv check = tcg_constant_tl(ctx->store_width[slot_num]);
        gen_helper_debug_check_store_width(cpu_env, slot, check);
    }
}
583 
584 static bool slot_is_predicated(Packet *pkt, int slot_num)
585 {
586     for (int i = 0; i < pkt->num_insns; i++) {
587         if (pkt->insn[i].slot == slot_num) {
588             return GET_ATTRIB(pkt->insn[i].opcode, A_CONDEXEC);
589         }
590     }
591     /* If we get to here, we didn't find an instruction in the requested slot */
592     g_assert_not_reached();
593 }
594 
/*
 * Emit the deferred scalar store for 'slot_num'.  Predicated slots are
 * guarded by a runtime check of hex_slot_cancelled.  When the store
 * width is known at translation time a direct store is emitted;
 * otherwise a helper dispatches on the width at runtime.
 */
void process_store(DisasContext *ctx, int slot_num)
{
    bool is_predicated = slot_is_predicated(ctx->pkt, slot_num);
    TCGLabel *label_end = NULL;

    /*
     * We may have already processed this store
     * See CHECK_NOSHUF in macros.h
     */
    if (slot_num == 1 && ctx->s1_store_processed) {
        return;
    }
    ctx->s1_store_processed = true;

    if (is_predicated) {
        TCGv cancelled = tcg_temp_new();
        label_end = gen_new_label();

        /* Don't do anything if the slot was cancelled */
        tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1);
        tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end);
    }
    {
        TCGv address = tcg_temp_new();
        tcg_gen_mov_tl(address, hex_store_addr[slot_num]);

        /*
         * If we know the width from the DisasContext, we can
         * generate much cleaner code.
         * Unfortunately, not all instructions execute the fSTORE
         * macro during code generation.  Anything that uses the
         * generic helper will have this problem.  Instructions
         * that use fWRAP to generate proper TCG code will be OK.
         */
        switch (ctx->store_width[slot_num]) {
        case 1:
            gen_check_store_width(ctx, slot_num);
            tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
                               hex_store_addr[slot_num],
                               ctx->mem_idx, MO_UB);
            break;
        case 2:
            gen_check_store_width(ctx, slot_num);
            tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
                               hex_store_addr[slot_num],
                               ctx->mem_idx, MO_TEUW);
            break;
        case 4:
            gen_check_store_width(ctx, slot_num);
            tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
                               hex_store_addr[slot_num],
                               ctx->mem_idx, MO_TEUL);
            break;
        case 8:
            gen_check_store_width(ctx, slot_num);
            tcg_gen_qemu_st_i64(hex_store_val64[slot_num],
                                hex_store_addr[slot_num],
                                ctx->mem_idx, MO_TEUQ);
            break;
        default:
            {
                /*
                 * If we get to here, we don't know the width at
                 * TCG generation time, we'll use a helper to
                 * avoid branching based on the width at runtime.
                 */
                TCGv slot = tcg_constant_tl(slot_num);
                gen_helper_commit_store(cpu_env, slot);
            }
        }
    }
    if (is_predicated) {
        gen_set_label(label_end);
    }
}
670 
/*
 * Emit the packet's deferred scalar stores, slot 1 before slot 0 to
 * match hardware ordering.
 */
static void process_store_log(DisasContext *ctx)
{
    /*
     *  When a packet has two stores, the hardware processes
     *  slot 1 and then slot 0.  This will be important when
     *  the memory accesses overlap.
     */
    Packet *pkt = ctx->pkt;
    if (pkt->pkt_has_store_s1) {
        g_assert(!pkt->pkt_has_dczeroa);
        process_store(ctx, 1);
    }
    if (pkt->pkt_has_store_s0) {
        g_assert(!pkt->pkt_has_dczeroa);
        process_store(ctx, 0);
    }
}
688 
689 /* Zero out a 32-bit cache line */
690 static void process_dczeroa(DisasContext *ctx)
691 {
692     if (ctx->pkt->pkt_has_dczeroa) {
693         /* Store 32 bytes of zero starting at (addr & ~0x1f) */
694         TCGv addr = tcg_temp_new();
695         TCGv_i64 zero = tcg_constant_i64(0);
696 
697         tcg_gen_andi_tl(addr, hex_dczero_addr, ~0x1f);
698         tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
699         tcg_gen_addi_tl(addr, addr, 8);
700         tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
701         tcg_gen_addi_tl(addr, addr, 8);
702         tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
703         tcg_gen_addi_tl(addr, addr, 8);
704         tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
705     }
706 }
707 
708 static bool pkt_has_hvx_store(Packet *pkt)
709 {
710     int i;
711     for (i = 0; i < pkt->num_insns; i++) {
712         int opcode = pkt->insn[i].opcode;
713         if (GET_ATTRIB(opcode, A_CVI) && GET_ATTRIB(opcode, A_STORE)) {
714             return true;
715         }
716     }
717     return false;
718 }
719 
/*
 * Commit the packet's HVX state: copy future vector/Q registers into
 * the live registers, then flush any pending HVX stores via helper.
 */
static void gen_commit_hvx(DisasContext *ctx)
{
    int i;

    /*
     *    for (i = 0; i < ctx->vreg_log_idx; i++) {
     *        int rnum = ctx->vreg_log[i];
     *        env->VRegs[rnum] = env->future_VRegs[rnum];
     *    }
     */
    for (i = 0; i < ctx->vreg_log_idx; i++) {
        int rnum = ctx->vreg_log[i];
        intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]);
        intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false);
        size_t size = sizeof(MMVector);

        tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
    }

    /*
     *    for (i = 0; i < ctx->qreg_log_idx; i++) {
     *        int rnum = ctx->qreg_log[i];
     *        env->QRegs[rnum] = env->future_QRegs[rnum];
     *    }
     */
    for (i = 0; i < ctx->qreg_log_idx; i++) {
        int rnum = ctx->qreg_log[i];
        intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]);
        intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]);
        size_t size = sizeof(MMQReg);

        tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
    }

    if (pkt_has_hvx_store(ctx->pkt)) {
        gen_helper_commit_hvx_stores(cpu_env);
    }
}
758 
759 static void update_exec_counters(DisasContext *ctx)
760 {
761     Packet *pkt = ctx->pkt;
762     int num_insns = pkt->num_insns;
763     int num_real_insns = 0;
764     int num_hvx_insns = 0;
765 
766     for (int i = 0; i < num_insns; i++) {
767         if (!pkt->insn[i].is_endloop &&
768             !pkt->insn[i].part1 &&
769             !GET_ATTRIB(pkt->insn[i].opcode, A_IT_NOP)) {
770             num_real_insns++;
771         }
772         if (GET_ATTRIB(pkt->insn[i].opcode, A_CVI)) {
773             num_hvx_insns++;
774         }
775     }
776 
777     ctx->num_packets++;
778     ctx->num_insns += num_real_insns;
779     ctx->num_hvx_insns += num_hvx_insns;
780 }
781 
/*
 * Commit the packet: probe multi-store packets so all stores fault (or
 * not) together, execute the deferred stores, write back registers,
 * predicates and HVX state, bump counters, and end the TB on a
 * change of flow.
 */
static void gen_commit_packet(DisasContext *ctx)
{
    /*
     * If there is more than one store in a packet, make sure they are all OK
     * before proceeding with the rest of the packet commit.
     *
     * dczeroa has to be the only store operation in the packet, so we go
     * ahead and process that first.
     *
     * When there is an HVX store, there can also be a scalar store in either
     * slot 0 or slot1, so we create a mask for the helper to indicate what
     * work to do.
     *
     * When there are two scalar stores, we probe the one in slot 0.
     *
     * Note that we don't call the probe helper for packets with only one
     * store.  Therefore, we call process_store_log before anything else
     * involved in committing the packet.
     */
    Packet *pkt = ctx->pkt;
    bool has_store_s0 = pkt->pkt_has_store_s0;
    bool has_store_s1 = (pkt->pkt_has_store_s1 && !ctx->s1_store_processed);
    bool has_hvx_store = pkt_has_hvx_store(pkt);
    if (pkt->pkt_has_dczeroa) {
        /*
         * The dczeroa will be the store in slot 0, check that we don't have
         * a store in slot 1 or an HVX store.
         */
        g_assert(!has_store_s1 && !has_hvx_store);
        process_dczeroa(ctx);
    } else if (has_hvx_store) {
        if (!has_store_s0 && !has_store_s1) {
            /* HVX store only: probe just the vector stores */
            TCGv mem_idx = tcg_constant_tl(ctx->mem_idx);
            gen_helper_probe_hvx_stores(cpu_env, mem_idx);
        } else {
            /* Mixed scalar + HVX stores: describe the work in a mask */
            int mask = 0;

            if (has_store_s0) {
                mask =
                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0, 1);
            }
            if (has_store_s1) {
                mask =
                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1, 1);
            }
            if (has_hvx_store) {
                mask =
                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
                               HAS_HVX_STORES, 1);
            }
            if (has_store_s0 && slot_is_predicated(pkt, 0)) {
                mask =
                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
                               S0_IS_PRED, 1);
            }
            if (has_store_s1 && slot_is_predicated(pkt, 1)) {
                mask =
                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
                               S1_IS_PRED, 1);
            }
            mask = FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, MMU_IDX,
                              ctx->mem_idx);
            gen_helper_probe_pkt_scalar_hvx_stores(cpu_env,
                                                   tcg_constant_tl(mask));
        }
    } else if (has_store_s0 && has_store_s1) {
        /*
         * process_store_log will execute the slot 1 store first,
         * so we only have to probe the store in slot 0
         */
        int args = 0;
        args =
            FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, ctx->mem_idx);
        if (slot_is_predicated(pkt, 0)) {
            args =
                FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED, 1);
        }
        TCGv args_tcgv = tcg_constant_tl(args);
        gen_helper_probe_pkt_scalar_store_s0(cpu_env, args_tcgv);
    }

    process_store_log(ctx);

    gen_reg_writes(ctx);
    gen_pred_writes(ctx);
    if (pkt->pkt_has_hvx) {
        gen_commit_hvx(ctx);
    }
    update_exec_counters(ctx);
    if (HEX_DEBUG) {
        TCGv has_st0 =
            tcg_constant_tl(pkt->pkt_has_store_s0 && !pkt->pkt_has_dczeroa);
        TCGv has_st1 =
            tcg_constant_tl(pkt->pkt_has_store_s1 && !pkt->pkt_has_dczeroa);

        /* Handy place to set a breakpoint at the end of execution */
        gen_helper_debug_commit_end(cpu_env, has_st0, has_st1);
    }

    /* Vector history instruction runs after the packet commits */
    if (pkt->vhist_insn != NULL) {
        ctx->pre_commit = false;
        ctx->insn = pkt->vhist_insn;
        pkt->vhist_insn->generate(ctx);
    }

    if (pkt->pkt_has_cof) {
        gen_end_tb(ctx);
    }
}
891 
892 static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx)
893 {
894     uint32_t words[PACKET_WORDS_MAX];
895     int nwords;
896     Packet pkt;
897     int i;
898 
899     nwords = read_packet_words(env, ctx, words);
900     if (!nwords) {
901         gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET);
902         return;
903     }
904 
905     if (decode_packet(nwords, words, &pkt, false) > 0) {
906         pkt.pc = ctx->base.pc_next;
907         HEX_DEBUG_PRINT_PKT(&pkt);
908         ctx->pkt = &pkt;
909         gen_start_packet(ctx);
910         for (i = 0; i < pkt.num_insns; i++) {
911             ctx->insn = &pkt.insn[i];
912             gen_insn(ctx);
913         }
914         gen_commit_packet(ctx);
915         ctx->base.pc_next += pkt.encod_pkt_size_in_bytes;
916     } else {
917         gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET);
918     }
919 }
920 
921 static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
922                                           CPUState *cs)
923 {
924     DisasContext *ctx = container_of(dcbase, DisasContext, base);
925     uint32_t hex_flags = dcbase->tb->flags;
926 
927     ctx->mem_idx = MMU_USER_IDX;
928     ctx->num_packets = 0;
929     ctx->num_insns = 0;
930     ctx->num_hvx_insns = 0;
931     ctx->branch_cond = TCG_COND_NEVER;
932     ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP);
933 }
934 
static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
    /* No per-TB setup is needed beyond init_disas_context */
}
938 
939 static void hexagon_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
940 {
941     DisasContext *ctx = container_of(dcbase, DisasContext, base);
942 
943     tcg_gen_insn_start(ctx->base.pc_next);
944 }
945 
946 static bool pkt_crosses_page(CPUHexagonState *env, DisasContext *ctx)
947 {
948     target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
949     bool found_end = false;
950     int nwords;
951 
952     for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) {
953         uint32_t word = cpu_ldl_code(env,
954                             ctx->base.pc_next + nwords * sizeof(uint32_t));
955         found_end = is_packet_end(word);
956     }
957     uint32_t next_ptr =  ctx->base.pc_next + nwords * sizeof(uint32_t);
958     return found_end && next_ptr - page_start >= TARGET_PAGE_SIZE;
959 }
960 
961 static void hexagon_tr_translate_packet(DisasContextBase *dcbase, CPUState *cpu)
962 {
963     DisasContext *ctx = container_of(dcbase, DisasContext, base);
964     CPUHexagonState *env = cpu->env_ptr;
965 
966     decode_and_translate_packet(env, ctx);
967 
968     if (ctx->base.is_jmp == DISAS_NEXT) {
969         target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
970         target_ulong bytes_max = PACKET_WORDS_MAX * sizeof(target_ulong);
971 
972         if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE ||
973             (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE - bytes_max &&
974              pkt_crosses_page(env, ctx))) {
975             ctx->base.is_jmp = DISAS_TOO_MANY;
976         }
977 
978         /*
979          * The CPU log is used to compare against LLDB single stepping,
980          * so end the TLB after every packet.
981          */
982         HexagonCPU *hex_cpu = env_archcpu(env);
983         if (hex_cpu->lldb_compat && qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
984             ctx->base.is_jmp = DISAS_TOO_MANY;
985         }
986     }
987 }
988 
989 static void hexagon_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
990 {
991     DisasContext *ctx = container_of(dcbase, DisasContext, base);
992 
993     switch (ctx->base.is_jmp) {
994     case DISAS_TOO_MANY:
995         gen_exec_counters(ctx);
996         tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next);
997         tcg_gen_exit_tb(NULL, 0);
998         break;
999     case DISAS_NORETURN:
1000         break;
1001     default:
1002         g_assert_not_reached();
1003     }
1004 }
1005 
1006 static void hexagon_tr_disas_log(const DisasContextBase *dcbase,
1007                                  CPUState *cpu, FILE *logfile)
1008 {
1009     fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first));
1010     target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size);
1011 }
1012 
1013 
/* Hooks handed to the generic translator loop (see gen_intermediate_code) */
static const TranslatorOps hexagon_tr_ops = {
    .init_disas_context = hexagon_tr_init_disas_context,
    .tb_start           = hexagon_tr_tb_start,
    .insn_start         = hexagon_tr_insn_start,
    .translate_insn     = hexagon_tr_translate_packet,
    .tb_stop            = hexagon_tr_tb_stop,
    .disas_log          = hexagon_tr_disas_log,
};
1022 
1023 void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
1024                            target_ulong pc, void *host_pc)
1025 {
1026     DisasContext ctx;
1027 
1028     translator_loop(cs, tb, max_insns, pc, host_pc,
1029                     &hexagon_tr_ops, &ctx.base);
1030 }
1031 
/*
 * Static buffers that hold the generated names (via snprintf in
 * hexagon_translate_init) passed to tcg_global_mem_new for each
 * TCG global below.
 */
#define NAME_LEN               64
static char new_value_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
static char new_pred_value_names[NUM_PREGS][NAME_LEN];
static char store_addr_names[STORES_MAX][NAME_LEN];
static char store_width_names[STORES_MAX][NAME_LEN];
static char store_val32_names[STORES_MAX][NAME_LEN];
static char store_val64_names[STORES_MAX][NAME_LEN];
static char vstore_addr_names[VSTORES_MAX][NAME_LEN];
static char vstore_size_names[VSTORES_MAX][NAME_LEN];
static char vstore_pending_names[VSTORES_MAX][NAME_LEN];
1043 
1044 void hexagon_translate_init(void)
1045 {
1046     int i;
1047 
1048     opcode_init();
1049 
1050     for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
1051         hex_gpr[i] = tcg_global_mem_new(cpu_env,
1052             offsetof(CPUHexagonState, gpr[i]),
1053             hexagon_regnames[i]);
1054 
1055         snprintf(new_value_names[i], NAME_LEN, "new_%s", hexagon_regnames[i]);
1056         hex_new_value[i] = tcg_global_mem_new(cpu_env,
1057             offsetof(CPUHexagonState, new_value[i]),
1058             new_value_names[i]);
1059 
1060         if (HEX_DEBUG) {
1061             snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s",
1062                      hexagon_regnames[i]);
1063             hex_reg_written[i] = tcg_global_mem_new(cpu_env,
1064                 offsetof(CPUHexagonState, reg_written[i]),
1065                 reg_written_names[i]);
1066         }
1067     }
1068     for (i = 0; i < NUM_PREGS; i++) {
1069         hex_pred[i] = tcg_global_mem_new(cpu_env,
1070             offsetof(CPUHexagonState, pred[i]),
1071             hexagon_prednames[i]);
1072 
1073         snprintf(new_pred_value_names[i], NAME_LEN, "new_pred_%s",
1074                  hexagon_prednames[i]);
1075         hex_new_pred_value[i] = tcg_global_mem_new(cpu_env,
1076             offsetof(CPUHexagonState, new_pred_value[i]),
1077             new_pred_value_names[i]);
1078     }
1079     hex_pred_written = tcg_global_mem_new(cpu_env,
1080         offsetof(CPUHexagonState, pred_written), "pred_written");
1081     hex_this_PC = tcg_global_mem_new(cpu_env,
1082         offsetof(CPUHexagonState, this_PC), "this_PC");
1083     hex_slot_cancelled = tcg_global_mem_new(cpu_env,
1084         offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled");
1085     hex_branch_taken = tcg_global_mem_new(cpu_env,
1086         offsetof(CPUHexagonState, branch_taken), "branch_taken");
1087     hex_pkt_has_store_s1 = tcg_global_mem_new(cpu_env,
1088         offsetof(CPUHexagonState, pkt_has_store_s1), "pkt_has_store_s1");
1089     hex_dczero_addr = tcg_global_mem_new(cpu_env,
1090         offsetof(CPUHexagonState, dczero_addr), "dczero_addr");
1091     hex_llsc_addr = tcg_global_mem_new(cpu_env,
1092         offsetof(CPUHexagonState, llsc_addr), "llsc_addr");
1093     hex_llsc_val = tcg_global_mem_new(cpu_env,
1094         offsetof(CPUHexagonState, llsc_val), "llsc_val");
1095     hex_llsc_val_i64 = tcg_global_mem_new_i64(cpu_env,
1096         offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64");
1097     for (i = 0; i < STORES_MAX; i++) {
1098         snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i);
1099         hex_store_addr[i] = tcg_global_mem_new(cpu_env,
1100             offsetof(CPUHexagonState, mem_log_stores[i].va),
1101             store_addr_names[i]);
1102 
1103         snprintf(store_width_names[i], NAME_LEN, "store_width_%d", i);
1104         hex_store_width[i] = tcg_global_mem_new(cpu_env,
1105             offsetof(CPUHexagonState, mem_log_stores[i].width),
1106             store_width_names[i]);
1107 
1108         snprintf(store_val32_names[i], NAME_LEN, "store_val32_%d", i);
1109         hex_store_val32[i] = tcg_global_mem_new(cpu_env,
1110             offsetof(CPUHexagonState, mem_log_stores[i].data32),
1111             store_val32_names[i]);
1112 
1113         snprintf(store_val64_names[i], NAME_LEN, "store_val64_%d", i);
1114         hex_store_val64[i] = tcg_global_mem_new_i64(cpu_env,
1115             offsetof(CPUHexagonState, mem_log_stores[i].data64),
1116             store_val64_names[i]);
1117     }
1118     for (int i = 0; i < VSTORES_MAX; i++) {
1119         snprintf(vstore_addr_names[i], NAME_LEN, "vstore_addr_%d", i);
1120         hex_vstore_addr[i] = tcg_global_mem_new(cpu_env,
1121             offsetof(CPUHexagonState, vstore[i].va),
1122             vstore_addr_names[i]);
1123 
1124         snprintf(vstore_size_names[i], NAME_LEN, "vstore_size_%d", i);
1125         hex_vstore_size[i] = tcg_global_mem_new(cpu_env,
1126             offsetof(CPUHexagonState, vstore[i].size),
1127             vstore_size_names[i]);
1128 
1129         snprintf(vstore_pending_names[i], NAME_LEN, "vstore_pending_%d", i);
1130         hex_vstore_pending[i] = tcg_global_mem_new(cpu_env,
1131             offsetof(CPUHexagonState, vstore_pending[i]),
1132             vstore_pending_names[i]);
1133     }
1134 }
1135