xref: /openbmc/qemu/target/hexagon/translate.c (revision 1cab5a02ab8144aad2abd001835e49104e4aae0f)
1 /*
2  *  Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #define QEMU_GENERATE
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "tcg/tcg-op.h"
22 #include "tcg/tcg-op-gvec.h"
23 #include "exec/helper-gen.h"
24 #include "exec/helper-proto.h"
25 #include "exec/translation-block.h"
26 #include "accel/tcg/cpu-ldst.h"
27 #include "exec/log.h"
28 #include "internal.h"
29 #include "attribs.h"
30 #include "insn.h"
31 #include "decode.h"
32 #include "translate.h"
33 #include "genptr.h"
34 #include "printinsn.h"
35 
36 #define HELPER_H "helper.h"
37 #include "exec/helper-info.c.inc"
38 #undef  HELPER_H
39 
40 #include "analyze_funcs_generated.c.inc"
41 
42 typedef void (*AnalyzeInsn)(DisasContext *ctx);
43 static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = {
44 #define OPCODE(X)    [X] = analyze_##X
45 #include "opcodes_def_generated.h.inc"
46 #undef OPCODE
47 };
48 
49 TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
50 TCGv hex_pred[NUM_PREGS];
51 TCGv hex_slot_cancelled;
52 TCGv hex_new_value_usr;
53 TCGv hex_store_addr[STORES_MAX];
54 TCGv hex_store_width[STORES_MAX];
55 TCGv hex_store_val32[STORES_MAX];
56 TCGv_i64 hex_store_val64[STORES_MAX];
57 TCGv hex_llsc_addr;
58 TCGv hex_llsc_val;
59 TCGv_i64 hex_llsc_val_i64;
60 TCGv hex_vstore_addr[VSTORES_MAX];
61 TCGv hex_vstore_size[VSTORES_MAX];
62 TCGv hex_vstore_pending[VSTORES_MAX];
63 
/* Printable names of the predicate registers; entry N names predicate N. */
static const char * const hexagon_prednames[] = {
    "p0",
    "p1",
    "p2",
    "p3",
};
67 
68 intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
69                           int num, bool alloc_ok)
70 {
71     intptr_t offset;
72 
73     if (!ctx->need_commit) {
74         return offsetof(CPUHexagonState, VRegs[regnum]);
75     }
76 
77     /* See if it is already allocated */
78     for (int i = 0; i < ctx->future_vregs_idx; i++) {
79         if (ctx->future_vregs_num[i] == regnum) {
80             return offsetof(CPUHexagonState, future_VRegs[i]);
81         }
82     }
83 
84     g_assert(alloc_ok);
85     offset = offsetof(CPUHexagonState, future_VRegs[ctx->future_vregs_idx]);
86     for (int i = 0; i < num; i++) {
87         ctx->future_vregs_num[ctx->future_vregs_idx + i] = regnum++;
88     }
89     ctx->future_vregs_idx += num;
90     g_assert(ctx->future_vregs_idx <= VECTOR_TEMPS_MAX);
91     return offset;
92 }
93 
94 intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
95                           int num, bool alloc_ok)
96 {
97     intptr_t offset;
98 
99     /* See if it is already allocated */
100     for (int i = 0; i < ctx->tmp_vregs_idx; i++) {
101         if (ctx->tmp_vregs_num[i] == regnum) {
102             return offsetof(CPUHexagonState, tmp_VRegs[i]);
103         }
104     }
105 
106     g_assert(alloc_ok);
107     offset = offsetof(CPUHexagonState, tmp_VRegs[ctx->tmp_vregs_idx]);
108     for (int i = 0; i < num; i++) {
109         ctx->tmp_vregs_num[ctx->tmp_vregs_idx + i] = regnum++;
110     }
111     ctx->tmp_vregs_idx += num;
112     g_assert(ctx->tmp_vregs_idx <= VECTOR_TEMPS_MAX);
113     return offset;
114 }
115 
116 static void gen_exception_raw(int excp)
117 {
118     gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp));
119 }
120 
121 static void gen_exec_counters(DisasContext *ctx)
122 {
123     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT],
124                     hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets);
125     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT],
126                     hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns);
127     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT],
128                     hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns);
129 }
130 
131 static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
132 {
133     return translator_use_goto_tb(&ctx->base, dest);
134 }
135 
136 static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest, bool
137                         move_to_pc)
138 {
139     if (use_goto_tb(ctx, dest)) {
140         tcg_gen_goto_tb(idx);
141         if (move_to_pc) {
142             tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
143         }
144         tcg_gen_exit_tb(ctx->base.tb, idx);
145     } else {
146         if (move_to_pc) {
147             tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
148         }
149         tcg_gen_lookup_and_goto_ptr();
150     }
151 }
152 
153 static void gen_end_tb(DisasContext *ctx)
154 {
155     Packet *pkt = ctx->pkt;
156 
157     gen_exec_counters(ctx);
158 
159     if (ctx->branch_cond != TCG_COND_NEVER) {
160         if (ctx->branch_cond != TCG_COND_ALWAYS) {
161             TCGLabel *skip = gen_new_label();
162             tcg_gen_brcondi_tl(ctx->branch_cond, ctx->branch_taken, 0, skip);
163             gen_goto_tb(ctx, 0, ctx->branch_dest, true);
164             gen_set_label(skip);
165             gen_goto_tb(ctx, 1, ctx->next_PC, false);
166         } else {
167             gen_goto_tb(ctx, 0, ctx->branch_dest, true);
168         }
169     } else if (ctx->is_tight_loop &&
170                pkt->insn[pkt->num_insns - 1].opcode == J2_endloop0) {
171         /*
172          * When we're in a tight loop, we defer the endloop0 processing
173          * to take advantage of direct block chaining
174          */
175         TCGLabel *skip = gen_new_label();
176         tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, skip);
177         tcg_gen_subi_tl(hex_gpr[HEX_REG_LC0], hex_gpr[HEX_REG_LC0], 1);
178         gen_goto_tb(ctx, 0, ctx->base.tb->pc, true);
179         gen_set_label(skip);
180         gen_goto_tb(ctx, 1, ctx->next_PC, false);
181     } else {
182         tcg_gen_lookup_and_goto_ptr();
183     }
184 
185     ctx->base.is_jmp = DISAS_NORETURN;
186 }
187 
188 static void gen_exception_end_tb(DisasContext *ctx, int excp)
189 {
190     gen_exec_counters(ctx);
191     tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->next_PC);
192     gen_exception_raw(excp);
193     ctx->base.is_jmp = DISAS_NORETURN;
194 
195 }
196 
197 static int read_packet_words(CPUHexagonState *env, DisasContext *ctx,
198                              uint32_t words[])
199 {
200     bool found_end = false;
201     int nwords, max_words;
202 
203     memset(words, 0, PACKET_WORDS_MAX * sizeof(uint32_t));
204     for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) {
205         words[nwords] =
206             translator_ldl(env, &ctx->base,
207                            ctx->base.pc_next + nwords * sizeof(uint32_t));
208         found_end = is_packet_end(words[nwords]);
209     }
210     if (!found_end) {
211         /* Read too many words without finding the end */
212         return 0;
213     }
214 
215     /* Check for page boundary crossing */
216     max_words = -(ctx->base.pc_next | TARGET_PAGE_MASK) / sizeof(uint32_t);
217     if (nwords > max_words) {
218         /* We can only cross a page boundary at the beginning of a TB */
219         g_assert(ctx->base.num_insns == 1);
220     }
221 
222     return nwords;
223 }
224 
225 static bool check_for_attrib(Packet *pkt, int attrib)
226 {
227     for (int i = 0; i < pkt->num_insns; i++) {
228         if (GET_ATTRIB(pkt->insn[i].opcode, attrib)) {
229             return true;
230         }
231     }
232     return false;
233 }
234 
235 static bool need_slot_cancelled(Packet *pkt)
236 {
237     /* We only need slot_cancelled for conditional store instructions */
238     for (int i = 0; i < pkt->num_insns; i++) {
239         uint16_t opcode = pkt->insn[i].opcode;
240         if (GET_ATTRIB(opcode, A_CONDEXEC) &&
241             GET_ATTRIB(opcode, A_SCALAR_STORE)) {
242             return true;
243         }
244     }
245     return false;
246 }
247 
248 static bool need_next_PC(DisasContext *ctx)
249 {
250     Packet *pkt = ctx->pkt;
251 
252     /* Check for conditional control flow or HW loop end */
253     for (int i = 0; i < pkt->num_insns; i++) {
254         uint16_t opcode = pkt->insn[i].opcode;
255         if (GET_ATTRIB(opcode, A_CONDEXEC) && GET_ATTRIB(opcode, A_COF)) {
256             return true;
257         }
258         if (GET_ATTRIB(opcode, A_HWLOOP0_END) ||
259             GET_ATTRIB(opcode, A_HWLOOP1_END)) {
260             return true;
261         }
262     }
263     return false;
264 }
265 
266 /*
267  * The opcode_analyze functions mark most of the writes in a packet
268  * However, there are some implicit writes marked as attributes
269  * of the applicable instructions.
270  */
271 static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum)
272 {
273     uint16_t opcode = ctx->insn->opcode;
274     if (GET_ATTRIB(opcode, attrib)) {
275         /*
276          * USR is used to set overflow and FP exceptions,
277          * so treat it as conditional
278          */
279         bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) ||
280                              rnum == HEX_REG_USR;
281 
282         /* LC0/LC1 is conditionally written by endloop instructions */
283         if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) &&
284             (opcode == J2_endloop0 ||
285              opcode == J2_endloop1 ||
286              opcode == J2_endloop01)) {
287             is_predicated = true;
288         }
289 
290         ctx_log_reg_write(ctx, rnum, is_predicated);
291     }
292 }
293 
294 static void mark_implicit_reg_writes(DisasContext *ctx)
295 {
296     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_FP,  HEX_REG_FP);
297     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SP,  HEX_REG_SP);
298     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LR,  HEX_REG_LR);
299     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC0, HEX_REG_LC0);
300     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0);
301     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1);
302     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1);
303     mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_USR, HEX_REG_USR);
304     mark_implicit_reg_write(ctx, A_FPOP, HEX_REG_USR);
305 }
306 
307 static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum)
308 {
309     if (GET_ATTRIB(ctx->insn->opcode, attrib)) {
310         ctx_log_pred_write(ctx, pnum);
311     }
312 }
313 
314 static void mark_implicit_pred_writes(DisasContext *ctx)
315 {
316     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P0, 0);
317     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P1, 1);
318     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P2, 2);
319     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3);
320 }
321 
322 static bool pkt_raises_exception(Packet *pkt)
323 {
324     if (check_for_attrib(pkt, A_LOAD) ||
325         check_for_attrib(pkt, A_STORE)) {
326         return true;
327     }
328     return false;
329 }
330 
331 static bool need_commit(DisasContext *ctx)
332 {
333     Packet *pkt = ctx->pkt;
334 
335     /*
336      * If the short-circuit property is set to false, we'll always do the commit
337      */
338     if (!ctx->short_circuit) {
339         return true;
340     }
341 
342     if (pkt_raises_exception(pkt)) {
343         return true;
344     }
345 
346     /* Registers with immutability flags require new_value */
347     for (int i = 0; i < ctx->reg_log_idx; i++) {
348         int rnum = ctx->reg_log[i];
349         if (reg_immut_masks[rnum]) {
350             return true;
351         }
352     }
353 
354     /* Floating point instructions are hard-coded to use new_value */
355     if (check_for_attrib(pkt, A_FPOP)) {
356         return true;
357     }
358 
359     if (ctx->read_after_write || ctx->has_hvx_overlap) {
360         return true;
361     }
362 
363     return false;
364 }
365 
366 static void mark_implicit_pred_read(DisasContext *ctx, int attrib, int pnum)
367 {
368     if (GET_ATTRIB(ctx->insn->opcode, attrib)) {
369         ctx_log_pred_read(ctx, pnum);
370     }
371 }
372 
373 static void mark_implicit_pred_reads(DisasContext *ctx)
374 {
375     mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P0, 0);
376     mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P1, 1);
377     mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 2);
378     mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 3);
379 }
380 
381 static void analyze_packet(DisasContext *ctx)
382 {
383     Packet *pkt = ctx->pkt;
384     ctx->read_after_write = false;
385     ctx->has_hvx_overlap = false;
386     for (int i = 0; i < pkt->num_insns; i++) {
387         Insn *insn = &pkt->insn[i];
388         ctx->insn = insn;
389         if (opcode_analyze[insn->opcode]) {
390             opcode_analyze[insn->opcode](ctx);
391         }
392         mark_implicit_reg_writes(ctx);
393         mark_implicit_pred_writes(ctx);
394         mark_implicit_pred_reads(ctx);
395     }
396 
397     ctx->need_commit = need_commit(ctx);
398 }
399 
400 static void gen_start_packet(DisasContext *ctx)
401 {
402     Packet *pkt = ctx->pkt;
403     target_ulong next_PC = ctx->base.pc_next + pkt->encod_pkt_size_in_bytes;
404     int i;
405 
406     /* Clear out the disassembly context */
407     ctx->next_PC = next_PC;
408     ctx->reg_log_idx = 0;
409     bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
410     bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
411     ctx->preg_log_idx = 0;
412     bitmap_zero(ctx->pregs_written, NUM_PREGS);
413     ctx->future_vregs_idx = 0;
414     ctx->tmp_vregs_idx = 0;
415     ctx->vreg_log_idx = 0;
416     bitmap_zero(ctx->vregs_written, NUM_VREGS);
417     bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS);
418     bitmap_zero(ctx->vregs_updated, NUM_VREGS);
419     bitmap_zero(ctx->vregs_select, NUM_VREGS);
420     bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS);
421     bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS);
422     bitmap_zero(ctx->qregs_written, NUM_QREGS);
423     ctx->qreg_log_idx = 0;
424     for (i = 0; i < STORES_MAX; i++) {
425         ctx->store_width[i] = 0;
426     }
427     ctx->s1_store_processed = false;
428     ctx->pre_commit = true;
429     for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
430         ctx->new_value[i] = NULL;
431     }
432     for (i = 0; i < NUM_PREGS; i++) {
433         ctx->new_pred_value[i] = NULL;
434     }
435 
436     analyze_packet(ctx);
437 
438     /*
439      * pregs_written is used both in the analyze phase as well as the code
440      * gen phase, so clear it again.
441      */
442     bitmap_zero(ctx->pregs_written, NUM_PREGS);
443 
444     /* Initialize the runtime state for packet semantics */
445     if (need_slot_cancelled(pkt)) {
446         tcg_gen_movi_tl(hex_slot_cancelled, 0);
447     }
448     ctx->branch_taken = NULL;
449     if (pkt->pkt_has_cof) {
450         ctx->branch_taken = tcg_temp_new();
451         if (pkt->pkt_has_multi_cof) {
452             tcg_gen_movi_tl(ctx->branch_taken, 0);
453         }
454         if (need_next_PC(ctx)) {
455             tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], next_PC);
456         }
457     }
458 
459     /* Preload the predicated registers into get_result_gpr(ctx, i) */
460     if (ctx->need_commit &&
461         !bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) {
462         i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
463         while (i < TOTAL_PER_THREAD_REGS) {
464             tcg_gen_mov_tl(get_result_gpr(ctx, i), hex_gpr[i]);
465             i = find_next_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS,
466                               i + 1);
467         }
468     }
469 
470     /*
471      * Preload the predicated pred registers into ctx->new_pred_value[pred_num]
472      * Only endloop instructions conditionally write to pred registers
473      */
474     if (ctx->need_commit && pkt->pkt_has_endloop) {
475         for (i = 0; i < ctx->preg_log_idx; i++) {
476             int pred_num = ctx->preg_log[i];
477             ctx->new_pred_value[pred_num] = tcg_temp_new();
478             tcg_gen_mov_tl(ctx->new_pred_value[pred_num], hex_pred[pred_num]);
479         }
480     }
481 
482     /* Preload the predicated HVX registers into future_VRegs and tmp_VRegs */
483     if (!bitmap_empty(ctx->predicated_future_vregs, NUM_VREGS)) {
484         i = find_first_bit(ctx->predicated_future_vregs, NUM_VREGS);
485         while (i < NUM_VREGS) {
486             const intptr_t VdV_off =
487                 ctx_future_vreg_off(ctx, i, 1, true);
488             intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]);
489             tcg_gen_gvec_mov(MO_64, VdV_off,
490                              src_off,
491                              sizeof(MMVector),
492                              sizeof(MMVector));
493             i = find_next_bit(ctx->predicated_future_vregs, NUM_VREGS, i + 1);
494         }
495     }
496     if (!bitmap_empty(ctx->predicated_tmp_vregs, NUM_VREGS)) {
497         i = find_first_bit(ctx->predicated_tmp_vregs, NUM_VREGS);
498         while (i < NUM_VREGS) {
499             const intptr_t VdV_off =
500                 ctx_tmp_vreg_off(ctx, i, 1, true);
501             intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]);
502             tcg_gen_gvec_mov(MO_64, VdV_off,
503                              src_off,
504                              sizeof(MMVector),
505                              sizeof(MMVector));
506             i = find_next_bit(ctx->predicated_tmp_vregs, NUM_VREGS, i + 1);
507         }
508     }
509 }
510 
511 bool is_gather_store_insn(DisasContext *ctx)
512 {
513     Packet *pkt = ctx->pkt;
514     Insn *insn = ctx->insn;
515     if (GET_ATTRIB(insn->opcode, A_CVI_NEW) &&
516         insn->new_value_producer_slot == 1) {
517         /* Look for gather instruction */
518         for (int i = 0; i < pkt->num_insns; i++) {
519             Insn *in = &pkt->insn[i];
520             if (GET_ATTRIB(in->opcode, A_CVI_GATHER) && in->slot == 1) {
521                 return true;
522             }
523         }
524     }
525     return false;
526 }
527 
528 static void mark_store_width(DisasContext *ctx)
529 {
530     uint16_t opcode = ctx->insn->opcode;
531     uint32_t slot = ctx->insn->slot;
532     uint8_t width = 0;
533 
534     if (GET_ATTRIB(opcode, A_SCALAR_STORE)) {
535         if (GET_ATTRIB(opcode, A_MEMSIZE_0B)) {
536             return;
537         }
538         if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) {
539             width |= 1;
540         }
541         if (GET_ATTRIB(opcode, A_MEMSIZE_2B)) {
542             width |= 2;
543         }
544         if (GET_ATTRIB(opcode, A_MEMSIZE_4B)) {
545             width |= 4;
546         }
547         if (GET_ATTRIB(opcode, A_MEMSIZE_8B)) {
548             width |= 8;
549         }
550         tcg_debug_assert(is_power_of_2(width));
551         ctx->store_width[slot] = width;
552     }
553 }
554 
555 static void gen_insn(DisasContext *ctx)
556 {
557     if (ctx->insn->generate) {
558         ctx->insn->generate(ctx);
559         mark_store_width(ctx);
560     } else {
561         gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_OPCODE);
562     }
563 }
564 
565 /*
566  * Helpers for generating the packet commit
567  */
568 static void gen_reg_writes(DisasContext *ctx)
569 {
570     int i;
571 
572     /* Early exit if not needed */
573     if (!ctx->need_commit) {
574         return;
575     }
576 
577     for (i = 0; i < ctx->reg_log_idx; i++) {
578         int reg_num = ctx->reg_log[i];
579 
580         tcg_gen_mov_tl(hex_gpr[reg_num], get_result_gpr(ctx, reg_num));
581 
582         /*
583          * ctx->is_tight_loop is set when SA0 points to the beginning of the TB.
584          * If we write to SA0, we have to turn off tight loop handling.
585          */
586         if (reg_num == HEX_REG_SA0) {
587             ctx->is_tight_loop = false;
588         }
589     }
590 }
591 
592 static void gen_pred_writes(DisasContext *ctx)
593 {
594     /* Early exit if not needed or the log is empty */
595     if (!ctx->need_commit || !ctx->preg_log_idx) {
596         return;
597     }
598 
599     for (int i = 0; i < ctx->preg_log_idx; i++) {
600         int pred_num = ctx->preg_log[i];
601         tcg_gen_mov_tl(hex_pred[pred_num], ctx->new_pred_value[pred_num]);
602     }
603 }
604 
605 static bool slot_is_predicated(Packet *pkt, int slot_num)
606 {
607     for (int i = 0; i < pkt->num_insns; i++) {
608         if (pkt->insn[i].slot == slot_num) {
609             return GET_ATTRIB(pkt->insn[i].opcode, A_CONDEXEC);
610         }
611     }
612     /* If we get to here, we didn't find an instruction in the requested slot */
613     g_assert_not_reached();
614 }
615 
616 void process_store(DisasContext *ctx, int slot_num)
617 {
618     bool is_predicated = slot_is_predicated(ctx->pkt, slot_num);
619     TCGLabel *label_end = NULL;
620 
621     /*
622      * We may have already processed this store
623      * See CHECK_NOSHUF in macros.h
624      */
625     if (slot_num == 1 && ctx->s1_store_processed) {
626         return;
627     }
628     ctx->s1_store_processed = true;
629 
630     if (is_predicated) {
631         TCGv cancelled = tcg_temp_new();
632         label_end = gen_new_label();
633 
634         /* Don't do anything if the slot was cancelled */
635         tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1);
636         tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end);
637     }
638     {
639         TCGv address = tcg_temp_new();
640         tcg_gen_mov_tl(address, hex_store_addr[slot_num]);
641 
642         /*
643          * If we know the width from the DisasContext, we can
644          * generate much cleaner code.
645          * Unfortunately, not all instructions execute the fSTORE
646          * macro during code generation.  Anything that uses the
647          * generic helper will have this problem.  Instructions
648          * that use fWRAP to generate proper TCG code will be OK.
649          */
650         switch (ctx->store_width[slot_num]) {
651         case 1:
652             tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
653                                hex_store_addr[slot_num],
654                                ctx->mem_idx, MO_UB);
655             break;
656         case 2:
657             tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
658                                hex_store_addr[slot_num],
659                                ctx->mem_idx, MO_LE | MO_UW);
660             break;
661         case 4:
662             tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
663                                hex_store_addr[slot_num],
664                                ctx->mem_idx, MO_LE | MO_UL);
665             break;
666         case 8:
667             tcg_gen_qemu_st_i64(hex_store_val64[slot_num],
668                                 hex_store_addr[slot_num],
669                                 ctx->mem_idx, MO_LE | MO_UQ);
670             break;
671         default:
672             {
673                 /*
674                  * If we get to here, we don't know the width at
675                  * TCG generation time, we'll use a helper to
676                  * avoid branching based on the width at runtime.
677                  */
678                 TCGv slot = tcg_constant_tl(slot_num);
679                 gen_helper_commit_store(tcg_env, slot);
680             }
681         }
682     }
683     if (is_predicated) {
684         gen_set_label(label_end);
685     }
686 }
687 
688 static void process_store_log(DisasContext *ctx)
689 {
690     /*
691      *  When a packet has two stores, the hardware processes
692      *  slot 1 and then slot 0.  This will be important when
693      *  the memory accesses overlap.
694      */
695     Packet *pkt = ctx->pkt;
696     if (pkt->pkt_has_store_s1) {
697         g_assert(!pkt->pkt_has_dczeroa);
698         process_store(ctx, 1);
699     }
700     if (pkt->pkt_has_store_s0) {
701         g_assert(!pkt->pkt_has_dczeroa);
702         process_store(ctx, 0);
703     }
704 }
705 
706 /* Zero out a 32-bit cache line */
707 static void process_dczeroa(DisasContext *ctx)
708 {
709     if (ctx->pkt->pkt_has_dczeroa) {
710         /* Store 32 bytes of zero starting at (addr & ~0x1f) */
711         TCGv addr = tcg_temp_new();
712         TCGv_i64 zero = tcg_constant_i64(0);
713 
714         tcg_gen_andi_tl(addr, ctx->dczero_addr, ~0x1f);
715         tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
716         tcg_gen_addi_tl(addr, addr, 8);
717         tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
718         tcg_gen_addi_tl(addr, addr, 8);
719         tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
720         tcg_gen_addi_tl(addr, addr, 8);
721         tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
722     }
723 }
724 
725 static bool pkt_has_hvx_store(Packet *pkt)
726 {
727     int i;
728     for (i = 0; i < pkt->num_insns; i++) {
729         int opcode = pkt->insn[i].opcode;
730         if (GET_ATTRIB(opcode, A_CVI) && GET_ATTRIB(opcode, A_STORE)) {
731             return true;
732         }
733     }
734     return false;
735 }
736 
737 static void gen_commit_hvx(DisasContext *ctx)
738 {
739     int i;
740 
741     /* Early exit if not needed */
742     if (!ctx->need_commit) {
743         g_assert(!pkt_has_hvx_store(ctx->pkt));
744         return;
745     }
746 
747     /*
748      *    for (i = 0; i < ctx->vreg_log_idx; i++) {
749      *        int rnum = ctx->vreg_log[i];
750      *        env->VRegs[rnum] = env->future_VRegs[rnum];
751      *    }
752      */
753     for (i = 0; i < ctx->vreg_log_idx; i++) {
754         int rnum = ctx->vreg_log[i];
755         intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]);
756         intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false);
757         size_t size = sizeof(MMVector);
758 
759         tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
760     }
761 
762     /*
763      *    for (i = 0; i < ctx->qreg_log_idx; i++) {
764      *        int rnum = ctx->qreg_log[i];
765      *        env->QRegs[rnum] = env->future_QRegs[rnum];
766      *    }
767      */
768     for (i = 0; i < ctx->qreg_log_idx; i++) {
769         int rnum = ctx->qreg_log[i];
770         intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]);
771         intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]);
772         size_t size = sizeof(MMQReg);
773 
774         tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
775     }
776 
777     if (pkt_has_hvx_store(ctx->pkt)) {
778         gen_helper_commit_hvx_stores(tcg_env);
779     }
780 }
781 
782 static void update_exec_counters(DisasContext *ctx)
783 {
784     Packet *pkt = ctx->pkt;
785     int num_insns = pkt->num_insns;
786     int num_real_insns = 0;
787     int num_hvx_insns = 0;
788 
789     for (int i = 0; i < num_insns; i++) {
790         if (!pkt->insn[i].is_endloop &&
791             !pkt->insn[i].part1 &&
792             !GET_ATTRIB(pkt->insn[i].opcode, A_IT_NOP)) {
793             num_real_insns++;
794         }
795         if (GET_ATTRIB(pkt->insn[i].opcode, A_CVI)) {
796             num_hvx_insns++;
797         }
798     }
799 
800     ctx->num_packets++;
801     ctx->num_insns += num_real_insns;
802     ctx->num_hvx_insns += num_hvx_insns;
803 }
804 
805 static void gen_commit_packet(DisasContext *ctx)
806 {
807     /*
808      * If there is more than one store in a packet, make sure they are all OK
809      * before proceeding with the rest of the packet commit.
810      *
811      * dczeroa has to be the only store operation in the packet, so we go
812      * ahead and process that first.
813      *
814      * When there is an HVX store, there can also be a scalar store in either
815      * slot 0 or slot1, so we create a mask for the helper to indicate what
816      * work to do.
817      *
818      * When there are two scalar stores, we probe the one in slot 0.
819      *
820      * Note that we don't call the probe helper for packets with only one
821      * store.  Therefore, we call process_store_log before anything else
822      * involved in committing the packet.
823      */
824     Packet *pkt = ctx->pkt;
825     bool has_store_s0 = pkt->pkt_has_store_s0;
826     bool has_store_s1 = (pkt->pkt_has_store_s1 && !ctx->s1_store_processed);
827     bool has_hvx_store = pkt_has_hvx_store(pkt);
828     if (pkt->pkt_has_dczeroa) {
829         /*
830          * The dczeroa will be the store in slot 0, check that we don't have
831          * a store in slot 1 or an HVX store.
832          */
833         g_assert(!has_store_s1 && !has_hvx_store);
834         process_dczeroa(ctx);
835     } else if (has_hvx_store) {
836         if (!has_store_s0 && !has_store_s1) {
837             TCGv mem_idx = tcg_constant_tl(ctx->mem_idx);
838             gen_helper_probe_hvx_stores(tcg_env, mem_idx);
839         } else {
840             int mask = 0;
841 
842             if (has_store_s0) {
843                 mask =
844                     FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0, 1);
845             }
846             if (has_store_s1) {
847                 mask =
848                     FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1, 1);
849             }
850             if (has_hvx_store) {
851                 mask =
852                     FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
853                                HAS_HVX_STORES, 1);
854             }
855             if (has_store_s0 && slot_is_predicated(pkt, 0)) {
856                 mask =
857                     FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
858                                S0_IS_PRED, 1);
859             }
860             if (has_store_s1 && slot_is_predicated(pkt, 1)) {
861                 mask =
862                     FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
863                                S1_IS_PRED, 1);
864             }
865             mask = FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, MMU_IDX,
866                               ctx->mem_idx);
867             gen_helper_probe_pkt_scalar_hvx_stores(tcg_env,
868                                                    tcg_constant_tl(mask));
869         }
870     } else if (has_store_s0 && has_store_s1) {
871         /*
872          * process_store_log will execute the slot 1 store first,
873          * so we only have to probe the store in slot 0
874          */
875         int args = 0;
876         args =
877             FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, ctx->mem_idx);
878         if (slot_is_predicated(pkt, 0)) {
879             args =
880                 FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED, 1);
881         }
882         TCGv args_tcgv = tcg_constant_tl(args);
883         gen_helper_probe_pkt_scalar_store_s0(tcg_env, args_tcgv);
884     }
885 
886     process_store_log(ctx);
887 
888     gen_reg_writes(ctx);
889     gen_pred_writes(ctx);
890     if (pkt->pkt_has_hvx) {
891         gen_commit_hvx(ctx);
892     }
893     update_exec_counters(ctx);
894 
895     if (pkt->vhist_insn != NULL) {
896         ctx->pre_commit = false;
897         ctx->insn = pkt->vhist_insn;
898         pkt->vhist_insn->generate(ctx);
899     }
900 
901     if (pkt->pkt_has_cof) {
902         gen_end_tb(ctx);
903     }
904 }
905 
906 static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx)
907 {
908     uint32_t words[PACKET_WORDS_MAX];
909     int nwords;
910     Packet pkt;
911     int i;
912 
913     nwords = read_packet_words(env, ctx, words);
914     if (!nwords) {
915         gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_PACKET);
916         return;
917     }
918 
919     ctx->pkt = &pkt;
920     if (decode_packet(ctx, nwords, words, &pkt, false) > 0) {
921         pkt.pc = ctx->base.pc_next;
922         gen_start_packet(ctx);
923         for (i = 0; i < pkt.num_insns; i++) {
924             ctx->insn = &pkt.insn[i];
925             gen_insn(ctx);
926         }
927         gen_commit_packet(ctx);
928         ctx->base.pc_next += pkt.encod_pkt_size_in_bytes;
929     } else {
930         gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_PACKET);
931     }
932 }
933 
934 static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
935                                           CPUState *cs)
936 {
937     DisasContext *ctx = container_of(dcbase, DisasContext, base);
938     HexagonCPU *hex_cpu = env_archcpu(cpu_env(cs));
939     uint32_t hex_flags = dcbase->tb->flags;
940 
941     ctx->mem_idx = MMU_USER_IDX;
942     ctx->num_packets = 0;
943     ctx->num_insns = 0;
944     ctx->num_hvx_insns = 0;
945     ctx->branch_cond = TCG_COND_NEVER;
946     ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP);
947     ctx->short_circuit = hex_cpu->short_circuit;
948 }
949 
950 static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
951 {
952 }
953 
954 static void hexagon_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
955 {
956     DisasContext *ctx = container_of(dcbase, DisasContext, base);
957 
958     tcg_gen_insn_start(ctx->base.pc_next);
959 }
960 
961 static bool pkt_crosses_page(CPUHexagonState *env, DisasContext *ctx)
962 {
963     target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
964     bool found_end = false;
965     int nwords;
966 
967     for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) {
968         uint32_t word = translator_ldl(env, &ctx->base,
969                             ctx->base.pc_next + nwords * sizeof(uint32_t));
970         found_end = is_packet_end(word);
971     }
972     uint32_t next_ptr =  ctx->base.pc_next + nwords * sizeof(uint32_t);
973     return found_end && next_ptr - page_start >= TARGET_PAGE_SIZE;
974 }
975 
976 static void hexagon_tr_translate_packet(DisasContextBase *dcbase, CPUState *cpu)
977 {
978     DisasContext *ctx = container_of(dcbase, DisasContext, base);
979     CPUHexagonState *env = cpu_env(cpu);
980 
981     decode_and_translate_packet(env, ctx);
982 
983     if (ctx->base.is_jmp == DISAS_NEXT) {
984         target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
985         target_ulong bytes_max = PACKET_WORDS_MAX * sizeof(target_ulong);
986 
987         if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE ||
988             (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE - bytes_max &&
989              pkt_crosses_page(env, ctx))) {
990             ctx->base.is_jmp = DISAS_TOO_MANY;
991         }
992 
993         /*
994          * The CPU log is used to compare against LLDB single stepping,
995          * so end the TLB after every packet.
996          */
997         HexagonCPU *hex_cpu = env_archcpu(env);
998         if (hex_cpu->lldb_compat && qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
999             ctx->base.is_jmp = DISAS_TOO_MANY;
1000         }
1001     }
1002 }
1003 
1004 static void hexagon_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
1005 {
1006     DisasContext *ctx = container_of(dcbase, DisasContext, base);
1007 
1008     switch (ctx->base.is_jmp) {
1009     case DISAS_TOO_MANY:
1010         gen_exec_counters(ctx);
1011         tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next);
1012         tcg_gen_exit_tb(NULL, 0);
1013         break;
1014     case DISAS_NORETURN:
1015         break;
1016     default:
1017         g_assert_not_reached();
1018     }
1019 }
1020 
1021 static const TranslatorOps hexagon_tr_ops = {
1022     .init_disas_context = hexagon_tr_init_disas_context,
1023     .tb_start           = hexagon_tr_tb_start,
1024     .insn_start         = hexagon_tr_insn_start,
1025     .translate_insn     = hexagon_tr_translate_packet,
1026     .tb_stop            = hexagon_tr_tb_stop,
1027 };
1028 
1029 void hexagon_translate_code(CPUState *cs, TranslationBlock *tb,
1030                             int *max_insns, vaddr pc, void *host_pc)
1031 {
1032     DisasContext ctx;
1033 
1034     translator_loop(cs, tb, max_insns, pc, host_pc,
1035                     &hexagon_tr_ops, &ctx.base);
1036 }
1037 
1038 #define NAME_LEN               64
1039 static char store_addr_names[STORES_MAX][NAME_LEN];
1040 static char store_width_names[STORES_MAX][NAME_LEN];
1041 static char store_val32_names[STORES_MAX][NAME_LEN];
1042 static char store_val64_names[STORES_MAX][NAME_LEN];
1043 static char vstore_addr_names[VSTORES_MAX][NAME_LEN];
1044 static char vstore_size_names[VSTORES_MAX][NAME_LEN];
1045 static char vstore_pending_names[VSTORES_MAX][NAME_LEN];
1046 
1047 void hexagon_translate_init(void)
1048 {
1049     int i;
1050 
1051     opcode_init();
1052 
1053     for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
1054         hex_gpr[i] = tcg_global_mem_new(tcg_env,
1055             offsetof(CPUHexagonState, gpr[i]),
1056             hexagon_regnames[i]);
1057     }
1058     hex_new_value_usr = tcg_global_mem_new(tcg_env,
1059         offsetof(CPUHexagonState, new_value_usr), "new_value_usr");
1060 
1061     for (i = 0; i < NUM_PREGS; i++) {
1062         hex_pred[i] = tcg_global_mem_new(tcg_env,
1063             offsetof(CPUHexagonState, pred[i]),
1064             hexagon_prednames[i]);
1065     }
1066     hex_slot_cancelled = tcg_global_mem_new(tcg_env,
1067         offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled");
1068     hex_llsc_addr = tcg_global_mem_new(tcg_env,
1069         offsetof(CPUHexagonState, llsc_addr), "llsc_addr");
1070     hex_llsc_val = tcg_global_mem_new(tcg_env,
1071         offsetof(CPUHexagonState, llsc_val), "llsc_val");
1072     hex_llsc_val_i64 = tcg_global_mem_new_i64(tcg_env,
1073         offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64");
1074     for (i = 0; i < STORES_MAX; i++) {
1075         snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i);
1076         hex_store_addr[i] = tcg_global_mem_new(tcg_env,
1077             offsetof(CPUHexagonState, mem_log_stores[i].va),
1078             store_addr_names[i]);
1079 
1080         snprintf(store_width_names[i], NAME_LEN, "store_width_%d", i);
1081         hex_store_width[i] = tcg_global_mem_new(tcg_env,
1082             offsetof(CPUHexagonState, mem_log_stores[i].width),
1083             store_width_names[i]);
1084 
1085         snprintf(store_val32_names[i], NAME_LEN, "store_val32_%d", i);
1086         hex_store_val32[i] = tcg_global_mem_new(tcg_env,
1087             offsetof(CPUHexagonState, mem_log_stores[i].data32),
1088             store_val32_names[i]);
1089 
1090         snprintf(store_val64_names[i], NAME_LEN, "store_val64_%d", i);
1091         hex_store_val64[i] = tcg_global_mem_new_i64(tcg_env,
1092             offsetof(CPUHexagonState, mem_log_stores[i].data64),
1093             store_val64_names[i]);
1094     }
1095     for (i = 0; i < VSTORES_MAX; i++) {
1096         snprintf(vstore_addr_names[i], NAME_LEN, "vstore_addr_%d", i);
1097         hex_vstore_addr[i] = tcg_global_mem_new(tcg_env,
1098             offsetof(CPUHexagonState, vstore[i].va),
1099             vstore_addr_names[i]);
1100 
1101         snprintf(vstore_size_names[i], NAME_LEN, "vstore_size_%d", i);
1102         hex_vstore_size[i] = tcg_global_mem_new(tcg_env,
1103             offsetof(CPUHexagonState, vstore[i].size),
1104             vstore_size_names[i]);
1105 
1106         snprintf(vstore_pending_names[i], NAME_LEN, "vstore_pending_%d", i);
1107         hex_vstore_pending[i] = tcg_global_mem_new(tcg_env,
1108             offsetof(CPUHexagonState, vstore_pending[i]),
1109             vstore_pending_names[i]);
1110     }
1111 }
1112