1 /* 2 * Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #define QEMU_GENERATE 19 #include "qemu/osdep.h" 20 #include "cpu.h" 21 #include "tcg/tcg-op.h" 22 #include "tcg/tcg-op-gvec.h" 23 #include "exec/helper-gen.h" 24 #include "exec/helper-proto.h" 25 #include "exec/translation-block.h" 26 #include "exec/cpu_ldst.h" 27 #include "exec/log.h" 28 #include "internal.h" 29 #include "attribs.h" 30 #include "insn.h" 31 #include "decode.h" 32 #include "translate.h" 33 #include "genptr.h" 34 #include "printinsn.h" 35 36 #define HELPER_H "helper.h" 37 #include "exec/helper-info.c.inc" 38 #undef HELPER_H 39 40 #include "analyze_funcs_generated.c.inc" 41 42 typedef void (*AnalyzeInsn)(DisasContext *ctx); 43 static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = { 44 #define OPCODE(X) [X] = analyze_##X 45 #include "opcodes_def_generated.h.inc" 46 #undef OPCODE 47 }; 48 49 TCGv hex_gpr[TOTAL_PER_THREAD_REGS]; 50 TCGv hex_pred[NUM_PREGS]; 51 TCGv hex_slot_cancelled; 52 TCGv hex_new_value_usr; 53 TCGv hex_reg_written[TOTAL_PER_THREAD_REGS]; 54 TCGv hex_store_addr[STORES_MAX]; 55 TCGv hex_store_width[STORES_MAX]; 56 TCGv hex_store_val32[STORES_MAX]; 57 TCGv_i64 hex_store_val64[STORES_MAX]; 58 TCGv hex_llsc_addr; 59 TCGv hex_llsc_val; 60 TCGv_i64 hex_llsc_val_i64; 61 TCGv hex_vstore_addr[VSTORES_MAX]; 62 TCGv hex_vstore_size[VSTORES_MAX]; 63 TCGv hex_vstore_pending[VSTORES_MAX]; 64 65 static const char * const hexagon_prednames[] = { 66 "p0", "p1", "p2", "p3" 67 }; 68 69 intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum, 70 int num, bool alloc_ok) 71 { 72 intptr_t offset; 73 74 if (!ctx->need_commit) { 75 return offsetof(CPUHexagonState, VRegs[regnum]); 76 } 77 78 /* See if it is already allocated */ 79 for (int i = 0; i < ctx->future_vregs_idx; i++) { 80 if (ctx->future_vregs_num[i] == regnum) { 81 return offsetof(CPUHexagonState, future_VRegs[i]); 82 } 83 } 84 85 g_assert(alloc_ok); 86 offset = offsetof(CPUHexagonState, future_VRegs[ctx->future_vregs_idx]); 87 for (int i = 0; i < num; i++) { 88 ctx->future_vregs_num[ctx->future_vregs_idx + i] = regnum++; 89 } 90 ctx->future_vregs_idx += num; 91 g_assert(ctx->future_vregs_idx <= VECTOR_TEMPS_MAX); 92 return offset; 93 } 94 95 intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum, 96 int num, bool alloc_ok) 97 { 98 intptr_t offset; 99 100 /* See if it is already allocated */ 101 for (int i = 0; i < ctx->tmp_vregs_idx; i++) { 102 if (ctx->tmp_vregs_num[i] == regnum) { 103 return offsetof(CPUHexagonState, tmp_VRegs[i]); 104 } 105 } 106 107 g_assert(alloc_ok); 108 offset = offsetof(CPUHexagonState, tmp_VRegs[ctx->tmp_vregs_idx]); 109 for (int i = 0; i < num; i++) { 110 ctx->tmp_vregs_num[ctx->tmp_vregs_idx + i] = regnum++; 111 } 112 ctx->tmp_vregs_idx += num; 113 g_assert(ctx->tmp_vregs_idx <= VECTOR_TEMPS_MAX); 114 return offset; 115 } 116 117 static void gen_exception_raw(int excp) 118 { 119 gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp)); 120 } 121 122 static void gen_exec_counters(DisasContext *ctx) 123 { 124 tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT], 125 hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets); 126 tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT], 127 hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns); 128 tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT], 129 hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns); 130 } 131 132 static bool use_goto_tb(DisasContext *ctx, target_ulong dest) 133 { 134 return translator_use_goto_tb(&ctx->base, dest); 135 } 136 137 static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest, bool 138 move_to_pc) 139 { 140 if (use_goto_tb(ctx, dest)) { 141 tcg_gen_goto_tb(idx); 142 if (move_to_pc) { 143 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest); 144 } 145 tcg_gen_exit_tb(ctx->base.tb, idx); 146 } else { 147 if (move_to_pc) { 148 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest); 149 } 150 tcg_gen_lookup_and_goto_ptr(); 151 } 152 } 153 154 static void gen_end_tb(DisasContext *ctx) 155 { 156 Packet *pkt = ctx->pkt; 157 158 gen_exec_counters(ctx); 159 160 if (ctx->branch_cond != TCG_COND_NEVER) { 161 if (ctx->branch_cond != TCG_COND_ALWAYS) { 162 TCGLabel *skip = gen_new_label(); 163 tcg_gen_brcondi_tl(ctx->branch_cond, ctx->branch_taken, 0, skip); 164 gen_goto_tb(ctx, 0, ctx->branch_dest, true); 165 gen_set_label(skip); 166 gen_goto_tb(ctx, 1, ctx->next_PC, false); 167 } else { 168 gen_goto_tb(ctx, 0, ctx->branch_dest, true); 169 } 170 } else if (ctx->is_tight_loop && 171 pkt->insn[pkt->num_insns - 1].opcode == J2_endloop0) { 172 /* 173 * When we're in a tight loop, we defer the endloop0 processing 174 * to take advantage of direct block chaining 175 */ 176 TCGLabel *skip = gen_new_label(); 177 tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, skip); 178 tcg_gen_subi_tl(hex_gpr[HEX_REG_LC0], hex_gpr[HEX_REG_LC0], 1); 179 gen_goto_tb(ctx, 0, ctx->base.tb->pc, true); 180 gen_set_label(skip); 181 gen_goto_tb(ctx, 1, ctx->next_PC, false); 182 } else { 183 tcg_gen_lookup_and_goto_ptr(); 184 } 185 186 ctx->base.is_jmp = DISAS_NORETURN; 187 } 188 189 static void gen_exception_end_tb(DisasContext *ctx, int excp) 190 { 191 gen_exec_counters(ctx); 192 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->next_PC); 193 gen_exception_raw(excp); 194 ctx->base.is_jmp = DISAS_NORETURN; 195 196 } 197 198 #define PACKET_BUFFER_LEN 1028 199 static void print_pkt(Packet *pkt) 200 { 201 GString *buf = g_string_sized_new(PACKET_BUFFER_LEN); 202 snprint_a_pkt_debug(buf, pkt); 203 HEX_DEBUG_LOG("%s", buf->str); 204 g_string_free(buf, true); 205 } 206 #define HEX_DEBUG_PRINT_PKT(pkt) \ 207 do { \ 208 if (HEX_DEBUG) { \ 209 print_pkt(pkt); \ 210 } \ 211 } while (0) 212 213 static int read_packet_words(CPUHexagonState *env, DisasContext *ctx, 214 uint32_t words[]) 215 { 216 bool found_end = false; 217 int nwords, max_words; 218 219 memset(words, 0, PACKET_WORDS_MAX * sizeof(uint32_t)); 220 for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) { 221 words[nwords] = 222 translator_ldl(env, &ctx->base, 223 ctx->base.pc_next + nwords * sizeof(uint32_t)); 224 found_end = is_packet_end(words[nwords]); 225 } 226 if (!found_end) { 227 /* Read too many words without finding the end */ 228 return 0; 229 } 230 231 /* Check for page boundary crossing */ 232 max_words = -(ctx->base.pc_next | TARGET_PAGE_MASK) / sizeof(uint32_t); 233 if (nwords > max_words) { 234 /* We can only cross a page boundary at the beginning of a TB */ 235 g_assert(ctx->base.num_insns == 1); 236 } 237 238 HEX_DEBUG_LOG("decode_packet: pc = 0x%" VADDR_PRIx "\n", 239 ctx->base.pc_next); 240 HEX_DEBUG_LOG(" words = { "); 241 for (int i = 0; i < nwords; i++) { 242 HEX_DEBUG_LOG("0x%x, ", words[i]); 243 } 244 HEX_DEBUG_LOG("}\n"); 245 246 return nwords; 247 } 248 249 static bool check_for_attrib(Packet *pkt, int attrib) 250 { 251 for (int i = 0; i < pkt->num_insns; i++) { 252 if (GET_ATTRIB(pkt->insn[i].opcode, attrib)) { 253 return true; 254 } 255 } 256 return false; 257 } 258 259 static bool need_slot_cancelled(Packet *pkt) 260 { 261 /* We only need slot_cancelled for conditional store instructions */ 262 for (int i = 0; i < pkt->num_insns; i++) { 263 uint16_t opcode = pkt->insn[i].opcode; 264 if (GET_ATTRIB(opcode, A_CONDEXEC) && 265 GET_ATTRIB(opcode, A_SCALAR_STORE)) { 266 return true; 267 } 268 } 269 return false; 270 } 271 272 static bool need_next_PC(DisasContext *ctx) 273 { 274 Packet *pkt = ctx->pkt; 275 276 /* Check for conditional control flow or HW loop end */ 277 for (int i = 0; i < pkt->num_insns; i++) { 278 uint16_t opcode = pkt->insn[i].opcode; 279 if (GET_ATTRIB(opcode, A_CONDEXEC) && GET_ATTRIB(opcode, A_COF)) { 280 return true; 281 } 282 if (GET_ATTRIB(opcode, A_HWLOOP0_END) || 283 GET_ATTRIB(opcode, A_HWLOOP1_END)) { 284 return true; 285 } 286 } 287 return false; 288 } 289 290 /* 291 * The opcode_analyze functions mark most of the writes in a packet 292 * However, there are some implicit writes marked as attributes 293 * of the applicable instructions. 294 */ 295 static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum) 296 { 297 uint16_t opcode = ctx->insn->opcode; 298 if (GET_ATTRIB(opcode, attrib)) { 299 /* 300 * USR is used to set overflow and FP exceptions, 301 * so treat it as conditional 302 */ 303 bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) || 304 rnum == HEX_REG_USR; 305 306 /* LC0/LC1 is conditionally written by endloop instructions */ 307 if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) && 308 (opcode == J2_endloop0 || 309 opcode == J2_endloop1 || 310 opcode == J2_endloop01)) { 311 is_predicated = true; 312 } 313 314 ctx_log_reg_write(ctx, rnum, is_predicated); 315 } 316 } 317 318 static void mark_implicit_reg_writes(DisasContext *ctx) 319 { 320 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_FP, HEX_REG_FP); 321 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SP, HEX_REG_SP); 322 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LR, HEX_REG_LR); 323 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC0, HEX_REG_LC0); 324 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0); 325 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1); 326 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1); 327 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_USR, HEX_REG_USR); 328 mark_implicit_reg_write(ctx, A_FPOP, HEX_REG_USR); 329 } 330 331 static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum) 332 { 333 if (GET_ATTRIB(ctx->insn->opcode, attrib)) { 334 ctx_log_pred_write(ctx, pnum); 335 } 336 } 337 338 static void mark_implicit_pred_writes(DisasContext *ctx) 339 { 340 mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P0, 0); 341 mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P1, 1); 342 mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P2, 2); 343 mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3); 344 } 345 346 static bool pkt_raises_exception(Packet *pkt) 347 { 348 if (check_for_attrib(pkt, A_LOAD) || 349 check_for_attrib(pkt, A_STORE)) { 350 return true; 351 } 352 return false; 353 } 354 355 static bool need_commit(DisasContext *ctx) 356 { 357 Packet *pkt = ctx->pkt; 358 359 /* 360 * If the short-circuit property is set to false, we'll always do the commit 361 */ 362 if (!ctx->short_circuit) { 363 return true; 364 } 365 366 if (pkt_raises_exception(pkt)) { 367 return true; 368 } 369 370 /* Registers with immutability flags require new_value */ 371 for (int i = 0; i < ctx->reg_log_idx; i++) { 372 int rnum = ctx->reg_log[i]; 373 if (reg_immut_masks[rnum]) { 374 return true; 375 } 376 } 377 378 /* Floating point instructions are hard-coded to use new_value */ 379 if (check_for_attrib(pkt, A_FPOP)) { 380 return true; 381 } 382 383 if (ctx->read_after_write || ctx->has_hvx_overlap) { 384 return true; 385 } 386 387 return false; 388 } 389 390 static void mark_implicit_pred_read(DisasContext *ctx, int attrib, int pnum) 391 { 392 if (GET_ATTRIB(ctx->insn->opcode, attrib)) { 393 ctx_log_pred_read(ctx, pnum); 394 } 395 } 396 397 static void mark_implicit_pred_reads(DisasContext *ctx) 398 { 399 mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P0, 0); 400 mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P1, 1); 401 mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 2); 402 mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 3); 403 } 404 405 static void analyze_packet(DisasContext *ctx) 406 { 407 Packet *pkt = ctx->pkt; 408 ctx->read_after_write = false; 409 ctx->has_hvx_overlap = false; 410 for (int i = 0; i < pkt->num_insns; i++) { 411 Insn *insn = &pkt->insn[i]; 412 ctx->insn = insn; 413 if (opcode_analyze[insn->opcode]) { 414 opcode_analyze[insn->opcode](ctx); 415 } 416 mark_implicit_reg_writes(ctx); 417 mark_implicit_pred_writes(ctx); 418 mark_implicit_pred_reads(ctx); 419 } 420 421 ctx->need_commit = need_commit(ctx); 422 } 423 424 static void gen_start_packet(DisasContext *ctx) 425 { 426 Packet *pkt = ctx->pkt; 427 target_ulong next_PC = ctx->base.pc_next + pkt->encod_pkt_size_in_bytes; 428 int i; 429 430 /* Clear out the disassembly context */ 431 ctx->next_PC = next_PC; 432 ctx->reg_log_idx = 0; 433 bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS); 434 bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); 435 ctx->preg_log_idx = 0; 436 bitmap_zero(ctx->pregs_written, NUM_PREGS); 437 ctx->future_vregs_idx = 0; 438 ctx->tmp_vregs_idx = 0; 439 ctx->vreg_log_idx = 0; 440 bitmap_zero(ctx->vregs_written, NUM_VREGS); 441 bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS); 442 bitmap_zero(ctx->vregs_updated, NUM_VREGS); 443 bitmap_zero(ctx->vregs_select, NUM_VREGS); 444 bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS); 445 bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS); 446 bitmap_zero(ctx->qregs_written, NUM_QREGS); 447 ctx->qreg_log_idx = 0; 448 for (i = 0; i < STORES_MAX; i++) { 449 ctx->store_width[i] = 0; 450 } 451 ctx->s1_store_processed = false; 452 ctx->pre_commit = true; 453 for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) { 454 ctx->new_value[i] = NULL; 455 } 456 for (i = 0; i < NUM_PREGS; i++) { 457 ctx->new_pred_value[i] = NULL; 458 } 459 460 analyze_packet(ctx); 461 462 /* 463 * pregs_written is used both in the analyze phase as well as the code 464 * gen phase, so clear it again. 465 */ 466 bitmap_zero(ctx->pregs_written, NUM_PREGS); 467 468 if (HEX_DEBUG) { 469 /* Handy place to set a breakpoint before the packet executes */ 470 gen_helper_debug_start_packet(tcg_env); 471 } 472 473 /* Initialize the runtime state for packet semantics */ 474 if (need_slot_cancelled(pkt)) { 475 tcg_gen_movi_tl(hex_slot_cancelled, 0); 476 } 477 ctx->branch_taken = NULL; 478 if (pkt->pkt_has_cof) { 479 ctx->branch_taken = tcg_temp_new(); 480 if (pkt->pkt_has_multi_cof) { 481 tcg_gen_movi_tl(ctx->branch_taken, 0); 482 } 483 if (need_next_PC(ctx)) { 484 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], next_PC); 485 } 486 } 487 if (HEX_DEBUG) { 488 ctx->pred_written = tcg_temp_new(); 489 tcg_gen_movi_tl(ctx->pred_written, 0); 490 } 491 492 /* Preload the predicated registers into get_result_gpr(ctx, i) */ 493 if (ctx->need_commit && 494 !bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) { 495 i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); 496 while (i < TOTAL_PER_THREAD_REGS) { 497 tcg_gen_mov_tl(get_result_gpr(ctx, i), hex_gpr[i]); 498 i = find_next_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS, 499 i + 1); 500 } 501 } 502 503 /* 504 * Preload the predicated pred registers into ctx->new_pred_value[pred_num] 505 * Only endloop instructions conditionally write to pred registers 506 */ 507 if (ctx->need_commit && pkt->pkt_has_endloop) { 508 for (i = 0; i < ctx->preg_log_idx; i++) { 509 int pred_num = ctx->preg_log[i]; 510 ctx->new_pred_value[pred_num] = tcg_temp_new(); 511 tcg_gen_mov_tl(ctx->new_pred_value[pred_num], hex_pred[pred_num]); 512 } 513 } 514 515 /* Preload the predicated HVX registers into future_VRegs and tmp_VRegs */ 516 if (!bitmap_empty(ctx->predicated_future_vregs, NUM_VREGS)) { 517 i = find_first_bit(ctx->predicated_future_vregs, NUM_VREGS); 518 while (i < NUM_VREGS) { 519 const intptr_t VdV_off = 520 ctx_future_vreg_off(ctx, i, 1, true); 521 intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]); 522 tcg_gen_gvec_mov(MO_64, VdV_off, 523 src_off, 524 sizeof(MMVector), 525 sizeof(MMVector)); 526 i = find_next_bit(ctx->predicated_future_vregs, NUM_VREGS, i + 1); 527 } 528 } 529 if (!bitmap_empty(ctx->predicated_tmp_vregs, NUM_VREGS)) { 530 i = find_first_bit(ctx->predicated_tmp_vregs, NUM_VREGS); 531 while (i < NUM_VREGS) { 532 const intptr_t VdV_off = 533 ctx_tmp_vreg_off(ctx, i, 1, true); 534 intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]); 535 tcg_gen_gvec_mov(MO_64, VdV_off, 536 src_off, 537 sizeof(MMVector), 538 sizeof(MMVector)); 539 i = find_next_bit(ctx->predicated_tmp_vregs, NUM_VREGS, i + 1); 540 } 541 } 542 } 543 544 bool is_gather_store_insn(DisasContext *ctx) 545 { 546 Packet *pkt = ctx->pkt; 547 Insn *insn = ctx->insn; 548 if (GET_ATTRIB(insn->opcode, A_CVI_NEW) && 549 insn->new_value_producer_slot == 1) { 550 /* Look for gather instruction */ 551 for (int i = 0; i < pkt->num_insns; i++) { 552 Insn *in = &pkt->insn[i]; 553 if (GET_ATTRIB(in->opcode, A_CVI_GATHER) && in->slot == 1) { 554 return true; 555 } 556 } 557 } 558 return false; 559 } 560 561 static void mark_store_width(DisasContext *ctx) 562 { 563 uint16_t opcode = ctx->insn->opcode; 564 uint32_t slot = ctx->insn->slot; 565 uint8_t width = 0; 566 567 if (GET_ATTRIB(opcode, A_SCALAR_STORE)) { 568 if (GET_ATTRIB(opcode, A_MEMSIZE_0B)) { 569 return; 570 } 571 if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) { 572 width |= 1; 573 } 574 if (GET_ATTRIB(opcode, A_MEMSIZE_2B)) { 575 width |= 2; 576 } 577 if (GET_ATTRIB(opcode, A_MEMSIZE_4B)) { 578 width |= 4; 579 } 580 if (GET_ATTRIB(opcode, A_MEMSIZE_8B)) { 581 width |= 8; 582 } 583 tcg_debug_assert(is_power_of_2(width)); 584 ctx->store_width[slot] = width; 585 } 586 } 587 588 static void gen_insn(DisasContext *ctx) 589 { 590 if (ctx->insn->generate) { 591 ctx->insn->generate(ctx); 592 mark_store_width(ctx); 593 } else { 594 gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE); 595 } 596 } 597 598 /* 599 * Helpers for generating the packet commit 600 */ 601 static void gen_reg_writes(DisasContext *ctx) 602 { 603 int i; 604 605 /* Early exit if not needed */ 606 if (!ctx->need_commit) { 607 return; 608 } 609 610 for (i = 0; i < ctx->reg_log_idx; i++) { 611 int reg_num = ctx->reg_log[i]; 612 613 tcg_gen_mov_tl(hex_gpr[reg_num], get_result_gpr(ctx, reg_num)); 614 615 /* 616 * ctx->is_tight_loop is set when SA0 points to the beginning of the TB. 617 * If we write to SA0, we have to turn off tight loop handling. 618 */ 619 if (reg_num == HEX_REG_SA0) { 620 ctx->is_tight_loop = false; 621 } 622 } 623 } 624 625 static void gen_pred_writes(DisasContext *ctx) 626 { 627 /* Early exit if not needed or the log is empty */ 628 if (!ctx->need_commit || !ctx->preg_log_idx) { 629 return; 630 } 631 632 for (int i = 0; i < ctx->preg_log_idx; i++) { 633 int pred_num = ctx->preg_log[i]; 634 tcg_gen_mov_tl(hex_pred[pred_num], ctx->new_pred_value[pred_num]); 635 } 636 } 637 638 static void gen_check_store_width(DisasContext *ctx, int slot_num) 639 { 640 if (HEX_DEBUG) { 641 TCGv slot = tcg_constant_tl(slot_num); 642 TCGv check = tcg_constant_tl(ctx->store_width[slot_num]); 643 gen_helper_debug_check_store_width(tcg_env, slot, check); 644 } 645 } 646 647 static bool slot_is_predicated(Packet *pkt, int slot_num) 648 { 649 for (int i = 0; i < pkt->num_insns; i++) { 650 if (pkt->insn[i].slot == slot_num) { 651 return GET_ATTRIB(pkt->insn[i].opcode, A_CONDEXEC); 652 } 653 } 654 /* If we get to here, we didn't find an instruction in the requested slot */ 655 g_assert_not_reached(); 656 } 657 658 void process_store(DisasContext *ctx, int slot_num) 659 { 660 bool is_predicated = slot_is_predicated(ctx->pkt, slot_num); 661 TCGLabel *label_end = NULL; 662 663 /* 664 * We may have already processed this store 665 * See CHECK_NOSHUF in macros.h 666 */ 667 if (slot_num == 1 && ctx->s1_store_processed) { 668 return; 669 } 670 ctx->s1_store_processed = true; 671 672 if (is_predicated) { 673 TCGv cancelled = tcg_temp_new(); 674 label_end = gen_new_label(); 675 676 /* Don't do anything if the slot was cancelled */ 677 tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1); 678 tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end); 679 } 680 { 681 TCGv address = tcg_temp_new(); 682 tcg_gen_mov_tl(address, hex_store_addr[slot_num]); 683 684 /* 685 * If we know the width from the DisasContext, we can 686 * generate much cleaner code. 687 * Unfortunately, not all instructions execute the fSTORE 688 * macro during code generation. Anything that uses the 689 * generic helper will have this problem. Instructions 690 * that use fWRAP to generate proper TCG code will be OK. 691 */ 692 switch (ctx->store_width[slot_num]) { 693 case 1: 694 gen_check_store_width(ctx, slot_num); 695 tcg_gen_qemu_st_tl(hex_store_val32[slot_num], 696 hex_store_addr[slot_num], 697 ctx->mem_idx, MO_UB); 698 break; 699 case 2: 700 gen_check_store_width(ctx, slot_num); 701 tcg_gen_qemu_st_tl(hex_store_val32[slot_num], 702 hex_store_addr[slot_num], 703 ctx->mem_idx, MO_TEUW); 704 break; 705 case 4: 706 gen_check_store_width(ctx, slot_num); 707 tcg_gen_qemu_st_tl(hex_store_val32[slot_num], 708 hex_store_addr[slot_num], 709 ctx->mem_idx, MO_TEUL); 710 break; 711 case 8: 712 gen_check_store_width(ctx, slot_num); 713 tcg_gen_qemu_st_i64(hex_store_val64[slot_num], 714 hex_store_addr[slot_num], 715 ctx->mem_idx, MO_TEUQ); 716 break; 717 default: 718 { 719 /* 720 * If we get to here, we don't know the width at 721 * TCG generation time, we'll use a helper to 722 * avoid branching based on the width at runtime. 723 */ 724 TCGv slot = tcg_constant_tl(slot_num); 725 gen_helper_commit_store(tcg_env, slot); 726 } 727 } 728 } 729 if (is_predicated) { 730 gen_set_label(label_end); 731 } 732 } 733 734 static void process_store_log(DisasContext *ctx) 735 { 736 /* 737 * When a packet has two stores, the hardware processes 738 * slot 1 and then slot 0. This will be important when 739 * the memory accesses overlap. 740 */ 741 Packet *pkt = ctx->pkt; 742 if (pkt->pkt_has_store_s1) { 743 g_assert(!pkt->pkt_has_dczeroa); 744 process_store(ctx, 1); 745 } 746 if (pkt->pkt_has_store_s0) { 747 g_assert(!pkt->pkt_has_dczeroa); 748 process_store(ctx, 0); 749 } 750 } 751 752 /* Zero out a 32-bit cache line */ 753 static void process_dczeroa(DisasContext *ctx) 754 { 755 if (ctx->pkt->pkt_has_dczeroa) { 756 /* Store 32 bytes of zero starting at (addr & ~0x1f) */ 757 TCGv addr = tcg_temp_new(); 758 TCGv_i64 zero = tcg_constant_i64(0); 759 760 tcg_gen_andi_tl(addr, ctx->dczero_addr, ~0x1f); 761 tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); 762 tcg_gen_addi_tl(addr, addr, 8); 763 tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); 764 tcg_gen_addi_tl(addr, addr, 8); 765 tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); 766 tcg_gen_addi_tl(addr, addr, 8); 767 tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); 768 } 769 } 770 771 static bool pkt_has_hvx_store(Packet *pkt) 772 { 773 int i; 774 for (i = 0; i < pkt->num_insns; i++) { 775 int opcode = pkt->insn[i].opcode; 776 if (GET_ATTRIB(opcode, A_CVI) && GET_ATTRIB(opcode, A_STORE)) { 777 return true; 778 } 779 } 780 return false; 781 } 782 783 static void gen_commit_hvx(DisasContext *ctx) 784 { 785 int i; 786 787 /* Early exit if not needed */ 788 if (!ctx->need_commit) { 789 g_assert(!pkt_has_hvx_store(ctx->pkt)); 790 return; 791 } 792 793 /* 794 * for (i = 0; i < ctx->vreg_log_idx; i++) { 795 * int rnum = ctx->vreg_log[i]; 796 * env->VRegs[rnum] = env->future_VRegs[rnum]; 797 * } 798 */ 799 for (i = 0; i < ctx->vreg_log_idx; i++) { 800 int rnum = ctx->vreg_log[i]; 801 intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]); 802 intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false); 803 size_t size = sizeof(MMVector); 804 805 tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); 806 } 807 808 /* 809 * for (i = 0; i < ctx->qreg_log_idx; i++) { 810 * int rnum = ctx->qreg_log[i]; 811 * env->QRegs[rnum] = env->future_QRegs[rnum]; 812 * } 813 */ 814 for (i = 0; i < ctx->qreg_log_idx; i++) { 815 int rnum = ctx->qreg_log[i]; 816 intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]); 817 intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]); 818 size_t size = sizeof(MMQReg); 819 820 tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); 821 } 822 823 if (pkt_has_hvx_store(ctx->pkt)) { 824 gen_helper_commit_hvx_stores(tcg_env); 825 } 826 } 827 828 static void update_exec_counters(DisasContext *ctx) 829 { 830 Packet *pkt = ctx->pkt; 831 int num_insns = pkt->num_insns; 832 int num_real_insns = 0; 833 int num_hvx_insns = 0; 834 835 for (int i = 0; i < num_insns; i++) { 836 if (!pkt->insn[i].is_endloop && 837 !pkt->insn[i].part1 && 838 !GET_ATTRIB(pkt->insn[i].opcode, A_IT_NOP)) { 839 num_real_insns++; 840 } 841 if (GET_ATTRIB(pkt->insn[i].opcode, A_CVI)) { 842 num_hvx_insns++; 843 } 844 } 845 846 ctx->num_packets++; 847 ctx->num_insns += num_real_insns; 848 ctx->num_hvx_insns += num_hvx_insns; 849 } 850 851 static void gen_commit_packet(DisasContext *ctx) 852 { 853 /* 854 * If there is more than one store in a packet, make sure they are all OK 855 * before proceeding with the rest of the packet commit. 856 * 857 * dczeroa has to be the only store operation in the packet, so we go 858 * ahead and process that first. 859 * 860 * When there is an HVX store, there can also be a scalar store in either 861 * slot 0 or slot1, so we create a mask for the helper to indicate what 862 * work to do. 863 * 864 * When there are two scalar stores, we probe the one in slot 0. 865 * 866 * Note that we don't call the probe helper for packets with only one 867 * store. Therefore, we call process_store_log before anything else 868 * involved in committing the packet. 869 */ 870 Packet *pkt = ctx->pkt; 871 bool has_store_s0 = pkt->pkt_has_store_s0; 872 bool has_store_s1 = (pkt->pkt_has_store_s1 && !ctx->s1_store_processed); 873 bool has_hvx_store = pkt_has_hvx_store(pkt); 874 if (pkt->pkt_has_dczeroa) { 875 /* 876 * The dczeroa will be the store in slot 0, check that we don't have 877 * a store in slot 1 or an HVX store. 878 */ 879 g_assert(!has_store_s1 && !has_hvx_store); 880 process_dczeroa(ctx); 881 } else if (has_hvx_store) { 882 if (!has_store_s0 && !has_store_s1) { 883 TCGv mem_idx = tcg_constant_tl(ctx->mem_idx); 884 gen_helper_probe_hvx_stores(tcg_env, mem_idx); 885 } else { 886 int mask = 0; 887 888 if (has_store_s0) { 889 mask = 890 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0, 1); 891 } 892 if (has_store_s1) { 893 mask = 894 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1, 1); 895 } 896 if (has_hvx_store) { 897 mask = 898 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, 899 HAS_HVX_STORES, 1); 900 } 901 if (has_store_s0 && slot_is_predicated(pkt, 0)) { 902 mask = 903 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, 904 S0_IS_PRED, 1); 905 } 906 if (has_store_s1 && slot_is_predicated(pkt, 1)) { 907 mask = 908 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, 909 S1_IS_PRED, 1); 910 } 911 mask = FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, MMU_IDX, 912 ctx->mem_idx); 913 gen_helper_probe_pkt_scalar_hvx_stores(tcg_env, 914 tcg_constant_tl(mask)); 915 } 916 } else if (has_store_s0 && has_store_s1) { 917 /* 918 * process_store_log will execute the slot 1 store first, 919 * so we only have to probe the store in slot 0 920 */ 921 int args = 0; 922 args = 923 FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, ctx->mem_idx); 924 if (slot_is_predicated(pkt, 0)) { 925 args = 926 FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED, 1); 927 } 928 TCGv args_tcgv = tcg_constant_tl(args); 929 gen_helper_probe_pkt_scalar_store_s0(tcg_env, args_tcgv); 930 } 931 932 process_store_log(ctx); 933 934 gen_reg_writes(ctx); 935 gen_pred_writes(ctx); 936 if (pkt->pkt_has_hvx) { 937 gen_commit_hvx(ctx); 938 } 939 update_exec_counters(ctx); 940 if (HEX_DEBUG) { 941 TCGv has_st0 = 942 tcg_constant_tl(pkt->pkt_has_store_s0 && !pkt->pkt_has_dczeroa); 943 TCGv has_st1 = 944 tcg_constant_tl(pkt->pkt_has_store_s1 && !pkt->pkt_has_dczeroa); 945 946 /* Handy place to set a breakpoint at the end of execution */ 947 gen_helper_debug_commit_end(tcg_env, tcg_constant_tl(ctx->pkt->pc), 948 ctx->pred_written, has_st0, has_st1); 949 } 950 951 if (pkt->vhist_insn != NULL) { 952 ctx->pre_commit = false; 953 ctx->insn = pkt->vhist_insn; 954 pkt->vhist_insn->generate(ctx); 955 } 956 957 if (pkt->pkt_has_cof) { 958 gen_end_tb(ctx); 959 } 960 } 961 962 static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx) 963 { 964 uint32_t words[PACKET_WORDS_MAX]; 965 int nwords; 966 Packet pkt; 967 int i; 968 969 nwords = read_packet_words(env, ctx, words); 970 if (!nwords) { 971 gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET); 972 return; 973 } 974 975 ctx->pkt = &pkt; 976 if (decode_packet(ctx, nwords, words, &pkt, false) > 0) { 977 pkt.pc = ctx->base.pc_next; 978 HEX_DEBUG_PRINT_PKT(&pkt); 979 gen_start_packet(ctx); 980 for (i = 0; i < pkt.num_insns; i++) { 981 ctx->insn = &pkt.insn[i]; 982 gen_insn(ctx); 983 } 984 gen_commit_packet(ctx); 985 ctx->base.pc_next += pkt.encod_pkt_size_in_bytes; 986 } else { 987 gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET); 988 } 989 } 990 991 static void hexagon_tr_init_disas_context(DisasContextBase *dcbase, 992 CPUState *cs) 993 { 994 DisasContext *ctx = container_of(dcbase, DisasContext, base); 995 HexagonCPU *hex_cpu = env_archcpu(cpu_env(cs)); 996 uint32_t hex_flags = dcbase->tb->flags; 997 998 ctx->mem_idx = MMU_USER_IDX; 999 ctx->num_packets = 0; 1000 ctx->num_insns = 0; 1001 ctx->num_hvx_insns = 0; 1002 ctx->branch_cond = TCG_COND_NEVER; 1003 ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP); 1004 ctx->short_circuit = hex_cpu->short_circuit; 1005 } 1006 1007 static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu) 1008 { 1009 } 1010 1011 static void hexagon_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) 1012 { 1013 DisasContext *ctx = container_of(dcbase, DisasContext, base); 1014 1015 tcg_gen_insn_start(ctx->base.pc_next); 1016 } 1017 1018 static bool pkt_crosses_page(CPUHexagonState *env, DisasContext *ctx) 1019 { 1020 target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK; 1021 bool found_end = false; 1022 int nwords; 1023 1024 for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) { 1025 uint32_t word = translator_ldl(env, &ctx->base, 1026 ctx->base.pc_next + nwords * sizeof(uint32_t)); 1027 found_end = is_packet_end(word); 1028 } 1029 uint32_t next_ptr = ctx->base.pc_next + nwords * sizeof(uint32_t); 1030 return found_end && next_ptr - page_start >= TARGET_PAGE_SIZE; 1031 } 1032 1033 static void hexagon_tr_translate_packet(DisasContextBase *dcbase, CPUState *cpu) 1034 { 1035 DisasContext *ctx = container_of(dcbase, DisasContext, base); 1036 CPUHexagonState *env = cpu_env(cpu); 1037 1038 decode_and_translate_packet(env, ctx); 1039 1040 if (ctx->base.is_jmp == DISAS_NEXT) { 1041 target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK; 1042 target_ulong bytes_max = PACKET_WORDS_MAX * sizeof(target_ulong); 1043 1044 if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE || 1045 (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE - bytes_max && 1046 pkt_crosses_page(env, ctx))) { 1047 ctx->base.is_jmp = DISAS_TOO_MANY; 1048 } 1049 1050 /* 1051 * The CPU log is used to compare against LLDB single stepping, 1052 * so end the TLB after every packet. 1053 */ 1054 HexagonCPU *hex_cpu = env_archcpu(env); 1055 if (hex_cpu->lldb_compat && qemu_loglevel_mask(CPU_LOG_TB_CPU)) { 1056 ctx->base.is_jmp = DISAS_TOO_MANY; 1057 } 1058 } 1059 } 1060 1061 static void hexagon_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) 1062 { 1063 DisasContext *ctx = container_of(dcbase, DisasContext, base); 1064 1065 switch (ctx->base.is_jmp) { 1066 case DISAS_TOO_MANY: 1067 gen_exec_counters(ctx); 1068 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next); 1069 tcg_gen_exit_tb(NULL, 0); 1070 break; 1071 case DISAS_NORETURN: 1072 break; 1073 default: 1074 g_assert_not_reached(); 1075 } 1076 } 1077 1078 static const TranslatorOps hexagon_tr_ops = { 1079 .init_disas_context = hexagon_tr_init_disas_context, 1080 .tb_start = hexagon_tr_tb_start, 1081 .insn_start = hexagon_tr_insn_start, 1082 .translate_insn = hexagon_tr_translate_packet, 1083 .tb_stop = hexagon_tr_tb_stop, 1084 }; 1085 1086 void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, 1087 vaddr pc, void *host_pc) 1088 { 1089 DisasContext ctx; 1090 1091 translator_loop(cs, tb, max_insns, pc, host_pc, 1092 &hexagon_tr_ops, &ctx.base); 1093 } 1094 1095 #define NAME_LEN 64 1096 static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN]; 1097 static char store_addr_names[STORES_MAX][NAME_LEN]; 1098 static char store_width_names[STORES_MAX][NAME_LEN]; 1099 static char store_val32_names[STORES_MAX][NAME_LEN]; 1100 static char store_val64_names[STORES_MAX][NAME_LEN]; 1101 static char vstore_addr_names[VSTORES_MAX][NAME_LEN]; 1102 static char vstore_size_names[VSTORES_MAX][NAME_LEN]; 1103 static char vstore_pending_names[VSTORES_MAX][NAME_LEN]; 1104 1105 void hexagon_translate_init(void) 1106 { 1107 int i; 1108 1109 opcode_init(); 1110 1111 for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) { 1112 hex_gpr[i] = tcg_global_mem_new(tcg_env, 1113 offsetof(CPUHexagonState, gpr[i]), 1114 hexagon_regnames[i]); 1115 1116 if (HEX_DEBUG) { 1117 snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s", 1118 hexagon_regnames[i]); 1119 hex_reg_written[i] = tcg_global_mem_new(tcg_env, 1120 offsetof(CPUHexagonState, reg_written[i]), 1121 reg_written_names[i]); 1122 } 1123 } 1124 hex_new_value_usr = tcg_global_mem_new(tcg_env, 1125 offsetof(CPUHexagonState, new_value_usr), "new_value_usr"); 1126 1127 for (i = 0; i < NUM_PREGS; i++) { 1128 hex_pred[i] = tcg_global_mem_new(tcg_env, 1129 offsetof(CPUHexagonState, pred[i]), 1130 hexagon_prednames[i]); 1131 } 1132 hex_slot_cancelled = tcg_global_mem_new(tcg_env, 1133 offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled"); 1134 hex_llsc_addr = tcg_global_mem_new(tcg_env, 1135 offsetof(CPUHexagonState, llsc_addr), "llsc_addr"); 1136 hex_llsc_val = tcg_global_mem_new(tcg_env, 1137 offsetof(CPUHexagonState, llsc_val), "llsc_val"); 1138 hex_llsc_val_i64 = tcg_global_mem_new_i64(tcg_env, 1139 offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64"); 1140 for (i = 0; i < STORES_MAX; i++) { 1141 snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i); 1142 hex_store_addr[i] = tcg_global_mem_new(tcg_env, 1143 offsetof(CPUHexagonState, mem_log_stores[i].va), 1144 store_addr_names[i]); 1145 1146 snprintf(store_width_names[i], NAME_LEN, "store_width_%d", i); 1147 hex_store_width[i] = tcg_global_mem_new(tcg_env, 1148 offsetof(CPUHexagonState, mem_log_stores[i].width), 1149 store_width_names[i]); 1150 1151 snprintf(store_val32_names[i], NAME_LEN, "store_val32_%d", i); 1152 hex_store_val32[i] = tcg_global_mem_new(tcg_env, 1153 offsetof(CPUHexagonState, mem_log_stores[i].data32), 1154 store_val32_names[i]); 1155 1156 snprintf(store_val64_names[i], NAME_LEN, "store_val64_%d", i); 1157 hex_store_val64[i] = tcg_global_mem_new_i64(tcg_env, 1158 offsetof(CPUHexagonState, mem_log_stores[i].data64), 1159 store_val64_names[i]); 1160 } 1161 for (i = 0; i < VSTORES_MAX; i++) { 1162 snprintf(vstore_addr_names[i], NAME_LEN, "vstore_addr_%d", i); 1163 hex_vstore_addr[i] = tcg_global_mem_new(tcg_env, 1164 offsetof(CPUHexagonState, vstore[i].va), 1165 vstore_addr_names[i]); 1166 1167 snprintf(vstore_size_names[i], NAME_LEN, "vstore_size_%d", i); 1168 hex_vstore_size[i] = tcg_global_mem_new(tcg_env, 1169 offsetof(CPUHexagonState, vstore[i].size), 1170 vstore_size_names[i]); 1171 1172 snprintf(vstore_pending_names[i], NAME_LEN, "vstore_pending_%d", i); 1173 hex_vstore_pending[i] = tcg_global_mem_new(tcg_env, 1174 offsetof(CPUHexagonState, vstore_pending[i]), 1175 vstore_pending_names[i]); 1176 } 1177 } 1178