1 /* 2 * Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #define QEMU_GENERATE 19 #include "qemu/osdep.h" 20 #include "cpu.h" 21 #include "tcg/tcg-op.h" 22 #include "tcg/tcg-op-gvec.h" 23 #include "exec/helper-gen.h" 24 #include "exec/helper-proto.h" 25 #include "exec/translation-block.h" 26 #include "exec/cpu_ldst.h" 27 #include "exec/log.h" 28 #include "exec/cpu_ldst.h" 29 #include "internal.h" 30 #include "attribs.h" 31 #include "insn.h" 32 #include "decode.h" 33 #include "translate.h" 34 #include "genptr.h" 35 #include "printinsn.h" 36 37 #define HELPER_H "helper.h" 38 #include "exec/helper-info.c.inc" 39 #undef HELPER_H 40 41 #include "analyze_funcs_generated.c.inc" 42 43 typedef void (*AnalyzeInsn)(DisasContext *ctx); 44 static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = { 45 #define OPCODE(X) [X] = analyze_##X 46 #include "opcodes_def_generated.h.inc" 47 #undef OPCODE 48 }; 49 50 TCGv hex_gpr[TOTAL_PER_THREAD_REGS]; 51 TCGv hex_pred[NUM_PREGS]; 52 TCGv hex_slot_cancelled; 53 TCGv hex_new_value_usr; 54 TCGv hex_reg_written[TOTAL_PER_THREAD_REGS]; 55 TCGv hex_store_addr[STORES_MAX]; 56 TCGv hex_store_width[STORES_MAX]; 57 TCGv hex_store_val32[STORES_MAX]; 58 TCGv_i64 hex_store_val64[STORES_MAX]; 59 TCGv hex_llsc_addr; 60 TCGv hex_llsc_val; 61 TCGv_i64 hex_llsc_val_i64; 62 TCGv hex_vstore_addr[VSTORES_MAX]; 63 TCGv hex_vstore_size[VSTORES_MAX]; 64 TCGv hex_vstore_pending[VSTORES_MAX]; 65 66 static const char * const hexagon_prednames[] = { 67 "p0", "p1", "p2", "p3" 68 }; 69 70 intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum, 71 int num, bool alloc_ok) 72 { 73 intptr_t offset; 74 75 if (!ctx->need_commit) { 76 return offsetof(CPUHexagonState, VRegs[regnum]); 77 } 78 79 /* See if it is already allocated */ 80 for (int i = 0; i < ctx->future_vregs_idx; i++) { 81 if (ctx->future_vregs_num[i] == regnum) { 82 return offsetof(CPUHexagonState, future_VRegs[i]); 83 } 84 } 85 86 g_assert(alloc_ok); 87 offset = offsetof(CPUHexagonState, future_VRegs[ctx->future_vregs_idx]); 88 for (int i = 0; i < num; i++) { 89 ctx->future_vregs_num[ctx->future_vregs_idx + i] = regnum++; 90 } 91 ctx->future_vregs_idx += num; 92 g_assert(ctx->future_vregs_idx <= VECTOR_TEMPS_MAX); 93 return offset; 94 } 95 96 intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum, 97 int num, bool alloc_ok) 98 { 99 intptr_t offset; 100 101 /* See if it is already allocated */ 102 for (int i = 0; i < ctx->tmp_vregs_idx; i++) { 103 if (ctx->tmp_vregs_num[i] == regnum) { 104 return offsetof(CPUHexagonState, tmp_VRegs[i]); 105 } 106 } 107 108 g_assert(alloc_ok); 109 offset = offsetof(CPUHexagonState, tmp_VRegs[ctx->tmp_vregs_idx]); 110 for (int i = 0; i < num; i++) { 111 ctx->tmp_vregs_num[ctx->tmp_vregs_idx + i] = regnum++; 112 } 113 ctx->tmp_vregs_idx += num; 114 g_assert(ctx->tmp_vregs_idx <= VECTOR_TEMPS_MAX); 115 return offset; 116 } 117 118 static void gen_exception_raw(int excp) 119 { 120 gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp)); 121 } 122 123 static void gen_exec_counters(DisasContext *ctx) 124 { 125 tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT], 126 hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets); 127 tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT], 128 hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns); 129 tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT], 130 hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns); 131 } 132 133 static bool use_goto_tb(DisasContext *ctx, target_ulong dest) 134 { 135 return translator_use_goto_tb(&ctx->base, dest); 136 } 137 138 static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest, bool 139 move_to_pc) 140 { 141 if (use_goto_tb(ctx, dest)) { 142 tcg_gen_goto_tb(idx); 143 if (move_to_pc) { 144 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest); 145 } 146 tcg_gen_exit_tb(ctx->base.tb, idx); 147 } else { 148 if (move_to_pc) { 149 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest); 150 } 151 tcg_gen_lookup_and_goto_ptr(); 152 } 153 } 154 155 static void gen_end_tb(DisasContext *ctx) 156 { 157 Packet *pkt = ctx->pkt; 158 159 gen_exec_counters(ctx); 160 161 if (ctx->branch_cond != TCG_COND_NEVER) { 162 if (ctx->branch_cond != TCG_COND_ALWAYS) { 163 TCGLabel *skip = gen_new_label(); 164 tcg_gen_brcondi_tl(ctx->branch_cond, ctx->branch_taken, 0, skip); 165 gen_goto_tb(ctx, 0, ctx->branch_dest, true); 166 gen_set_label(skip); 167 gen_goto_tb(ctx, 1, ctx->next_PC, false); 168 } else { 169 gen_goto_tb(ctx, 0, ctx->branch_dest, true); 170 } 171 } else if (ctx->is_tight_loop && 172 pkt->insn[pkt->num_insns - 1].opcode == J2_endloop0) { 173 /* 174 * When we're in a tight loop, we defer the endloop0 processing 175 * to take advantage of direct block chaining 176 */ 177 TCGLabel *skip = gen_new_label(); 178 tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, skip); 179 tcg_gen_subi_tl(hex_gpr[HEX_REG_LC0], hex_gpr[HEX_REG_LC0], 1); 180 gen_goto_tb(ctx, 0, ctx->base.tb->pc, true); 181 gen_set_label(skip); 182 gen_goto_tb(ctx, 1, ctx->next_PC, false); 183 } else { 184 tcg_gen_lookup_and_goto_ptr(); 185 } 186 187 ctx->base.is_jmp = DISAS_NORETURN; 188 } 189 190 static void gen_exception_end_tb(DisasContext *ctx, int excp) 191 { 192 gen_exec_counters(ctx); 193 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->next_PC); 194 gen_exception_raw(excp); 195 ctx->base.is_jmp = DISAS_NORETURN; 196 197 } 198 199 #define PACKET_BUFFER_LEN 1028 200 static void print_pkt(Packet *pkt) 201 { 202 GString *buf = g_string_sized_new(PACKET_BUFFER_LEN); 203 snprint_a_pkt_debug(buf, pkt); 204 HEX_DEBUG_LOG("%s", buf->str); 205 g_string_free(buf, true); 206 } 207 #define HEX_DEBUG_PRINT_PKT(pkt) \ 208 do { \ 209 if (HEX_DEBUG) { \ 210 print_pkt(pkt); \ 211 } \ 212 } while (0) 213 214 static int read_packet_words(CPUHexagonState *env, DisasContext *ctx, 215 uint32_t words[]) 216 { 217 bool found_end = false; 218 int nwords, max_words; 219 220 memset(words, 0, PACKET_WORDS_MAX * sizeof(uint32_t)); 221 for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) { 222 words[nwords] = 223 translator_ldl(env, &ctx->base, 224 ctx->base.pc_next + nwords * sizeof(uint32_t)); 225 found_end = is_packet_end(words[nwords]); 226 } 227 if (!found_end) { 228 /* Read too many words without finding the end */ 229 return 0; 230 } 231 232 /* Check for page boundary crossing */ 233 max_words = -(ctx->base.pc_next | TARGET_PAGE_MASK) / sizeof(uint32_t); 234 if (nwords > max_words) { 235 /* We can only cross a page boundary at the beginning of a TB */ 236 g_assert(ctx->base.num_insns == 1); 237 } 238 239 HEX_DEBUG_LOG("decode_packet: pc = 0x%" VADDR_PRIx "\n", 240 ctx->base.pc_next); 241 HEX_DEBUG_LOG(" words = { "); 242 for (int i = 0; i < nwords; i++) { 243 HEX_DEBUG_LOG("0x%x, ", words[i]); 244 } 245 HEX_DEBUG_LOG("}\n"); 246 247 return nwords; 248 } 249 250 static bool check_for_attrib(Packet *pkt, int attrib) 251 { 252 for (int i = 0; i < pkt->num_insns; i++) { 253 if (GET_ATTRIB(pkt->insn[i].opcode, attrib)) { 254 return true; 255 } 256 } 257 return false; 258 } 259 260 static bool need_slot_cancelled(Packet *pkt) 261 { 262 /* We only need slot_cancelled for conditional store instructions */ 263 for (int i = 0; i < pkt->num_insns; i++) { 264 uint16_t opcode = pkt->insn[i].opcode; 265 if (GET_ATTRIB(opcode, A_CONDEXEC) && 266 GET_ATTRIB(opcode, A_SCALAR_STORE)) { 267 return true; 268 } 269 } 270 return false; 271 } 272 273 static bool need_next_PC(DisasContext *ctx) 274 { 275 Packet *pkt = ctx->pkt; 276 277 /* Check for conditional control flow or HW loop end */ 278 for (int i = 0; i < pkt->num_insns; i++) { 279 uint16_t opcode = pkt->insn[i].opcode; 280 if (GET_ATTRIB(opcode, A_CONDEXEC) && GET_ATTRIB(opcode, A_COF)) { 281 return true; 282 } 283 if (GET_ATTRIB(opcode, A_HWLOOP0_END) || 284 GET_ATTRIB(opcode, A_HWLOOP1_END)) { 285 return true; 286 } 287 } 288 return false; 289 } 290 291 /* 292 * The opcode_analyze functions mark most of the writes in a packet 293 * However, there are some implicit writes marked as attributes 294 * of the applicable instructions. 295 */ 296 static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum) 297 { 298 uint16_t opcode = ctx->insn->opcode; 299 if (GET_ATTRIB(opcode, attrib)) { 300 /* 301 * USR is used to set overflow and FP exceptions, 302 * so treat it as conditional 303 */ 304 bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) || 305 rnum == HEX_REG_USR; 306 307 /* LC0/LC1 is conditionally written by endloop instructions */ 308 if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) && 309 (opcode == J2_endloop0 || 310 opcode == J2_endloop1 || 311 opcode == J2_endloop01)) { 312 is_predicated = true; 313 } 314 315 ctx_log_reg_write(ctx, rnum, is_predicated); 316 } 317 } 318 319 static void mark_implicit_reg_writes(DisasContext *ctx) 320 { 321 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_FP, HEX_REG_FP); 322 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SP, HEX_REG_SP); 323 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LR, HEX_REG_LR); 324 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC0, HEX_REG_LC0); 325 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0); 326 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1); 327 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1); 328 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_USR, HEX_REG_USR); 329 mark_implicit_reg_write(ctx, A_FPOP, HEX_REG_USR); 330 } 331 332 static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum) 333 { 334 if (GET_ATTRIB(ctx->insn->opcode, attrib)) { 335 ctx_log_pred_write(ctx, pnum); 336 } 337 } 338 339 static void mark_implicit_pred_writes(DisasContext *ctx) 340 { 341 mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P0, 0); 342 mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P1, 1); 343 mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P2, 2); 344 mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3); 345 } 346 347 static bool pkt_raises_exception(Packet *pkt) 348 { 349 if (check_for_attrib(pkt, A_LOAD) || 350 check_for_attrib(pkt, A_STORE)) { 351 return true; 352 } 353 return false; 354 } 355 356 static bool need_commit(DisasContext *ctx) 357 { 358 Packet *pkt = ctx->pkt; 359 360 /* 361 * If the short-circuit property is set to false, we'll always do the commit 362 */ 363 if (!ctx->short_circuit) { 364 return true; 365 } 366 367 if (pkt_raises_exception(pkt)) { 368 return true; 369 } 370 371 /* Registers with immutability flags require new_value */ 372 for (int i = 0; i < ctx->reg_log_idx; i++) { 373 int rnum = ctx->reg_log[i]; 374 if (reg_immut_masks[rnum]) { 375 return true; 376 } 377 } 378 379 /* Floating point instructions are hard-coded to use new_value */ 380 if (check_for_attrib(pkt, A_FPOP)) { 381 return true; 382 } 383 384 if (ctx->read_after_write || ctx->has_hvx_overlap) { 385 return true; 386 } 387 388 return false; 389 } 390 391 static void mark_implicit_pred_read(DisasContext *ctx, int attrib, int pnum) 392 { 393 if (GET_ATTRIB(ctx->insn->opcode, attrib)) { 394 ctx_log_pred_read(ctx, pnum); 395 } 396 } 397 398 static void mark_implicit_pred_reads(DisasContext *ctx) 399 { 400 mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P0, 0); 401 mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P1, 1); 402 mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 2); 403 mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 3); 404 } 405 406 static void analyze_packet(DisasContext *ctx) 407 { 408 Packet *pkt = ctx->pkt; 409 ctx->read_after_write = false; 410 ctx->has_hvx_overlap = false; 411 for (int i = 0; i < pkt->num_insns; i++) { 412 Insn *insn = &pkt->insn[i]; 413 ctx->insn = insn; 414 if (opcode_analyze[insn->opcode]) { 415 opcode_analyze[insn->opcode](ctx); 416 } 417 mark_implicit_reg_writes(ctx); 418 mark_implicit_pred_writes(ctx); 419 mark_implicit_pred_reads(ctx); 420 } 421 422 ctx->need_commit = need_commit(ctx); 423 } 424 425 static void gen_start_packet(DisasContext *ctx) 426 { 427 Packet *pkt = ctx->pkt; 428 target_ulong next_PC = ctx->base.pc_next + pkt->encod_pkt_size_in_bytes; 429 int i; 430 431 /* Clear out the disassembly context */ 432 ctx->next_PC = next_PC; 433 ctx->reg_log_idx = 0; 434 bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS); 435 bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); 436 ctx->preg_log_idx = 0; 437 bitmap_zero(ctx->pregs_written, NUM_PREGS); 438 ctx->future_vregs_idx = 0; 439 ctx->tmp_vregs_idx = 0; 440 ctx->vreg_log_idx = 0; 441 bitmap_zero(ctx->vregs_written, NUM_VREGS); 442 bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS); 443 bitmap_zero(ctx->vregs_updated, NUM_VREGS); 444 bitmap_zero(ctx->vregs_select, NUM_VREGS); 445 bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS); 446 bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS); 447 bitmap_zero(ctx->qregs_written, NUM_QREGS); 448 ctx->qreg_log_idx = 0; 449 for (i = 0; i < STORES_MAX; i++) { 450 ctx->store_width[i] = 0; 451 } 452 ctx->s1_store_processed = false; 453 ctx->pre_commit = true; 454 for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) { 455 ctx->new_value[i] = NULL; 456 } 457 for (i = 0; i < NUM_PREGS; i++) { 458 ctx->new_pred_value[i] = NULL; 459 } 460 461 analyze_packet(ctx); 462 463 /* 464 * pregs_written is used both in the analyze phase as well as the code 465 * gen phase, so clear it again. 466 */ 467 bitmap_zero(ctx->pregs_written, NUM_PREGS); 468 469 if (HEX_DEBUG) { 470 /* Handy place to set a breakpoint before the packet executes */ 471 gen_helper_debug_start_packet(tcg_env); 472 } 473 474 /* Initialize the runtime state for packet semantics */ 475 if (need_slot_cancelled(pkt)) { 476 tcg_gen_movi_tl(hex_slot_cancelled, 0); 477 } 478 ctx->branch_taken = NULL; 479 if (pkt->pkt_has_cof) { 480 ctx->branch_taken = tcg_temp_new(); 481 if (pkt->pkt_has_multi_cof) { 482 tcg_gen_movi_tl(ctx->branch_taken, 0); 483 } 484 if (need_next_PC(ctx)) { 485 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], next_PC); 486 } 487 } 488 if (HEX_DEBUG) { 489 ctx->pred_written = tcg_temp_new(); 490 tcg_gen_movi_tl(ctx->pred_written, 0); 491 } 492 493 /* Preload the predicated registers into get_result_gpr(ctx, i) */ 494 if (ctx->need_commit && 495 !bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) { 496 i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); 497 while (i < TOTAL_PER_THREAD_REGS) { 498 tcg_gen_mov_tl(get_result_gpr(ctx, i), hex_gpr[i]); 499 i = find_next_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS, 500 i + 1); 501 } 502 } 503 504 /* 505 * Preload the predicated pred registers into ctx->new_pred_value[pred_num] 506 * Only endloop instructions conditionally write to pred registers 507 */ 508 if (ctx->need_commit && pkt->pkt_has_endloop) { 509 for (i = 0; i < ctx->preg_log_idx; i++) { 510 int pred_num = ctx->preg_log[i]; 511 ctx->new_pred_value[pred_num] = tcg_temp_new(); 512 tcg_gen_mov_tl(ctx->new_pred_value[pred_num], hex_pred[pred_num]); 513 } 514 } 515 516 /* Preload the predicated HVX registers into future_VRegs and tmp_VRegs */ 517 if (!bitmap_empty(ctx->predicated_future_vregs, NUM_VREGS)) { 518 i = find_first_bit(ctx->predicated_future_vregs, NUM_VREGS); 519 while (i < NUM_VREGS) { 520 const intptr_t VdV_off = 521 ctx_future_vreg_off(ctx, i, 1, true); 522 intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]); 523 tcg_gen_gvec_mov(MO_64, VdV_off, 524 src_off, 525 sizeof(MMVector), 526 sizeof(MMVector)); 527 i = find_next_bit(ctx->predicated_future_vregs, NUM_VREGS, i + 1); 528 } 529 } 530 if (!bitmap_empty(ctx->predicated_tmp_vregs, NUM_VREGS)) { 531 i = find_first_bit(ctx->predicated_tmp_vregs, NUM_VREGS); 532 while (i < NUM_VREGS) { 533 const intptr_t VdV_off = 534 ctx_tmp_vreg_off(ctx, i, 1, true); 535 intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]); 536 tcg_gen_gvec_mov(MO_64, VdV_off, 537 src_off, 538 sizeof(MMVector), 539 sizeof(MMVector)); 540 i = find_next_bit(ctx->predicated_tmp_vregs, NUM_VREGS, i + 1); 541 } 542 } 543 } 544 545 bool is_gather_store_insn(DisasContext *ctx) 546 { 547 Packet *pkt = ctx->pkt; 548 Insn *insn = ctx->insn; 549 if (GET_ATTRIB(insn->opcode, A_CVI_NEW) && 550 insn->new_value_producer_slot == 1) { 551 /* Look for gather instruction */ 552 for (int i = 0; i < pkt->num_insns; i++) { 553 Insn *in = &pkt->insn[i]; 554 if (GET_ATTRIB(in->opcode, A_CVI_GATHER) && in->slot == 1) { 555 return true; 556 } 557 } 558 } 559 return false; 560 } 561 562 static void mark_store_width(DisasContext *ctx) 563 { 564 uint16_t opcode = ctx->insn->opcode; 565 uint32_t slot = ctx->insn->slot; 566 uint8_t width = 0; 567 568 if (GET_ATTRIB(opcode, A_SCALAR_STORE)) { 569 if (GET_ATTRIB(opcode, A_MEMSIZE_0B)) { 570 return; 571 } 572 if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) { 573 width |= 1; 574 } 575 if (GET_ATTRIB(opcode, A_MEMSIZE_2B)) { 576 width |= 2; 577 } 578 if (GET_ATTRIB(opcode, A_MEMSIZE_4B)) { 579 width |= 4; 580 } 581 if (GET_ATTRIB(opcode, A_MEMSIZE_8B)) { 582 width |= 8; 583 } 584 tcg_debug_assert(is_power_of_2(width)); 585 ctx->store_width[slot] = width; 586 } 587 } 588 589 static void gen_insn(DisasContext *ctx) 590 { 591 if (ctx->insn->generate) { 592 ctx->insn->generate(ctx); 593 mark_store_width(ctx); 594 } else { 595 gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE); 596 } 597 } 598 599 /* 600 * Helpers for generating the packet commit 601 */ 602 static void gen_reg_writes(DisasContext *ctx) 603 { 604 int i; 605 606 /* Early exit if not needed */ 607 if (!ctx->need_commit) { 608 return; 609 } 610 611 for (i = 0; i < ctx->reg_log_idx; i++) { 612 int reg_num = ctx->reg_log[i]; 613 614 tcg_gen_mov_tl(hex_gpr[reg_num], get_result_gpr(ctx, reg_num)); 615 616 /* 617 * ctx->is_tight_loop is set when SA0 points to the beginning of the TB. 618 * If we write to SA0, we have to turn off tight loop handling. 619 */ 620 if (reg_num == HEX_REG_SA0) { 621 ctx->is_tight_loop = false; 622 } 623 } 624 } 625 626 static void gen_pred_writes(DisasContext *ctx) 627 { 628 /* Early exit if not needed or the log is empty */ 629 if (!ctx->need_commit || !ctx->preg_log_idx) { 630 return; 631 } 632 633 for (int i = 0; i < ctx->preg_log_idx; i++) { 634 int pred_num = ctx->preg_log[i]; 635 tcg_gen_mov_tl(hex_pred[pred_num], ctx->new_pred_value[pred_num]); 636 } 637 } 638 639 static void gen_check_store_width(DisasContext *ctx, int slot_num) 640 { 641 if (HEX_DEBUG) { 642 TCGv slot = tcg_constant_tl(slot_num); 643 TCGv check = tcg_constant_tl(ctx->store_width[slot_num]); 644 gen_helper_debug_check_store_width(tcg_env, slot, check); 645 } 646 } 647 648 static bool slot_is_predicated(Packet *pkt, int slot_num) 649 { 650 for (int i = 0; i < pkt->num_insns; i++) { 651 if (pkt->insn[i].slot == slot_num) { 652 return GET_ATTRIB(pkt->insn[i].opcode, A_CONDEXEC); 653 } 654 } 655 /* If we get to here, we didn't find an instruction in the requested slot */ 656 g_assert_not_reached(); 657 } 658 659 void process_store(DisasContext *ctx, int slot_num) 660 { 661 bool is_predicated = slot_is_predicated(ctx->pkt, slot_num); 662 TCGLabel *label_end = NULL; 663 664 /* 665 * We may have already processed this store 666 * See CHECK_NOSHUF in macros.h 667 */ 668 if (slot_num == 1 && ctx->s1_store_processed) { 669 return; 670 } 671 ctx->s1_store_processed = true; 672 673 if (is_predicated) { 674 TCGv cancelled = tcg_temp_new(); 675 label_end = gen_new_label(); 676 677 /* Don't do anything if the slot was cancelled */ 678 tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1); 679 tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end); 680 } 681 { 682 TCGv address = tcg_temp_new(); 683 tcg_gen_mov_tl(address, hex_store_addr[slot_num]); 684 685 /* 686 * If we know the width from the DisasContext, we can 687 * generate much cleaner code. 688 * Unfortunately, not all instructions execute the fSTORE 689 * macro during code generation. Anything that uses the 690 * generic helper will have this problem. Instructions 691 * that use fWRAP to generate proper TCG code will be OK. 692 */ 693 switch (ctx->store_width[slot_num]) { 694 case 1: 695 gen_check_store_width(ctx, slot_num); 696 tcg_gen_qemu_st_tl(hex_store_val32[slot_num], 697 hex_store_addr[slot_num], 698 ctx->mem_idx, MO_UB); 699 break; 700 case 2: 701 gen_check_store_width(ctx, slot_num); 702 tcg_gen_qemu_st_tl(hex_store_val32[slot_num], 703 hex_store_addr[slot_num], 704 ctx->mem_idx, MO_TEUW); 705 break; 706 case 4: 707 gen_check_store_width(ctx, slot_num); 708 tcg_gen_qemu_st_tl(hex_store_val32[slot_num], 709 hex_store_addr[slot_num], 710 ctx->mem_idx, MO_TEUL); 711 break; 712 case 8: 713 gen_check_store_width(ctx, slot_num); 714 tcg_gen_qemu_st_i64(hex_store_val64[slot_num], 715 hex_store_addr[slot_num], 716 ctx->mem_idx, MO_TEUQ); 717 break; 718 default: 719 { 720 /* 721 * If we get to here, we don't know the width at 722 * TCG generation time, we'll use a helper to 723 * avoid branching based on the width at runtime. 724 */ 725 TCGv slot = tcg_constant_tl(slot_num); 726 gen_helper_commit_store(tcg_env, slot); 727 } 728 } 729 } 730 if (is_predicated) { 731 gen_set_label(label_end); 732 } 733 } 734 735 static void process_store_log(DisasContext *ctx) 736 { 737 /* 738 * When a packet has two stores, the hardware processes 739 * slot 1 and then slot 0. This will be important when 740 * the memory accesses overlap. 741 */ 742 Packet *pkt = ctx->pkt; 743 if (pkt->pkt_has_store_s1) { 744 g_assert(!pkt->pkt_has_dczeroa); 745 process_store(ctx, 1); 746 } 747 if (pkt->pkt_has_store_s0) { 748 g_assert(!pkt->pkt_has_dczeroa); 749 process_store(ctx, 0); 750 } 751 } 752 753 /* Zero out a 32-bit cache line */ 754 static void process_dczeroa(DisasContext *ctx) 755 { 756 if (ctx->pkt->pkt_has_dczeroa) { 757 /* Store 32 bytes of zero starting at (addr & ~0x1f) */ 758 TCGv addr = tcg_temp_new(); 759 TCGv_i64 zero = tcg_constant_i64(0); 760 761 tcg_gen_andi_tl(addr, ctx->dczero_addr, ~0x1f); 762 tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); 763 tcg_gen_addi_tl(addr, addr, 8); 764 tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); 765 tcg_gen_addi_tl(addr, addr, 8); 766 tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); 767 tcg_gen_addi_tl(addr, addr, 8); 768 tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); 769 } 770 } 771 772 static bool pkt_has_hvx_store(Packet *pkt) 773 { 774 int i; 775 for (i = 0; i < pkt->num_insns; i++) { 776 int opcode = pkt->insn[i].opcode; 777 if (GET_ATTRIB(opcode, A_CVI) && GET_ATTRIB(opcode, A_STORE)) { 778 return true; 779 } 780 } 781 return false; 782 } 783 784 static void gen_commit_hvx(DisasContext *ctx) 785 { 786 int i; 787 788 /* Early exit if not needed */ 789 if (!ctx->need_commit) { 790 g_assert(!pkt_has_hvx_store(ctx->pkt)); 791 return; 792 } 793 794 /* 795 * for (i = 0; i < ctx->vreg_log_idx; i++) { 796 * int rnum = ctx->vreg_log[i]; 797 * env->VRegs[rnum] = env->future_VRegs[rnum]; 798 * } 799 */ 800 for (i = 0; i < ctx->vreg_log_idx; i++) { 801 int rnum = ctx->vreg_log[i]; 802 intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]); 803 intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false); 804 size_t size = sizeof(MMVector); 805 806 tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); 807 } 808 809 /* 810 * for (i = 0; i < ctx->qreg_log_idx; i++) { 811 * int rnum = ctx->qreg_log[i]; 812 * env->QRegs[rnum] = env->future_QRegs[rnum]; 813 * } 814 */ 815 for (i = 0; i < ctx->qreg_log_idx; i++) { 816 int rnum = ctx->qreg_log[i]; 817 intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]); 818 intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]); 819 size_t size = sizeof(MMQReg); 820 821 tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); 822 } 823 824 if (pkt_has_hvx_store(ctx->pkt)) { 825 gen_helper_commit_hvx_stores(tcg_env); 826 } 827 } 828 829 static void update_exec_counters(DisasContext *ctx) 830 { 831 Packet *pkt = ctx->pkt; 832 int num_insns = pkt->num_insns; 833 int num_real_insns = 0; 834 int num_hvx_insns = 0; 835 836 for (int i = 0; i < num_insns; i++) { 837 if (!pkt->insn[i].is_endloop && 838 !pkt->insn[i].part1 && 839 !GET_ATTRIB(pkt->insn[i].opcode, A_IT_NOP)) { 840 num_real_insns++; 841 } 842 if (GET_ATTRIB(pkt->insn[i].opcode, A_CVI)) { 843 num_hvx_insns++; 844 } 845 } 846 847 ctx->num_packets++; 848 ctx->num_insns += num_real_insns; 849 ctx->num_hvx_insns += num_hvx_insns; 850 } 851 852 static void gen_commit_packet(DisasContext *ctx) 853 { 854 /* 855 * If there is more than one store in a packet, make sure they are all OK 856 * before proceeding with the rest of the packet commit. 857 * 858 * dczeroa has to be the only store operation in the packet, so we go 859 * ahead and process that first. 860 * 861 * When there is an HVX store, there can also be a scalar store in either 862 * slot 0 or slot1, so we create a mask for the helper to indicate what 863 * work to do. 864 * 865 * When there are two scalar stores, we probe the one in slot 0. 866 * 867 * Note that we don't call the probe helper for packets with only one 868 * store. Therefore, we call process_store_log before anything else 869 * involved in committing the packet. 870 */ 871 Packet *pkt = ctx->pkt; 872 bool has_store_s0 = pkt->pkt_has_store_s0; 873 bool has_store_s1 = (pkt->pkt_has_store_s1 && !ctx->s1_store_processed); 874 bool has_hvx_store = pkt_has_hvx_store(pkt); 875 if (pkt->pkt_has_dczeroa) { 876 /* 877 * The dczeroa will be the store in slot 0, check that we don't have 878 * a store in slot 1 or an HVX store. 879 */ 880 g_assert(!has_store_s1 && !has_hvx_store); 881 process_dczeroa(ctx); 882 } else if (has_hvx_store) { 883 if (!has_store_s0 && !has_store_s1) { 884 TCGv mem_idx = tcg_constant_tl(ctx->mem_idx); 885 gen_helper_probe_hvx_stores(tcg_env, mem_idx); 886 } else { 887 int mask = 0; 888 889 if (has_store_s0) { 890 mask = 891 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0, 1); 892 } 893 if (has_store_s1) { 894 mask = 895 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1, 1); 896 } 897 if (has_hvx_store) { 898 mask = 899 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, 900 HAS_HVX_STORES, 1); 901 } 902 if (has_store_s0 && slot_is_predicated(pkt, 0)) { 903 mask = 904 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, 905 S0_IS_PRED, 1); 906 } 907 if (has_store_s1 && slot_is_predicated(pkt, 1)) { 908 mask = 909 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, 910 S1_IS_PRED, 1); 911 } 912 mask = FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, MMU_IDX, 913 ctx->mem_idx); 914 gen_helper_probe_pkt_scalar_hvx_stores(tcg_env, 915 tcg_constant_tl(mask)); 916 } 917 } else if (has_store_s0 && has_store_s1) { 918 /* 919 * process_store_log will execute the slot 1 store first, 920 * so we only have to probe the store in slot 0 921 */ 922 int args = 0; 923 args = 924 FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, ctx->mem_idx); 925 if (slot_is_predicated(pkt, 0)) { 926 args = 927 FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED, 1); 928 } 929 TCGv args_tcgv = tcg_constant_tl(args); 930 gen_helper_probe_pkt_scalar_store_s0(tcg_env, args_tcgv); 931 } 932 933 process_store_log(ctx); 934 935 gen_reg_writes(ctx); 936 gen_pred_writes(ctx); 937 if (pkt->pkt_has_hvx) { 938 gen_commit_hvx(ctx); 939 } 940 update_exec_counters(ctx); 941 if (HEX_DEBUG) { 942 TCGv has_st0 = 943 tcg_constant_tl(pkt->pkt_has_store_s0 && !pkt->pkt_has_dczeroa); 944 TCGv has_st1 = 945 tcg_constant_tl(pkt->pkt_has_store_s1 && !pkt->pkt_has_dczeroa); 946 947 /* Handy place to set a breakpoint at the end of execution */ 948 gen_helper_debug_commit_end(tcg_env, tcg_constant_tl(ctx->pkt->pc), 949 ctx->pred_written, has_st0, has_st1); 950 } 951 952 if (pkt->vhist_insn != NULL) { 953 ctx->pre_commit = false; 954 ctx->insn = pkt->vhist_insn; 955 pkt->vhist_insn->generate(ctx); 956 } 957 958 if (pkt->pkt_has_cof) { 959 gen_end_tb(ctx); 960 } 961 } 962 963 static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx) 964 { 965 uint32_t words[PACKET_WORDS_MAX]; 966 int nwords; 967 Packet pkt; 968 int i; 969 970 nwords = read_packet_words(env, ctx, words); 971 if (!nwords) { 972 gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET); 973 return; 974 } 975 976 ctx->pkt = &pkt; 977 if (decode_packet(ctx, nwords, words, &pkt, false) > 0) { 978 pkt.pc = ctx->base.pc_next; 979 HEX_DEBUG_PRINT_PKT(&pkt); 980 gen_start_packet(ctx); 981 for (i = 0; i < pkt.num_insns; i++) { 982 ctx->insn = &pkt.insn[i]; 983 gen_insn(ctx); 984 } 985 gen_commit_packet(ctx); 986 ctx->base.pc_next += pkt.encod_pkt_size_in_bytes; 987 } else { 988 gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET); 989 } 990 } 991 992 static void hexagon_tr_init_disas_context(DisasContextBase *dcbase, 993 CPUState *cs) 994 { 995 DisasContext *ctx = container_of(dcbase, DisasContext, base); 996 HexagonCPU *hex_cpu = env_archcpu(cpu_env(cs)); 997 uint32_t hex_flags = dcbase->tb->flags; 998 999 ctx->mem_idx = MMU_USER_IDX; 1000 ctx->num_packets = 0; 1001 ctx->num_insns = 0; 1002 ctx->num_hvx_insns = 0; 1003 ctx->branch_cond = TCG_COND_NEVER; 1004 ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP); 1005 ctx->short_circuit = hex_cpu->short_circuit; 1006 } 1007 1008 static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu) 1009 { 1010 } 1011 1012 static void hexagon_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) 1013 { 1014 DisasContext *ctx = container_of(dcbase, DisasContext, base); 1015 1016 tcg_gen_insn_start(ctx->base.pc_next); 1017 } 1018 1019 static bool pkt_crosses_page(CPUHexagonState *env, DisasContext *ctx) 1020 { 1021 target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK; 1022 bool found_end = false; 1023 int nwords; 1024 1025 for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) { 1026 uint32_t word = cpu_ldl_code(env, 1027 ctx->base.pc_next + nwords * sizeof(uint32_t)); 1028 found_end = is_packet_end(word); 1029 } 1030 uint32_t next_ptr = ctx->base.pc_next + nwords * sizeof(uint32_t); 1031 return found_end && next_ptr - page_start >= TARGET_PAGE_SIZE; 1032 } 1033 1034 static void hexagon_tr_translate_packet(DisasContextBase *dcbase, CPUState *cpu) 1035 { 1036 DisasContext *ctx = container_of(dcbase, DisasContext, base); 1037 CPUHexagonState *env = cpu_env(cpu); 1038 1039 decode_and_translate_packet(env, ctx); 1040 1041 if (ctx->base.is_jmp == DISAS_NEXT) { 1042 target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK; 1043 target_ulong bytes_max = PACKET_WORDS_MAX * sizeof(target_ulong); 1044 1045 if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE || 1046 (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE - bytes_max && 1047 pkt_crosses_page(env, ctx))) { 1048 ctx->base.is_jmp = DISAS_TOO_MANY; 1049 } 1050 1051 /* 1052 * The CPU log is used to compare against LLDB single stepping, 1053 * so end the TLB after every packet. 1054 */ 1055 HexagonCPU *hex_cpu = env_archcpu(env); 1056 if (hex_cpu->lldb_compat && qemu_loglevel_mask(CPU_LOG_TB_CPU)) { 1057 ctx->base.is_jmp = DISAS_TOO_MANY; 1058 } 1059 } 1060 } 1061 1062 static void hexagon_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) 1063 { 1064 DisasContext *ctx = container_of(dcbase, DisasContext, base); 1065 1066 switch (ctx->base.is_jmp) { 1067 case DISAS_TOO_MANY: 1068 gen_exec_counters(ctx); 1069 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next); 1070 tcg_gen_exit_tb(NULL, 0); 1071 break; 1072 case DISAS_NORETURN: 1073 break; 1074 default: 1075 g_assert_not_reached(); 1076 } 1077 } 1078 1079 static const TranslatorOps hexagon_tr_ops = { 1080 .init_disas_context = hexagon_tr_init_disas_context, 1081 .tb_start = hexagon_tr_tb_start, 1082 .insn_start = hexagon_tr_insn_start, 1083 .translate_insn = hexagon_tr_translate_packet, 1084 .tb_stop = hexagon_tr_tb_stop, 1085 }; 1086 1087 void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, 1088 vaddr pc, void *host_pc) 1089 { 1090 DisasContext ctx; 1091 1092 translator_loop(cs, tb, max_insns, pc, host_pc, 1093 &hexagon_tr_ops, &ctx.base); 1094 } 1095 1096 #define NAME_LEN 64 1097 static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN]; 1098 static char store_addr_names[STORES_MAX][NAME_LEN]; 1099 static char store_width_names[STORES_MAX][NAME_LEN]; 1100 static char store_val32_names[STORES_MAX][NAME_LEN]; 1101 static char store_val64_names[STORES_MAX][NAME_LEN]; 1102 static char vstore_addr_names[VSTORES_MAX][NAME_LEN]; 1103 static char vstore_size_names[VSTORES_MAX][NAME_LEN]; 1104 static char vstore_pending_names[VSTORES_MAX][NAME_LEN]; 1105 1106 void hexagon_translate_init(void) 1107 { 1108 int i; 1109 1110 opcode_init(); 1111 1112 for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) { 1113 hex_gpr[i] = tcg_global_mem_new(tcg_env, 1114 offsetof(CPUHexagonState, gpr[i]), 1115 hexagon_regnames[i]); 1116 1117 if (HEX_DEBUG) { 1118 snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s", 1119 hexagon_regnames[i]); 1120 hex_reg_written[i] = tcg_global_mem_new(tcg_env, 1121 offsetof(CPUHexagonState, reg_written[i]), 1122 reg_written_names[i]); 1123 } 1124 } 1125 hex_new_value_usr = tcg_global_mem_new(tcg_env, 1126 offsetof(CPUHexagonState, new_value_usr), "new_value_usr"); 1127 1128 for (i = 0; i < NUM_PREGS; i++) { 1129 hex_pred[i] = tcg_global_mem_new(tcg_env, 1130 offsetof(CPUHexagonState, pred[i]), 1131 hexagon_prednames[i]); 1132 } 1133 hex_slot_cancelled = tcg_global_mem_new(tcg_env, 1134 offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled"); 1135 hex_llsc_addr = tcg_global_mem_new(tcg_env, 1136 offsetof(CPUHexagonState, llsc_addr), "llsc_addr"); 1137 hex_llsc_val = tcg_global_mem_new(tcg_env, 1138 offsetof(CPUHexagonState, llsc_val), "llsc_val"); 1139 hex_llsc_val_i64 = tcg_global_mem_new_i64(tcg_env, 1140 offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64"); 1141 for (i = 0; i < STORES_MAX; i++) { 1142 snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i); 1143 hex_store_addr[i] = tcg_global_mem_new(tcg_env, 1144 offsetof(CPUHexagonState, mem_log_stores[i].va), 1145 store_addr_names[i]); 1146 1147 snprintf(store_width_names[i], NAME_LEN, "store_width_%d", i); 1148 hex_store_width[i] = tcg_global_mem_new(tcg_env, 1149 offsetof(CPUHexagonState, mem_log_stores[i].width), 1150 store_width_names[i]); 1151 1152 snprintf(store_val32_names[i], NAME_LEN, "store_val32_%d", i); 1153 hex_store_val32[i] = tcg_global_mem_new(tcg_env, 1154 offsetof(CPUHexagonState, mem_log_stores[i].data32), 1155 store_val32_names[i]); 1156 1157 snprintf(store_val64_names[i], NAME_LEN, "store_val64_%d", i); 1158 hex_store_val64[i] = tcg_global_mem_new_i64(tcg_env, 1159 offsetof(CPUHexagonState, mem_log_stores[i].data64), 1160 store_val64_names[i]); 1161 } 1162 for (i = 0; i < VSTORES_MAX; i++) { 1163 snprintf(vstore_addr_names[i], NAME_LEN, "vstore_addr_%d", i); 1164 hex_vstore_addr[i] = tcg_global_mem_new(tcg_env, 1165 offsetof(CPUHexagonState, vstore[i].va), 1166 vstore_addr_names[i]); 1167 1168 snprintf(vstore_size_names[i], NAME_LEN, "vstore_size_%d", i); 1169 hex_vstore_size[i] = tcg_global_mem_new(tcg_env, 1170 offsetof(CPUHexagonState, vstore[i].size), 1171 vstore_size_names[i]); 1172 1173 snprintf(vstore_pending_names[i], NAME_LEN, "vstore_pending_%d", i); 1174 hex_vstore_pending[i] = tcg_global_mem_new(tcg_env, 1175 offsetof(CPUHexagonState, vstore_pending[i]), 1176 vstore_pending_names[i]); 1177 } 1178 } 1179