1 /* 2 * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #define QEMU_GENERATE 19 #include "qemu/osdep.h" 20 #include "cpu.h" 21 #include "tcg/tcg-op.h" 22 #include "tcg/tcg-op-gvec.h" 23 #include "exec/cpu_ldst.h" 24 #include "exec/log.h" 25 #include "internal.h" 26 #include "attribs.h" 27 #include "insn.h" 28 #include "decode.h" 29 #include "translate.h" 30 #include "printinsn.h" 31 32 #include "analyze_funcs_generated.c.inc" 33 34 typedef void (*AnalyzeInsn)(DisasContext *ctx); 35 static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = { 36 #define OPCODE(X) [X] = analyze_##X 37 #include "opcodes_def_generated.h.inc" 38 #undef OPCODE 39 }; 40 41 TCGv hex_gpr[TOTAL_PER_THREAD_REGS]; 42 TCGv hex_pred[NUM_PREGS]; 43 TCGv hex_this_PC; 44 TCGv hex_slot_cancelled; 45 TCGv hex_branch_taken; 46 TCGv hex_new_value[TOTAL_PER_THREAD_REGS]; 47 TCGv hex_reg_written[TOTAL_PER_THREAD_REGS]; 48 TCGv hex_new_pred_value[NUM_PREGS]; 49 TCGv hex_pred_written; 50 TCGv hex_store_addr[STORES_MAX]; 51 TCGv hex_store_width[STORES_MAX]; 52 TCGv hex_store_val32[STORES_MAX]; 53 TCGv_i64 hex_store_val64[STORES_MAX]; 54 TCGv hex_pkt_has_store_s1; 55 TCGv hex_dczero_addr; 56 TCGv hex_llsc_addr; 57 TCGv hex_llsc_val; 58 TCGv_i64 hex_llsc_val_i64; 59 TCGv hex_vstore_addr[VSTORES_MAX]; 60 TCGv hex_vstore_size[VSTORES_MAX]; 61 TCGv hex_vstore_pending[VSTORES_MAX]; 62 63 static const char * const hexagon_prednames[] = { 64 "p0", "p1", "p2", "p3" 65 }; 66 67 intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum, 68 int num, bool alloc_ok) 69 { 70 intptr_t offset; 71 72 /* See if it is already allocated */ 73 for (int i = 0; i < ctx->future_vregs_idx; i++) { 74 if (ctx->future_vregs_num[i] == regnum) { 75 return offsetof(CPUHexagonState, future_VRegs[i]); 76 } 77 } 78 79 g_assert(alloc_ok); 80 offset = offsetof(CPUHexagonState, future_VRegs[ctx->future_vregs_idx]); 81 for (int i = 0; i < num; i++) { 82 ctx->future_vregs_num[ctx->future_vregs_idx + i] = regnum++; 83 } 84 ctx->future_vregs_idx += num; 85 g_assert(ctx->future_vregs_idx <= VECTOR_TEMPS_MAX); 86 return offset; 87 } 88 89 intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum, 90 int num, bool alloc_ok) 91 { 92 intptr_t offset; 93 94 /* See if it is already allocated */ 95 for (int i = 0; i < ctx->tmp_vregs_idx; i++) { 96 if (ctx->tmp_vregs_num[i] == regnum) { 97 return offsetof(CPUHexagonState, tmp_VRegs[i]); 98 } 99 } 100 101 g_assert(alloc_ok); 102 offset = offsetof(CPUHexagonState, tmp_VRegs[ctx->tmp_vregs_idx]); 103 for (int i = 0; i < num; i++) { 104 ctx->tmp_vregs_num[ctx->tmp_vregs_idx + i] = regnum++; 105 } 106 ctx->tmp_vregs_idx += num; 107 g_assert(ctx->tmp_vregs_idx <= VECTOR_TEMPS_MAX); 108 return offset; 109 } 110 111 static void gen_exception_raw(int excp) 112 { 113 gen_helper_raise_exception(cpu_env, tcg_constant_i32(excp)); 114 } 115 116 static void gen_exec_counters(DisasContext *ctx) 117 { 118 tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT], 119 hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets); 120 tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT], 121 hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns); 122 tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT], 123 hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns); 124 } 125 126 static bool use_goto_tb(DisasContext *ctx, target_ulong dest) 127 { 128 return translator_use_goto_tb(&ctx->base, dest); 129 } 130 131 static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest, bool 132 move_to_pc) 133 { 134 if (use_goto_tb(ctx, dest)) { 135 tcg_gen_goto_tb(idx); 136 if (move_to_pc) { 137 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest); 138 } 139 tcg_gen_exit_tb(ctx->base.tb, idx); 140 } else { 141 if (move_to_pc) { 142 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest); 143 } 144 tcg_gen_lookup_and_goto_ptr(); 145 } 146 } 147 148 static void gen_end_tb(DisasContext *ctx) 149 { 150 Packet *pkt = ctx->pkt; 151 152 gen_exec_counters(ctx); 153 154 if (ctx->branch_cond != TCG_COND_NEVER) { 155 if (ctx->branch_cond != TCG_COND_ALWAYS) { 156 TCGLabel *skip = gen_new_label(); 157 tcg_gen_brcondi_tl(ctx->branch_cond, hex_branch_taken, 0, skip); 158 gen_goto_tb(ctx, 0, ctx->branch_dest, true); 159 gen_set_label(skip); 160 gen_goto_tb(ctx, 1, ctx->next_PC, false); 161 } else { 162 gen_goto_tb(ctx, 0, ctx->branch_dest, true); 163 } 164 } else if (ctx->is_tight_loop && 165 pkt->insn[pkt->num_insns - 1].opcode == J2_endloop0) { 166 /* 167 * When we're in a tight loop, we defer the endloop0 processing 168 * to take advantage of direct block chaining 169 */ 170 TCGLabel *skip = gen_new_label(); 171 tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, skip); 172 tcg_gen_subi_tl(hex_gpr[HEX_REG_LC0], hex_gpr[HEX_REG_LC0], 1); 173 gen_goto_tb(ctx, 0, ctx->base.tb->pc, true); 174 gen_set_label(skip); 175 gen_goto_tb(ctx, 1, ctx->next_PC, false); 176 } else { 177 tcg_gen_lookup_and_goto_ptr(); 178 } 179 180 ctx->base.is_jmp = DISAS_NORETURN; 181 } 182 183 static void gen_exception_end_tb(DisasContext *ctx, int excp) 184 { 185 gen_exec_counters(ctx); 186 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->next_PC); 187 gen_exception_raw(excp); 188 ctx->base.is_jmp = DISAS_NORETURN; 189 190 } 191 192 #define PACKET_BUFFER_LEN 1028 193 static void print_pkt(Packet *pkt) 194 { 195 GString *buf = g_string_sized_new(PACKET_BUFFER_LEN); 196 snprint_a_pkt_debug(buf, pkt); 197 HEX_DEBUG_LOG("%s", buf->str); 198 g_string_free(buf, true); 199 } 200 #define HEX_DEBUG_PRINT_PKT(pkt) \ 201 do { \ 202 if (HEX_DEBUG) { \ 203 print_pkt(pkt); \ 204 } \ 205 } while (0) 206 207 static int read_packet_words(CPUHexagonState *env, DisasContext *ctx, 208 uint32_t words[]) 209 { 210 bool found_end = false; 211 int nwords, max_words; 212 213 memset(words, 0, PACKET_WORDS_MAX * sizeof(uint32_t)); 214 for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) { 215 words[nwords] = 216 translator_ldl(env, &ctx->base, 217 ctx->base.pc_next + nwords * sizeof(uint32_t)); 218 found_end = is_packet_end(words[nwords]); 219 } 220 if (!found_end) { 221 /* Read too many words without finding the end */ 222 return 0; 223 } 224 225 /* Check for page boundary crossing */ 226 max_words = -(ctx->base.pc_next | TARGET_PAGE_MASK) / sizeof(uint32_t); 227 if (nwords > max_words) { 228 /* We can only cross a page boundary at the beginning of a TB */ 229 g_assert(ctx->base.num_insns == 1); 230 } 231 232 HEX_DEBUG_LOG("decode_packet: pc = 0x%x\n", ctx->base.pc_next); 233 HEX_DEBUG_LOG(" words = { "); 234 for (int i = 0; i < nwords; i++) { 235 HEX_DEBUG_LOG("0x%x, ", words[i]); 236 } 237 HEX_DEBUG_LOG("}\n"); 238 239 return nwords; 240 } 241 242 static bool check_for_attrib(Packet *pkt, int attrib) 243 { 244 for (int i = 0; i < pkt->num_insns; i++) { 245 if (GET_ATTRIB(pkt->insn[i].opcode, attrib)) { 246 return true; 247 } 248 } 249 return false; 250 } 251 252 static bool need_slot_cancelled(Packet *pkt) 253 { 254 /* We only need slot_cancelled for conditional store instructions */ 255 for (int i = 0; i < pkt->num_insns; i++) { 256 uint16_t opcode = pkt->insn[i].opcode; 257 if (GET_ATTRIB(opcode, A_CONDEXEC) && 258 GET_ATTRIB(opcode, A_SCALAR_STORE)) { 259 return true; 260 } 261 } 262 return false; 263 } 264 265 static bool need_pred_written(Packet *pkt) 266 { 267 return check_for_attrib(pkt, A_WRITES_PRED_REG); 268 } 269 270 static bool need_next_PC(DisasContext *ctx) 271 { 272 Packet *pkt = ctx->pkt; 273 274 /* Check for conditional control flow or HW loop end */ 275 for (int i = 0; i < pkt->num_insns; i++) { 276 uint16_t opcode = pkt->insn[i].opcode; 277 if (GET_ATTRIB(opcode, A_CONDEXEC) && GET_ATTRIB(opcode, A_COF)) { 278 return true; 279 } 280 if (GET_ATTRIB(opcode, A_HWLOOP0_END) || 281 GET_ATTRIB(opcode, A_HWLOOP1_END)) { 282 return true; 283 } 284 } 285 return false; 286 } 287 288 /* 289 * The opcode_analyze functions mark most of the writes in a packet 290 * However, there are some implicit writes marked as attributes 291 * of the applicable instructions. 292 */ 293 static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum) 294 { 295 uint16_t opcode = ctx->insn->opcode; 296 if (GET_ATTRIB(opcode, attrib)) { 297 /* 298 * USR is used to set overflow and FP exceptions, 299 * so treat it as conditional 300 */ 301 bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) || 302 rnum == HEX_REG_USR; 303 304 /* LC0/LC1 is conditionally written by endloop instructions */ 305 if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) && 306 (opcode == J2_endloop0 || 307 opcode == J2_endloop1 || 308 opcode == J2_endloop01)) { 309 is_predicated = true; 310 } 311 312 ctx_log_reg_write(ctx, rnum, is_predicated); 313 } 314 } 315 316 static void mark_implicit_reg_writes(DisasContext *ctx) 317 { 318 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_FP, HEX_REG_FP); 319 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SP, HEX_REG_SP); 320 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LR, HEX_REG_LR); 321 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC0, HEX_REG_LC0); 322 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0); 323 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1); 324 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1); 325 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_USR, HEX_REG_USR); 326 mark_implicit_reg_write(ctx, A_FPOP, HEX_REG_USR); 327 } 328 329 static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum) 330 { 331 if (GET_ATTRIB(ctx->insn->opcode, attrib)) { 332 ctx_log_pred_write(ctx, pnum); 333 } 334 } 335 336 static void mark_implicit_pred_writes(DisasContext *ctx) 337 { 338 mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P0, 0); 339 mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P1, 1); 340 mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P2, 2); 341 mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3); 342 } 343 344 static void analyze_packet(DisasContext *ctx) 345 { 346 Packet *pkt = ctx->pkt; 347 ctx->need_pkt_has_store_s1 = false; 348 for (int i = 0; i < pkt->num_insns; i++) { 349 Insn *insn = &pkt->insn[i]; 350 ctx->insn = insn; 351 if (opcode_analyze[insn->opcode]) { 352 opcode_analyze[insn->opcode](ctx); 353 } 354 mark_implicit_reg_writes(ctx); 355 mark_implicit_pred_writes(ctx); 356 } 357 } 358 359 static void gen_start_packet(DisasContext *ctx) 360 { 361 Packet *pkt = ctx->pkt; 362 target_ulong next_PC = ctx->base.pc_next + pkt->encod_pkt_size_in_bytes; 363 int i; 364 365 /* Clear out the disassembly context */ 366 ctx->next_PC = next_PC; 367 ctx->reg_log_idx = 0; 368 bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS); 369 bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); 370 ctx->preg_log_idx = 0; 371 bitmap_zero(ctx->pregs_written, NUM_PREGS); 372 ctx->future_vregs_idx = 0; 373 ctx->tmp_vregs_idx = 0; 374 ctx->vreg_log_idx = 0; 375 bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS); 376 bitmap_zero(ctx->vregs_updated, NUM_VREGS); 377 bitmap_zero(ctx->vregs_select, NUM_VREGS); 378 bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS); 379 bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS); 380 ctx->qreg_log_idx = 0; 381 for (i = 0; i < STORES_MAX; i++) { 382 ctx->store_width[i] = 0; 383 } 384 ctx->s1_store_processed = false; 385 ctx->pre_commit = true; 386 387 analyze_packet(ctx); 388 389 if (ctx->need_pkt_has_store_s1) { 390 tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1); 391 } 392 393 /* 394 * pregs_written is used both in the analyze phase as well as the code 395 * gen phase, so clear it again. 396 */ 397 bitmap_zero(ctx->pregs_written, NUM_PREGS); 398 399 if (HEX_DEBUG) { 400 /* Handy place to set a breakpoint before the packet executes */ 401 gen_helper_debug_start_packet(cpu_env); 402 tcg_gen_movi_tl(hex_this_PC, ctx->base.pc_next); 403 } 404 405 /* Initialize the runtime state for packet semantics */ 406 if (need_slot_cancelled(pkt)) { 407 tcg_gen_movi_tl(hex_slot_cancelled, 0); 408 } 409 if (pkt->pkt_has_cof) { 410 if (pkt->pkt_has_multi_cof) { 411 tcg_gen_movi_tl(hex_branch_taken, 0); 412 } 413 if (need_next_PC(ctx)) { 414 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], next_PC); 415 } 416 } 417 if (need_pred_written(pkt)) { 418 tcg_gen_movi_tl(hex_pred_written, 0); 419 } 420 421 /* Preload the predicated registers into hex_new_value[i] */ 422 if (!bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) { 423 int i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); 424 while (i < TOTAL_PER_THREAD_REGS) { 425 tcg_gen_mov_tl(hex_new_value[i], hex_gpr[i]); 426 i = find_next_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS, 427 i + 1); 428 } 429 } 430 431 /* Preload the predicated HVX registers into future_VRegs and tmp_VRegs */ 432 if (!bitmap_empty(ctx->predicated_future_vregs, NUM_VREGS)) { 433 int i = find_first_bit(ctx->predicated_future_vregs, NUM_VREGS); 434 while (i < NUM_VREGS) { 435 const intptr_t VdV_off = 436 ctx_future_vreg_off(ctx, i, 1, true); 437 intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]); 438 tcg_gen_gvec_mov(MO_64, VdV_off, 439 src_off, 440 sizeof(MMVector), 441 sizeof(MMVector)); 442 i = find_next_bit(ctx->predicated_future_vregs, NUM_VREGS, i + 1); 443 } 444 } 445 if (!bitmap_empty(ctx->predicated_tmp_vregs, NUM_VREGS)) { 446 int i = find_first_bit(ctx->predicated_tmp_vregs, NUM_VREGS); 447 while (i < NUM_VREGS) { 448 const intptr_t VdV_off = 449 ctx_tmp_vreg_off(ctx, i, 1, true); 450 intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]); 451 tcg_gen_gvec_mov(MO_64, VdV_off, 452 src_off, 453 sizeof(MMVector), 454 sizeof(MMVector)); 455 i = find_next_bit(ctx->predicated_tmp_vregs, NUM_VREGS, i + 1); 456 } 457 } 458 } 459 460 bool is_gather_store_insn(DisasContext *ctx) 461 { 462 Packet *pkt = ctx->pkt; 463 Insn *insn = ctx->insn; 464 if (GET_ATTRIB(insn->opcode, A_CVI_NEW) && 465 insn->new_value_producer_slot == 1) { 466 /* Look for gather instruction */ 467 for (int i = 0; i < pkt->num_insns; i++) { 468 Insn *in = &pkt->insn[i]; 469 if (GET_ATTRIB(in->opcode, A_CVI_GATHER) && in->slot == 1) { 470 return true; 471 } 472 } 473 } 474 return false; 475 } 476 477 static void mark_store_width(DisasContext *ctx) 478 { 479 uint16_t opcode = ctx->insn->opcode; 480 uint32_t slot = ctx->insn->slot; 481 uint8_t width = 0; 482 483 if (GET_ATTRIB(opcode, A_SCALAR_STORE)) { 484 if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) { 485 width |= 1; 486 } 487 if (GET_ATTRIB(opcode, A_MEMSIZE_2B)) { 488 width |= 2; 489 } 490 if (GET_ATTRIB(opcode, A_MEMSIZE_4B)) { 491 width |= 4; 492 } 493 if (GET_ATTRIB(opcode, A_MEMSIZE_8B)) { 494 width |= 8; 495 } 496 tcg_debug_assert(is_power_of_2(width)); 497 ctx->store_width[slot] = width; 498 } 499 } 500 501 static void gen_insn(DisasContext *ctx) 502 { 503 if (ctx->insn->generate) { 504 ctx->insn->generate(ctx); 505 mark_store_width(ctx); 506 } else { 507 gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE); 508 } 509 } 510 511 /* 512 * Helpers for generating the packet commit 513 */ 514 static void gen_reg_writes(DisasContext *ctx) 515 { 516 int i; 517 518 for (i = 0; i < ctx->reg_log_idx; i++) { 519 int reg_num = ctx->reg_log[i]; 520 521 tcg_gen_mov_tl(hex_gpr[reg_num], hex_new_value[reg_num]); 522 523 /* 524 * ctx->is_tight_loop is set when SA0 points to the beginning of the TB. 525 * If we write to SA0, we have to turn off tight loop handling. 526 */ 527 if (reg_num == HEX_REG_SA0) { 528 ctx->is_tight_loop = false; 529 } 530 } 531 } 532 533 static void gen_pred_writes(DisasContext *ctx) 534 { 535 int i; 536 537 /* Early exit if the log is empty */ 538 if (!ctx->preg_log_idx) { 539 return; 540 } 541 542 /* 543 * Only endloop instructions will conditionally 544 * write a predicate. If there are no endloop 545 * instructions, we can use the non-conditional 546 * write of the predicates. 547 */ 548 if (ctx->pkt->pkt_has_endloop) { 549 TCGv zero = tcg_constant_tl(0); 550 TCGv pred_written = tcg_temp_new(); 551 for (i = 0; i < ctx->preg_log_idx; i++) { 552 int pred_num = ctx->preg_log[i]; 553 554 tcg_gen_andi_tl(pred_written, hex_pred_written, 1 << pred_num); 555 tcg_gen_movcond_tl(TCG_COND_NE, hex_pred[pred_num], 556 pred_written, zero, 557 hex_new_pred_value[pred_num], 558 hex_pred[pred_num]); 559 } 560 } else { 561 for (i = 0; i < ctx->preg_log_idx; i++) { 562 int pred_num = ctx->preg_log[i]; 563 tcg_gen_mov_tl(hex_pred[pred_num], hex_new_pred_value[pred_num]); 564 if (HEX_DEBUG) { 565 /* Do this so HELPER(debug_commit_end) will know */ 566 tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 567 1 << pred_num); 568 } 569 } 570 } 571 } 572 573 static void gen_check_store_width(DisasContext *ctx, int slot_num) 574 { 575 if (HEX_DEBUG) { 576 TCGv slot = tcg_constant_tl(slot_num); 577 TCGv check = tcg_constant_tl(ctx->store_width[slot_num]); 578 gen_helper_debug_check_store_width(cpu_env, slot, check); 579 } 580 } 581 582 static bool slot_is_predicated(Packet *pkt, int slot_num) 583 { 584 for (int i = 0; i < pkt->num_insns; i++) { 585 if (pkt->insn[i].slot == slot_num) { 586 return GET_ATTRIB(pkt->insn[i].opcode, A_CONDEXEC); 587 } 588 } 589 /* If we get to here, we didn't find an instruction in the requested slot */ 590 g_assert_not_reached(); 591 } 592 593 void process_store(DisasContext *ctx, int slot_num) 594 { 595 bool is_predicated = slot_is_predicated(ctx->pkt, slot_num); 596 TCGLabel *label_end = NULL; 597 598 /* 599 * We may have already processed this store 600 * See CHECK_NOSHUF in macros.h 601 */ 602 if (slot_num == 1 && ctx->s1_store_processed) { 603 return; 604 } 605 ctx->s1_store_processed = true; 606 607 if (is_predicated) { 608 TCGv cancelled = tcg_temp_new(); 609 label_end = gen_new_label(); 610 611 /* Don't do anything if the slot was cancelled */ 612 tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1); 613 tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end); 614 } 615 { 616 TCGv address = tcg_temp_new(); 617 tcg_gen_mov_tl(address, hex_store_addr[slot_num]); 618 619 /* 620 * If we know the width from the DisasContext, we can 621 * generate much cleaner code. 622 * Unfortunately, not all instructions execute the fSTORE 623 * macro during code generation. Anything that uses the 624 * generic helper will have this problem. Instructions 625 * that use fWRAP to generate proper TCG code will be OK. 626 */ 627 switch (ctx->store_width[slot_num]) { 628 case 1: 629 gen_check_store_width(ctx, slot_num); 630 tcg_gen_qemu_st_tl(hex_store_val32[slot_num], 631 hex_store_addr[slot_num], 632 ctx->mem_idx, MO_UB); 633 break; 634 case 2: 635 gen_check_store_width(ctx, slot_num); 636 tcg_gen_qemu_st_tl(hex_store_val32[slot_num], 637 hex_store_addr[slot_num], 638 ctx->mem_idx, MO_TEUW); 639 break; 640 case 4: 641 gen_check_store_width(ctx, slot_num); 642 tcg_gen_qemu_st_tl(hex_store_val32[slot_num], 643 hex_store_addr[slot_num], 644 ctx->mem_idx, MO_TEUL); 645 break; 646 case 8: 647 gen_check_store_width(ctx, slot_num); 648 tcg_gen_qemu_st_i64(hex_store_val64[slot_num], 649 hex_store_addr[slot_num], 650 ctx->mem_idx, MO_TEUQ); 651 break; 652 default: 653 { 654 /* 655 * If we get to here, we don't know the width at 656 * TCG generation time, we'll use a helper to 657 * avoid branching based on the width at runtime. 658 */ 659 TCGv slot = tcg_constant_tl(slot_num); 660 gen_helper_commit_store(cpu_env, slot); 661 } 662 } 663 } 664 if (is_predicated) { 665 gen_set_label(label_end); 666 } 667 } 668 669 static void process_store_log(DisasContext *ctx) 670 { 671 /* 672 * When a packet has two stores, the hardware processes 673 * slot 1 and then slot 0. This will be important when 674 * the memory accesses overlap. 675 */ 676 Packet *pkt = ctx->pkt; 677 if (pkt->pkt_has_store_s1) { 678 g_assert(!pkt->pkt_has_dczeroa); 679 process_store(ctx, 1); 680 } 681 if (pkt->pkt_has_store_s0) { 682 g_assert(!pkt->pkt_has_dczeroa); 683 process_store(ctx, 0); 684 } 685 } 686 687 /* Zero out a 32-bit cache line */ 688 static void process_dczeroa(DisasContext *ctx) 689 { 690 if (ctx->pkt->pkt_has_dczeroa) { 691 /* Store 32 bytes of zero starting at (addr & ~0x1f) */ 692 TCGv addr = tcg_temp_new(); 693 TCGv_i64 zero = tcg_constant_i64(0); 694 695 tcg_gen_andi_tl(addr, hex_dczero_addr, ~0x1f); 696 tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); 697 tcg_gen_addi_tl(addr, addr, 8); 698 tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); 699 tcg_gen_addi_tl(addr, addr, 8); 700 tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); 701 tcg_gen_addi_tl(addr, addr, 8); 702 tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); 703 } 704 } 705 706 static bool pkt_has_hvx_store(Packet *pkt) 707 { 708 int i; 709 for (i = 0; i < pkt->num_insns; i++) { 710 int opcode = pkt->insn[i].opcode; 711 if (GET_ATTRIB(opcode, A_CVI) && GET_ATTRIB(opcode, A_STORE)) { 712 return true; 713 } 714 } 715 return false; 716 } 717 718 static void gen_commit_hvx(DisasContext *ctx) 719 { 720 int i; 721 722 /* 723 * for (i = 0; i < ctx->vreg_log_idx; i++) { 724 * int rnum = ctx->vreg_log[i]; 725 * env->VRegs[rnum] = env->future_VRegs[rnum]; 726 * } 727 */ 728 for (i = 0; i < ctx->vreg_log_idx; i++) { 729 int rnum = ctx->vreg_log[i]; 730 intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]); 731 intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false); 732 size_t size = sizeof(MMVector); 733 734 tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); 735 } 736 737 /* 738 * for (i = 0; i < ctx->qreg_log_idx; i++) { 739 * int rnum = ctx->qreg_log[i]; 740 * env->QRegs[rnum] = env->future_QRegs[rnum]; 741 * } 742 */ 743 for (i = 0; i < ctx->qreg_log_idx; i++) { 744 int rnum = ctx->qreg_log[i]; 745 intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]); 746 intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]); 747 size_t size = sizeof(MMQReg); 748 749 tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); 750 } 751 752 if (pkt_has_hvx_store(ctx->pkt)) { 753 gen_helper_commit_hvx_stores(cpu_env); 754 } 755 } 756 757 static void update_exec_counters(DisasContext *ctx) 758 { 759 Packet *pkt = ctx->pkt; 760 int num_insns = pkt->num_insns; 761 int num_real_insns = 0; 762 int num_hvx_insns = 0; 763 764 for (int i = 0; i < num_insns; i++) { 765 if (!pkt->insn[i].is_endloop && 766 !pkt->insn[i].part1 && 767 !GET_ATTRIB(pkt->insn[i].opcode, A_IT_NOP)) { 768 num_real_insns++; 769 } 770 if (GET_ATTRIB(pkt->insn[i].opcode, A_CVI)) { 771 num_hvx_insns++; 772 } 773 } 774 775 ctx->num_packets++; 776 ctx->num_insns += num_real_insns; 777 ctx->num_hvx_insns += num_hvx_insns; 778 } 779 780 static void gen_commit_packet(DisasContext *ctx) 781 { 782 /* 783 * If there is more than one store in a packet, make sure they are all OK 784 * before proceeding with the rest of the packet commit. 785 * 786 * dczeroa has to be the only store operation in the packet, so we go 787 * ahead and process that first. 788 * 789 * When there is an HVX store, there can also be a scalar store in either 790 * slot 0 or slot1, so we create a mask for the helper to indicate what 791 * work to do. 792 * 793 * When there are two scalar stores, we probe the one in slot 0. 794 * 795 * Note that we don't call the probe helper for packets with only one 796 * store. Therefore, we call process_store_log before anything else 797 * involved in committing the packet. 798 */ 799 Packet *pkt = ctx->pkt; 800 bool has_store_s0 = pkt->pkt_has_store_s0; 801 bool has_store_s1 = (pkt->pkt_has_store_s1 && !ctx->s1_store_processed); 802 bool has_hvx_store = pkt_has_hvx_store(pkt); 803 if (pkt->pkt_has_dczeroa) { 804 /* 805 * The dczeroa will be the store in slot 0, check that we don't have 806 * a store in slot 1 or an HVX store. 807 */ 808 g_assert(!has_store_s1 && !has_hvx_store); 809 process_dczeroa(ctx); 810 } else if (has_hvx_store) { 811 if (!has_store_s0 && !has_store_s1) { 812 TCGv mem_idx = tcg_constant_tl(ctx->mem_idx); 813 gen_helper_probe_hvx_stores(cpu_env, mem_idx); 814 } else { 815 int mask = 0; 816 817 if (has_store_s0) { 818 mask = 819 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0, 1); 820 } 821 if (has_store_s1) { 822 mask = 823 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1, 1); 824 } 825 if (has_hvx_store) { 826 mask = 827 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, 828 HAS_HVX_STORES, 1); 829 } 830 if (has_store_s0 && slot_is_predicated(pkt, 0)) { 831 mask = 832 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, 833 S0_IS_PRED, 1); 834 } 835 if (has_store_s1 && slot_is_predicated(pkt, 1)) { 836 mask = 837 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, 838 S1_IS_PRED, 1); 839 } 840 mask = FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, MMU_IDX, 841 ctx->mem_idx); 842 gen_helper_probe_pkt_scalar_hvx_stores(cpu_env, 843 tcg_constant_tl(mask)); 844 } 845 } else if (has_store_s0 && has_store_s1) { 846 /* 847 * process_store_log will execute the slot 1 store first, 848 * so we only have to probe the store in slot 0 849 */ 850 int args = 0; 851 args = 852 FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, ctx->mem_idx); 853 if (slot_is_predicated(pkt, 0)) { 854 args = 855 FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED, 1); 856 } 857 TCGv args_tcgv = tcg_constant_tl(args); 858 gen_helper_probe_pkt_scalar_store_s0(cpu_env, args_tcgv); 859 } 860 861 process_store_log(ctx); 862 863 gen_reg_writes(ctx); 864 gen_pred_writes(ctx); 865 if (pkt->pkt_has_hvx) { 866 gen_commit_hvx(ctx); 867 } 868 update_exec_counters(ctx); 869 if (HEX_DEBUG) { 870 TCGv has_st0 = 871 tcg_constant_tl(pkt->pkt_has_store_s0 && !pkt->pkt_has_dczeroa); 872 TCGv has_st1 = 873 tcg_constant_tl(pkt->pkt_has_store_s1 && !pkt->pkt_has_dczeroa); 874 875 /* Handy place to set a breakpoint at the end of execution */ 876 gen_helper_debug_commit_end(cpu_env, has_st0, has_st1); 877 } 878 879 if (pkt->vhist_insn != NULL) { 880 ctx->pre_commit = false; 881 ctx->insn = pkt->vhist_insn; 882 pkt->vhist_insn->generate(ctx); 883 } 884 885 if (pkt->pkt_has_cof) { 886 gen_end_tb(ctx); 887 } 888 } 889 890 static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx) 891 { 892 uint32_t words[PACKET_WORDS_MAX]; 893 int nwords; 894 Packet pkt; 895 int i; 896 897 nwords = read_packet_words(env, ctx, words); 898 if (!nwords) { 899 gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET); 900 return; 901 } 902 903 if (decode_packet(nwords, words, &pkt, false) > 0) { 904 pkt.pc = ctx->base.pc_next; 905 HEX_DEBUG_PRINT_PKT(&pkt); 906 ctx->pkt = &pkt; 907 gen_start_packet(ctx); 908 for (i = 0; i < pkt.num_insns; i++) { 909 ctx->insn = &pkt.insn[i]; 910 gen_insn(ctx); 911 } 912 gen_commit_packet(ctx); 913 ctx->base.pc_next += pkt.encod_pkt_size_in_bytes; 914 } else { 915 gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET); 916 } 917 } 918 919 static void hexagon_tr_init_disas_context(DisasContextBase *dcbase, 920 CPUState *cs) 921 { 922 DisasContext *ctx = container_of(dcbase, DisasContext, base); 923 uint32_t hex_flags = dcbase->tb->flags; 924 925 ctx->mem_idx = MMU_USER_IDX; 926 ctx->num_packets = 0; 927 ctx->num_insns = 0; 928 ctx->num_hvx_insns = 0; 929 ctx->branch_cond = TCG_COND_NEVER; 930 ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP); 931 } 932 933 static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu) 934 { 935 } 936 937 static void hexagon_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) 938 { 939 DisasContext *ctx = container_of(dcbase, DisasContext, base); 940 941 tcg_gen_insn_start(ctx->base.pc_next); 942 } 943 944 static bool pkt_crosses_page(CPUHexagonState *env, DisasContext *ctx) 945 { 946 target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK; 947 bool found_end = false; 948 int nwords; 949 950 for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) { 951 uint32_t word = cpu_ldl_code(env, 952 ctx->base.pc_next + nwords * sizeof(uint32_t)); 953 found_end = is_packet_end(word); 954 } 955 uint32_t next_ptr = ctx->base.pc_next + nwords * sizeof(uint32_t); 956 return found_end && next_ptr - page_start >= TARGET_PAGE_SIZE; 957 } 958 959 static void hexagon_tr_translate_packet(DisasContextBase *dcbase, CPUState *cpu) 960 { 961 DisasContext *ctx = container_of(dcbase, DisasContext, base); 962 CPUHexagonState *env = cpu->env_ptr; 963 964 decode_and_translate_packet(env, ctx); 965 966 if (ctx->base.is_jmp == DISAS_NEXT) { 967 target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK; 968 target_ulong bytes_max = PACKET_WORDS_MAX * sizeof(target_ulong); 969 970 if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE || 971 (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE - bytes_max && 972 pkt_crosses_page(env, ctx))) { 973 ctx->base.is_jmp = DISAS_TOO_MANY; 974 } 975 976 /* 977 * The CPU log is used to compare against LLDB single stepping, 978 * so end the TLB after every packet. 979 */ 980 HexagonCPU *hex_cpu = env_archcpu(env); 981 if (hex_cpu->lldb_compat && qemu_loglevel_mask(CPU_LOG_TB_CPU)) { 982 ctx->base.is_jmp = DISAS_TOO_MANY; 983 } 984 } 985 } 986 987 static void hexagon_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) 988 { 989 DisasContext *ctx = container_of(dcbase, DisasContext, base); 990 991 switch (ctx->base.is_jmp) { 992 case DISAS_TOO_MANY: 993 gen_exec_counters(ctx); 994 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next); 995 tcg_gen_exit_tb(NULL, 0); 996 break; 997 case DISAS_NORETURN: 998 break; 999 default: 1000 g_assert_not_reached(); 1001 } 1002 } 1003 1004 static void hexagon_tr_disas_log(const DisasContextBase *dcbase, 1005 CPUState *cpu, FILE *logfile) 1006 { 1007 fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first)); 1008 target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size); 1009 } 1010 1011 1012 static const TranslatorOps hexagon_tr_ops = { 1013 .init_disas_context = hexagon_tr_init_disas_context, 1014 .tb_start = hexagon_tr_tb_start, 1015 .insn_start = hexagon_tr_insn_start, 1016 .translate_insn = hexagon_tr_translate_packet, 1017 .tb_stop = hexagon_tr_tb_stop, 1018 .disas_log = hexagon_tr_disas_log, 1019 }; 1020 1021 void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, 1022 target_ulong pc, void *host_pc) 1023 { 1024 DisasContext ctx; 1025 1026 translator_loop(cs, tb, max_insns, pc, host_pc, 1027 &hexagon_tr_ops, &ctx.base); 1028 } 1029 1030 #define NAME_LEN 64 1031 static char new_value_names[TOTAL_PER_THREAD_REGS][NAME_LEN]; 1032 static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN]; 1033 static char new_pred_value_names[NUM_PREGS][NAME_LEN]; 1034 static char store_addr_names[STORES_MAX][NAME_LEN]; 1035 static char store_width_names[STORES_MAX][NAME_LEN]; 1036 static char store_val32_names[STORES_MAX][NAME_LEN]; 1037 static char store_val64_names[STORES_MAX][NAME_LEN]; 1038 static char vstore_addr_names[VSTORES_MAX][NAME_LEN]; 1039 static char vstore_size_names[VSTORES_MAX][NAME_LEN]; 1040 static char vstore_pending_names[VSTORES_MAX][NAME_LEN]; 1041 1042 void hexagon_translate_init(void) 1043 { 1044 int i; 1045 1046 opcode_init(); 1047 1048 for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) { 1049 hex_gpr[i] = tcg_global_mem_new(cpu_env, 1050 offsetof(CPUHexagonState, gpr[i]), 1051 hexagon_regnames[i]); 1052 1053 snprintf(new_value_names[i], NAME_LEN, "new_%s", hexagon_regnames[i]); 1054 hex_new_value[i] = tcg_global_mem_new(cpu_env, 1055 offsetof(CPUHexagonState, new_value[i]), 1056 new_value_names[i]); 1057 1058 if (HEX_DEBUG) { 1059 snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s", 1060 hexagon_regnames[i]); 1061 hex_reg_written[i] = tcg_global_mem_new(cpu_env, 1062 offsetof(CPUHexagonState, reg_written[i]), 1063 reg_written_names[i]); 1064 } 1065 } 1066 for (i = 0; i < NUM_PREGS; i++) { 1067 hex_pred[i] = tcg_global_mem_new(cpu_env, 1068 offsetof(CPUHexagonState, pred[i]), 1069 hexagon_prednames[i]); 1070 1071 snprintf(new_pred_value_names[i], NAME_LEN, "new_pred_%s", 1072 hexagon_prednames[i]); 1073 hex_new_pred_value[i] = tcg_global_mem_new(cpu_env, 1074 offsetof(CPUHexagonState, new_pred_value[i]), 1075 new_pred_value_names[i]); 1076 } 1077 hex_pred_written = tcg_global_mem_new(cpu_env, 1078 offsetof(CPUHexagonState, pred_written), "pred_written"); 1079 hex_this_PC = tcg_global_mem_new(cpu_env, 1080 offsetof(CPUHexagonState, this_PC), "this_PC"); 1081 hex_slot_cancelled = tcg_global_mem_new(cpu_env, 1082 offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled"); 1083 hex_branch_taken = tcg_global_mem_new(cpu_env, 1084 offsetof(CPUHexagonState, branch_taken), "branch_taken"); 1085 hex_pkt_has_store_s1 = tcg_global_mem_new(cpu_env, 1086 offsetof(CPUHexagonState, pkt_has_store_s1), "pkt_has_store_s1"); 1087 hex_dczero_addr = tcg_global_mem_new(cpu_env, 1088 offsetof(CPUHexagonState, dczero_addr), "dczero_addr"); 1089 hex_llsc_addr = tcg_global_mem_new(cpu_env, 1090 offsetof(CPUHexagonState, llsc_addr), "llsc_addr"); 1091 hex_llsc_val = tcg_global_mem_new(cpu_env, 1092 offsetof(CPUHexagonState, llsc_val), "llsc_val"); 1093 hex_llsc_val_i64 = tcg_global_mem_new_i64(cpu_env, 1094 offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64"); 1095 for (i = 0; i < STORES_MAX; i++) { 1096 snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i); 1097 hex_store_addr[i] = tcg_global_mem_new(cpu_env, 1098 offsetof(CPUHexagonState, mem_log_stores[i].va), 1099 store_addr_names[i]); 1100 1101 snprintf(store_width_names[i], NAME_LEN, "store_width_%d", i); 1102 hex_store_width[i] = tcg_global_mem_new(cpu_env, 1103 offsetof(CPUHexagonState, mem_log_stores[i].width), 1104 store_width_names[i]); 1105 1106 snprintf(store_val32_names[i], NAME_LEN, "store_val32_%d", i); 1107 hex_store_val32[i] = tcg_global_mem_new(cpu_env, 1108 offsetof(CPUHexagonState, mem_log_stores[i].data32), 1109 store_val32_names[i]); 1110 1111 snprintf(store_val64_names[i], NAME_LEN, "store_val64_%d", i); 1112 hex_store_val64[i] = tcg_global_mem_new_i64(cpu_env, 1113 offsetof(CPUHexagonState, mem_log_stores[i].data64), 1114 store_val64_names[i]); 1115 } 1116 for (int i = 0; i < VSTORES_MAX; i++) { 1117 snprintf(vstore_addr_names[i], NAME_LEN, "vstore_addr_%d", i); 1118 hex_vstore_addr[i] = tcg_global_mem_new(cpu_env, 1119 offsetof(CPUHexagonState, vstore[i].va), 1120 vstore_addr_names[i]); 1121 1122 snprintf(vstore_size_names[i], NAME_LEN, "vstore_size_%d", i); 1123 hex_vstore_size[i] = tcg_global_mem_new(cpu_env, 1124 offsetof(CPUHexagonState, vstore[i].size), 1125 vstore_size_names[i]); 1126 1127 snprintf(vstore_pending_names[i], NAME_LEN, "vstore_pending_%d", i); 1128 hex_vstore_pending[i] = tcg_global_mem_new(cpu_env, 1129 offsetof(CPUHexagonState, vstore_pending[i]), 1130 vstore_pending_names[i]); 1131 } 1132 } 1133