1 /* 2 * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #define QEMU_GENERATE 19 #include "qemu/osdep.h" 20 #include "cpu.h" 21 #include "tcg/tcg-op.h" 22 #include "tcg/tcg-op-gvec.h" 23 #include "exec/cpu_ldst.h" 24 #include "exec/log.h" 25 #include "internal.h" 26 #include "attribs.h" 27 #include "insn.h" 28 #include "decode.h" 29 #include "translate.h" 30 #include "printinsn.h" 31 32 #include "analyze_funcs_generated.c.inc" 33 34 typedef void (*AnalyzeInsn)(DisasContext *ctx); 35 static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = { 36 #define OPCODE(X) [X] = analyze_##X 37 #include "opcodes_def_generated.h.inc" 38 #undef OPCODE 39 }; 40 41 TCGv hex_gpr[TOTAL_PER_THREAD_REGS]; 42 TCGv hex_pred[NUM_PREGS]; 43 TCGv hex_this_PC; 44 TCGv hex_slot_cancelled; 45 TCGv hex_branch_taken; 46 TCGv hex_new_value[TOTAL_PER_THREAD_REGS]; 47 TCGv hex_reg_written[TOTAL_PER_THREAD_REGS]; 48 TCGv hex_new_pred_value[NUM_PREGS]; 49 TCGv hex_pred_written; 50 TCGv hex_store_addr[STORES_MAX]; 51 TCGv hex_store_width[STORES_MAX]; 52 TCGv hex_store_val32[STORES_MAX]; 53 TCGv_i64 hex_store_val64[STORES_MAX]; 54 TCGv hex_pkt_has_store_s1; 55 TCGv hex_dczero_addr; 56 TCGv hex_llsc_addr; 57 TCGv hex_llsc_val; 58 TCGv_i64 hex_llsc_val_i64; 59 TCGv hex_vstore_addr[VSTORES_MAX]; 60 TCGv hex_vstore_size[VSTORES_MAX]; 61 TCGv hex_vstore_pending[VSTORES_MAX]; 62 63 static const char * const hexagon_prednames[] = { 64 "p0", "p1", "p2", "p3" 65 }; 66 67 intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum, 68 int num, bool alloc_ok) 69 { 70 intptr_t offset; 71 72 /* See if it is already allocated */ 73 for (int i = 0; i < ctx->future_vregs_idx; i++) { 74 if (ctx->future_vregs_num[i] == regnum) { 75 return offsetof(CPUHexagonState, future_VRegs[i]); 76 } 77 } 78 79 g_assert(alloc_ok); 80 offset = offsetof(CPUHexagonState, future_VRegs[ctx->future_vregs_idx]); 81 for (int i = 0; i < num; i++) { 82 ctx->future_vregs_num[ctx->future_vregs_idx + i] = regnum++; 83 } 84 ctx->future_vregs_idx += num; 85 g_assert(ctx->future_vregs_idx <= VECTOR_TEMPS_MAX); 86 return offset; 87 } 88 89 intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum, 90 int num, bool alloc_ok) 91 { 92 intptr_t offset; 93 94 /* See if it is already allocated */ 95 for (int i = 0; i < ctx->tmp_vregs_idx; i++) { 96 if (ctx->tmp_vregs_num[i] == regnum) { 97 return offsetof(CPUHexagonState, tmp_VRegs[i]); 98 } 99 } 100 101 g_assert(alloc_ok); 102 offset = offsetof(CPUHexagonState, tmp_VRegs[ctx->tmp_vregs_idx]); 103 for (int i = 0; i < num; i++) { 104 ctx->tmp_vregs_num[ctx->tmp_vregs_idx + i] = regnum++; 105 } 106 ctx->tmp_vregs_idx += num; 107 g_assert(ctx->tmp_vregs_idx <= VECTOR_TEMPS_MAX); 108 return offset; 109 } 110 111 static void gen_exception_raw(int excp) 112 { 113 gen_helper_raise_exception(cpu_env, tcg_constant_i32(excp)); 114 } 115 116 static void gen_exec_counters(DisasContext *ctx) 117 { 118 tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT], 119 hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets); 120 tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT], 121 hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns); 122 tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT], 123 hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns); 124 } 125 126 static bool use_goto_tb(DisasContext *ctx, target_ulong dest) 127 { 128 return translator_use_goto_tb(&ctx->base, dest); 129 } 130 131 static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest) 132 { 133 if (use_goto_tb(ctx, dest)) { 134 tcg_gen_goto_tb(idx); 135 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest); 136 tcg_gen_exit_tb(ctx->base.tb, idx); 137 } else { 138 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest); 139 tcg_gen_lookup_and_goto_ptr(); 140 } 141 } 142 143 static void gen_end_tb(DisasContext *ctx) 144 { 145 Packet *pkt = ctx->pkt; 146 147 gen_exec_counters(ctx); 148 149 if (ctx->branch_cond != TCG_COND_NEVER) { 150 if (ctx->branch_cond != TCG_COND_ALWAYS) { 151 TCGLabel *skip = gen_new_label(); 152 tcg_gen_brcondi_tl(ctx->branch_cond, hex_branch_taken, 0, skip); 153 gen_goto_tb(ctx, 0, ctx->branch_dest); 154 gen_set_label(skip); 155 gen_goto_tb(ctx, 1, ctx->next_PC); 156 } else { 157 gen_goto_tb(ctx, 0, ctx->branch_dest); 158 } 159 } else if (ctx->is_tight_loop && 160 pkt->insn[pkt->num_insns - 1].opcode == J2_endloop0) { 161 /* 162 * When we're in a tight loop, we defer the endloop0 processing 163 * to take advantage of direct block chaining 164 */ 165 TCGLabel *skip = gen_new_label(); 166 tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, skip); 167 tcg_gen_subi_tl(hex_gpr[HEX_REG_LC0], hex_gpr[HEX_REG_LC0], 1); 168 gen_goto_tb(ctx, 0, ctx->base.tb->pc); 169 gen_set_label(skip); 170 gen_goto_tb(ctx, 1, ctx->next_PC); 171 } else { 172 tcg_gen_lookup_and_goto_ptr(); 173 } 174 175 ctx->base.is_jmp = DISAS_NORETURN; 176 } 177 178 static void gen_exception_end_tb(DisasContext *ctx, int excp) 179 { 180 gen_exec_counters(ctx); 181 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->next_PC); 182 gen_exception_raw(excp); 183 ctx->base.is_jmp = DISAS_NORETURN; 184 185 } 186 187 #define PACKET_BUFFER_LEN 1028 188 static void print_pkt(Packet *pkt) 189 { 190 GString *buf = g_string_sized_new(PACKET_BUFFER_LEN); 191 snprint_a_pkt_debug(buf, pkt); 192 HEX_DEBUG_LOG("%s", buf->str); 193 g_string_free(buf, true); 194 } 195 #define HEX_DEBUG_PRINT_PKT(pkt) \ 196 do { \ 197 if (HEX_DEBUG) { \ 198 print_pkt(pkt); \ 199 } \ 200 } while (0) 201 202 static int read_packet_words(CPUHexagonState *env, DisasContext *ctx, 203 uint32_t words[]) 204 { 205 bool found_end = false; 206 int nwords, max_words; 207 208 memset(words, 0, PACKET_WORDS_MAX * sizeof(uint32_t)); 209 for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) { 210 words[nwords] = 211 translator_ldl(env, &ctx->base, 212 ctx->base.pc_next + nwords * sizeof(uint32_t)); 213 found_end = is_packet_end(words[nwords]); 214 } 215 if (!found_end) { 216 /* Read too many words without finding the end */ 217 return 0; 218 } 219 220 /* Check for page boundary crossing */ 221 max_words = -(ctx->base.pc_next | TARGET_PAGE_MASK) / sizeof(uint32_t); 222 if (nwords > max_words) { 223 /* We can only cross a page boundary at the beginning of a TB */ 224 g_assert(ctx->base.num_insns == 1); 225 } 226 227 HEX_DEBUG_LOG("decode_packet: pc = 0x%x\n", ctx->base.pc_next); 228 HEX_DEBUG_LOG(" words = { "); 229 for (int i = 0; i < nwords; i++) { 230 HEX_DEBUG_LOG("0x%x, ", words[i]); 231 } 232 HEX_DEBUG_LOG("}\n"); 233 234 return nwords; 235 } 236 237 static bool check_for_attrib(Packet *pkt, int attrib) 238 { 239 for (int i = 0; i < pkt->num_insns; i++) { 240 if (GET_ATTRIB(pkt->insn[i].opcode, attrib)) { 241 return true; 242 } 243 } 244 return false; 245 } 246 247 static bool need_slot_cancelled(Packet *pkt) 248 { 249 /* We only need slot_cancelled for conditional store instructions */ 250 for (int i = 0; i < pkt->num_insns; i++) { 251 uint16_t opcode = pkt->insn[i].opcode; 252 if (GET_ATTRIB(opcode, A_CONDEXEC) && 253 GET_ATTRIB(opcode, A_SCALAR_STORE)) { 254 return true; 255 } 256 } 257 return false; 258 } 259 260 static bool need_pred_written(Packet *pkt) 261 { 262 return check_for_attrib(pkt, A_WRITES_PRED_REG); 263 } 264 265 static bool need_next_PC(DisasContext *ctx) 266 { 267 Packet *pkt = ctx->pkt; 268 269 /* Check for conditional control flow or HW loop end */ 270 for (int i = 0; i < pkt->num_insns; i++) { 271 uint16_t opcode = pkt->insn[i].opcode; 272 if (GET_ATTRIB(opcode, A_CONDEXEC) && GET_ATTRIB(opcode, A_COF)) { 273 return true; 274 } 275 if (GET_ATTRIB(opcode, A_HWLOOP0_END) || 276 GET_ATTRIB(opcode, A_HWLOOP1_END)) { 277 return true; 278 } 279 } 280 return false; 281 } 282 283 /* 284 * The opcode_analyze functions mark most of the writes in a packet 285 * However, there are some implicit writes marked as attributes 286 * of the applicable instructions. 287 */ 288 static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum) 289 { 290 uint16_t opcode = ctx->insn->opcode; 291 if (GET_ATTRIB(opcode, attrib)) { 292 /* 293 * USR is used to set overflow and FP exceptions, 294 * so treat it as conditional 295 */ 296 bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) || 297 rnum == HEX_REG_USR; 298 299 /* LC0/LC1 is conditionally written by endloop instructions */ 300 if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) && 301 (opcode == J2_endloop0 || 302 opcode == J2_endloop1 || 303 opcode == J2_endloop01)) { 304 is_predicated = true; 305 } 306 307 ctx_log_reg_write(ctx, rnum, is_predicated); 308 } 309 } 310 311 static void mark_implicit_reg_writes(DisasContext *ctx) 312 { 313 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_FP, HEX_REG_FP); 314 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SP, HEX_REG_SP); 315 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LR, HEX_REG_LR); 316 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC0, HEX_REG_LC0); 317 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0); 318 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1); 319 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1); 320 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_USR, HEX_REG_USR); 321 mark_implicit_reg_write(ctx, A_FPOP, HEX_REG_USR); 322 } 323 324 static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum) 325 { 326 if (GET_ATTRIB(ctx->insn->opcode, attrib)) { 327 ctx_log_pred_write(ctx, pnum); 328 } 329 } 330 331 static void mark_implicit_pred_writes(DisasContext *ctx) 332 { 333 mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P0, 0); 334 mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P1, 1); 335 mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P2, 2); 336 mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3); 337 } 338 339 static void analyze_packet(DisasContext *ctx) 340 { 341 Packet *pkt = ctx->pkt; 342 ctx->need_pkt_has_store_s1 = false; 343 for (int i = 0; i < pkt->num_insns; i++) { 344 Insn *insn = &pkt->insn[i]; 345 ctx->insn = insn; 346 if (opcode_analyze[insn->opcode]) { 347 opcode_analyze[insn->opcode](ctx); 348 } 349 mark_implicit_reg_writes(ctx); 350 mark_implicit_pred_writes(ctx); 351 } 352 } 353 354 static void gen_start_packet(DisasContext *ctx) 355 { 356 Packet *pkt = ctx->pkt; 357 target_ulong next_PC = ctx->base.pc_next + pkt->encod_pkt_size_in_bytes; 358 int i; 359 360 /* Clear out the disassembly context */ 361 ctx->next_PC = next_PC; 362 ctx->reg_log_idx = 0; 363 bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS); 364 bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); 365 ctx->preg_log_idx = 0; 366 bitmap_zero(ctx->pregs_written, NUM_PREGS); 367 ctx->future_vregs_idx = 0; 368 ctx->tmp_vregs_idx = 0; 369 ctx->vreg_log_idx = 0; 370 bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS); 371 bitmap_zero(ctx->vregs_updated, NUM_VREGS); 372 bitmap_zero(ctx->vregs_select, NUM_VREGS); 373 bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS); 374 bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS); 375 ctx->qreg_log_idx = 0; 376 for (i = 0; i < STORES_MAX; i++) { 377 ctx->store_width[i] = 0; 378 } 379 ctx->s1_store_processed = false; 380 ctx->pre_commit = true; 381 382 analyze_packet(ctx); 383 384 if (ctx->need_pkt_has_store_s1) { 385 tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1); 386 } 387 388 /* 389 * pregs_written is used both in the analyze phase as well as the code 390 * gen phase, so clear it again. 391 */ 392 bitmap_zero(ctx->pregs_written, NUM_PREGS); 393 394 if (HEX_DEBUG) { 395 /* Handy place to set a breakpoint before the packet executes */ 396 gen_helper_debug_start_packet(cpu_env); 397 tcg_gen_movi_tl(hex_this_PC, ctx->base.pc_next); 398 } 399 400 /* Initialize the runtime state for packet semantics */ 401 if (need_slot_cancelled(pkt)) { 402 tcg_gen_movi_tl(hex_slot_cancelled, 0); 403 } 404 if (pkt->pkt_has_cof) { 405 if (pkt->pkt_has_multi_cof) { 406 tcg_gen_movi_tl(hex_branch_taken, 0); 407 } 408 if (need_next_PC(ctx)) { 409 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], next_PC); 410 } 411 } 412 if (need_pred_written(pkt)) { 413 tcg_gen_movi_tl(hex_pred_written, 0); 414 } 415 416 /* Preload the predicated registers into hex_new_value[i] */ 417 if (!bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) { 418 int i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); 419 while (i < TOTAL_PER_THREAD_REGS) { 420 tcg_gen_mov_tl(hex_new_value[i], hex_gpr[i]); 421 i = find_next_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS, 422 i + 1); 423 } 424 } 425 426 /* Preload the predicated HVX registers into future_VRegs and tmp_VRegs */ 427 if (!bitmap_empty(ctx->predicated_future_vregs, NUM_VREGS)) { 428 int i = find_first_bit(ctx->predicated_future_vregs, NUM_VREGS); 429 while (i < NUM_VREGS) { 430 const intptr_t VdV_off = 431 ctx_future_vreg_off(ctx, i, 1, true); 432 intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]); 433 tcg_gen_gvec_mov(MO_64, VdV_off, 434 src_off, 435 sizeof(MMVector), 436 sizeof(MMVector)); 437 i = find_next_bit(ctx->predicated_future_vregs, NUM_VREGS, i + 1); 438 } 439 } 440 if (!bitmap_empty(ctx->predicated_tmp_vregs, NUM_VREGS)) { 441 int i = find_first_bit(ctx->predicated_tmp_vregs, NUM_VREGS); 442 while (i < NUM_VREGS) { 443 const intptr_t VdV_off = 444 ctx_tmp_vreg_off(ctx, i, 1, true); 445 intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]); 446 tcg_gen_gvec_mov(MO_64, VdV_off, 447 src_off, 448 sizeof(MMVector), 449 sizeof(MMVector)); 450 i = find_next_bit(ctx->predicated_tmp_vregs, NUM_VREGS, i + 1); 451 } 452 } 453 } 454 455 bool is_gather_store_insn(DisasContext *ctx) 456 { 457 Packet *pkt = ctx->pkt; 458 Insn *insn = ctx->insn; 459 if (GET_ATTRIB(insn->opcode, A_CVI_NEW) && 460 insn->new_value_producer_slot == 1) { 461 /* Look for gather instruction */ 462 for (int i = 0; i < pkt->num_insns; i++) { 463 Insn *in = &pkt->insn[i]; 464 if (GET_ATTRIB(in->opcode, A_CVI_GATHER) && in->slot == 1) { 465 return true; 466 } 467 } 468 } 469 return false; 470 } 471 472 static void mark_store_width(DisasContext *ctx) 473 { 474 uint16_t opcode = ctx->insn->opcode; 475 uint32_t slot = ctx->insn->slot; 476 uint8_t width = 0; 477 478 if (GET_ATTRIB(opcode, A_SCALAR_STORE)) { 479 if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) { 480 width |= 1; 481 } 482 if (GET_ATTRIB(opcode, A_MEMSIZE_2B)) { 483 width |= 2; 484 } 485 if (GET_ATTRIB(opcode, A_MEMSIZE_4B)) { 486 width |= 4; 487 } 488 if (GET_ATTRIB(opcode, A_MEMSIZE_8B)) { 489 width |= 8; 490 } 491 tcg_debug_assert(is_power_of_2(width)); 492 ctx->store_width[slot] = width; 493 } 494 } 495 496 static void gen_insn(DisasContext *ctx) 497 { 498 if (ctx->insn->generate) { 499 ctx->insn->generate(ctx); 500 mark_store_width(ctx); 501 } else { 502 gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE); 503 } 504 } 505 506 /* 507 * Helpers for generating the packet commit 508 */ 509 static void gen_reg_writes(DisasContext *ctx) 510 { 511 int i; 512 513 for (i = 0; i < ctx->reg_log_idx; i++) { 514 int reg_num = ctx->reg_log[i]; 515 516 tcg_gen_mov_tl(hex_gpr[reg_num], hex_new_value[reg_num]); 517 518 /* 519 * ctx->is_tight_loop is set when SA0 points to the beginning of the TB. 520 * If we write to SA0, we have to turn off tight loop handling. 521 */ 522 if (reg_num == HEX_REG_SA0) { 523 ctx->is_tight_loop = false; 524 } 525 } 526 } 527 528 static void gen_pred_writes(DisasContext *ctx) 529 { 530 int i; 531 532 /* Early exit if the log is empty */ 533 if (!ctx->preg_log_idx) { 534 return; 535 } 536 537 /* 538 * Only endloop instructions will conditionally 539 * write a predicate. If there are no endloop 540 * instructions, we can use the non-conditional 541 * write of the predicates. 542 */ 543 if (ctx->pkt->pkt_has_endloop) { 544 TCGv zero = tcg_constant_tl(0); 545 TCGv pred_written = tcg_temp_new(); 546 for (i = 0; i < ctx->preg_log_idx; i++) { 547 int pred_num = ctx->preg_log[i]; 548 549 tcg_gen_andi_tl(pred_written, hex_pred_written, 1 << pred_num); 550 tcg_gen_movcond_tl(TCG_COND_NE, hex_pred[pred_num], 551 pred_written, zero, 552 hex_new_pred_value[pred_num], 553 hex_pred[pred_num]); 554 } 555 } else { 556 for (i = 0; i < ctx->preg_log_idx; i++) { 557 int pred_num = ctx->preg_log[i]; 558 tcg_gen_mov_tl(hex_pred[pred_num], hex_new_pred_value[pred_num]); 559 if (HEX_DEBUG) { 560 /* Do this so HELPER(debug_commit_end) will know */ 561 tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 562 1 << pred_num); 563 } 564 } 565 } 566 } 567 568 static void gen_check_store_width(DisasContext *ctx, int slot_num) 569 { 570 if (HEX_DEBUG) { 571 TCGv slot = tcg_constant_tl(slot_num); 572 TCGv check = tcg_constant_tl(ctx->store_width[slot_num]); 573 gen_helper_debug_check_store_width(cpu_env, slot, check); 574 } 575 } 576 577 static bool slot_is_predicated(Packet *pkt, int slot_num) 578 { 579 for (int i = 0; i < pkt->num_insns; i++) { 580 if (pkt->insn[i].slot == slot_num) { 581 return GET_ATTRIB(pkt->insn[i].opcode, A_CONDEXEC); 582 } 583 } 584 /* If we get to here, we didn't find an instruction in the requested slot */ 585 g_assert_not_reached(); 586 } 587 588 void process_store(DisasContext *ctx, int slot_num) 589 { 590 bool is_predicated = slot_is_predicated(ctx->pkt, slot_num); 591 TCGLabel *label_end = NULL; 592 593 /* 594 * We may have already processed this store 595 * See CHECK_NOSHUF in macros.h 596 */ 597 if (slot_num == 1 && ctx->s1_store_processed) { 598 return; 599 } 600 ctx->s1_store_processed = true; 601 602 if (is_predicated) { 603 TCGv cancelled = tcg_temp_new(); 604 label_end = gen_new_label(); 605 606 /* Don't do anything if the slot was cancelled */ 607 tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1); 608 tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end); 609 } 610 { 611 TCGv address = tcg_temp_new(); 612 tcg_gen_mov_tl(address, hex_store_addr[slot_num]); 613 614 /* 615 * If we know the width from the DisasContext, we can 616 * generate much cleaner code. 617 * Unfortunately, not all instructions execute the fSTORE 618 * macro during code generation. Anything that uses the 619 * generic helper will have this problem. Instructions 620 * that use fWRAP to generate proper TCG code will be OK. 621 */ 622 switch (ctx->store_width[slot_num]) { 623 case 1: 624 gen_check_store_width(ctx, slot_num); 625 tcg_gen_qemu_st8(hex_store_val32[slot_num], 626 hex_store_addr[slot_num], 627 ctx->mem_idx); 628 break; 629 case 2: 630 gen_check_store_width(ctx, slot_num); 631 tcg_gen_qemu_st16(hex_store_val32[slot_num], 632 hex_store_addr[slot_num], 633 ctx->mem_idx); 634 break; 635 case 4: 636 gen_check_store_width(ctx, slot_num); 637 tcg_gen_qemu_st32(hex_store_val32[slot_num], 638 hex_store_addr[slot_num], 639 ctx->mem_idx); 640 break; 641 case 8: 642 gen_check_store_width(ctx, slot_num); 643 tcg_gen_qemu_st64(hex_store_val64[slot_num], 644 hex_store_addr[slot_num], 645 ctx->mem_idx); 646 break; 647 default: 648 { 649 /* 650 * If we get to here, we don't know the width at 651 * TCG generation time, we'll use a helper to 652 * avoid branching based on the width at runtime. 653 */ 654 TCGv slot = tcg_constant_tl(slot_num); 655 gen_helper_commit_store(cpu_env, slot); 656 } 657 } 658 } 659 if (is_predicated) { 660 gen_set_label(label_end); 661 } 662 } 663 664 static void process_store_log(DisasContext *ctx) 665 { 666 /* 667 * When a packet has two stores, the hardware processes 668 * slot 1 and then slot 0. This will be important when 669 * the memory accesses overlap. 670 */ 671 Packet *pkt = ctx->pkt; 672 if (pkt->pkt_has_store_s1) { 673 g_assert(!pkt->pkt_has_dczeroa); 674 process_store(ctx, 1); 675 } 676 if (pkt->pkt_has_store_s0) { 677 g_assert(!pkt->pkt_has_dczeroa); 678 process_store(ctx, 0); 679 } 680 } 681 682 /* Zero out a 32-bit cache line */ 683 static void process_dczeroa(DisasContext *ctx) 684 { 685 if (ctx->pkt->pkt_has_dczeroa) { 686 /* Store 32 bytes of zero starting at (addr & ~0x1f) */ 687 TCGv addr = tcg_temp_new(); 688 TCGv_i64 zero = tcg_constant_i64(0); 689 690 tcg_gen_andi_tl(addr, hex_dczero_addr, ~0x1f); 691 tcg_gen_qemu_st64(zero, addr, ctx->mem_idx); 692 tcg_gen_addi_tl(addr, addr, 8); 693 tcg_gen_qemu_st64(zero, addr, ctx->mem_idx); 694 tcg_gen_addi_tl(addr, addr, 8); 695 tcg_gen_qemu_st64(zero, addr, ctx->mem_idx); 696 tcg_gen_addi_tl(addr, addr, 8); 697 tcg_gen_qemu_st64(zero, addr, ctx->mem_idx); 698 } 699 } 700 701 static bool pkt_has_hvx_store(Packet *pkt) 702 { 703 int i; 704 for (i = 0; i < pkt->num_insns; i++) { 705 int opcode = pkt->insn[i].opcode; 706 if (GET_ATTRIB(opcode, A_CVI) && GET_ATTRIB(opcode, A_STORE)) { 707 return true; 708 } 709 } 710 return false; 711 } 712 713 static void gen_commit_hvx(DisasContext *ctx) 714 { 715 int i; 716 717 /* 718 * for (i = 0; i < ctx->vreg_log_idx; i++) { 719 * int rnum = ctx->vreg_log[i]; 720 * env->VRegs[rnum] = env->future_VRegs[rnum]; 721 * } 722 */ 723 for (i = 0; i < ctx->vreg_log_idx; i++) { 724 int rnum = ctx->vreg_log[i]; 725 intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]); 726 intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false); 727 size_t size = sizeof(MMVector); 728 729 tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); 730 } 731 732 /* 733 * for (i = 0; i < ctx->qreg_log_idx; i++) { 734 * int rnum = ctx->qreg_log[i]; 735 * env->QRegs[rnum] = env->future_QRegs[rnum]; 736 * } 737 */ 738 for (i = 0; i < ctx->qreg_log_idx; i++) { 739 int rnum = ctx->qreg_log[i]; 740 intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]); 741 intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]); 742 size_t size = sizeof(MMQReg); 743 744 tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); 745 } 746 747 if (pkt_has_hvx_store(ctx->pkt)) { 748 gen_helper_commit_hvx_stores(cpu_env); 749 } 750 } 751 752 static void update_exec_counters(DisasContext *ctx) 753 { 754 Packet *pkt = ctx->pkt; 755 int num_insns = pkt->num_insns; 756 int num_real_insns = 0; 757 int num_hvx_insns = 0; 758 759 for (int i = 0; i < num_insns; i++) { 760 if (!pkt->insn[i].is_endloop && 761 !pkt->insn[i].part1 && 762 !GET_ATTRIB(pkt->insn[i].opcode, A_IT_NOP)) { 763 num_real_insns++; 764 } 765 if (GET_ATTRIB(pkt->insn[i].opcode, A_CVI)) { 766 num_hvx_insns++; 767 } 768 } 769 770 ctx->num_packets++; 771 ctx->num_insns += num_real_insns; 772 ctx->num_hvx_insns += num_hvx_insns; 773 } 774 775 static void gen_commit_packet(DisasContext *ctx) 776 { 777 /* 778 * If there is more than one store in a packet, make sure they are all OK 779 * before proceeding with the rest of the packet commit. 780 * 781 * dczeroa has to be the only store operation in the packet, so we go 782 * ahead and process that first. 783 * 784 * When there is an HVX store, there can also be a scalar store in either 785 * slot 0 or slot1, so we create a mask for the helper to indicate what 786 * work to do. 787 * 788 * When there are two scalar stores, we probe the one in slot 0. 789 * 790 * Note that we don't call the probe helper for packets with only one 791 * store. Therefore, we call process_store_log before anything else 792 * involved in committing the packet. 793 */ 794 Packet *pkt = ctx->pkt; 795 bool has_store_s0 = pkt->pkt_has_store_s0; 796 bool has_store_s1 = (pkt->pkt_has_store_s1 && !ctx->s1_store_processed); 797 bool has_hvx_store = pkt_has_hvx_store(pkt); 798 if (pkt->pkt_has_dczeroa) { 799 /* 800 * The dczeroa will be the store in slot 0, check that we don't have 801 * a store in slot 1 or an HVX store. 802 */ 803 g_assert(!has_store_s1 && !has_hvx_store); 804 process_dczeroa(ctx); 805 } else if (has_hvx_store) { 806 TCGv mem_idx = tcg_constant_tl(ctx->mem_idx); 807 808 if (!has_store_s0 && !has_store_s1) { 809 gen_helper_probe_hvx_stores(cpu_env, mem_idx); 810 } else { 811 int mask = 0; 812 TCGv mask_tcgv; 813 814 if (has_store_s0) { 815 mask = 816 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0, 1); 817 } 818 if (has_store_s1) { 819 mask = 820 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1, 1); 821 } 822 if (has_hvx_store) { 823 mask = 824 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, 825 HAS_HVX_STORES, 1); 826 } 827 if (has_store_s0 && slot_is_predicated(pkt, 0)) { 828 mask = 829 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, 830 S0_IS_PRED, 1); 831 } 832 if (has_store_s1 && slot_is_predicated(pkt, 1)) { 833 mask = 834 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, 835 S1_IS_PRED, 1); 836 } 837 mask_tcgv = tcg_constant_tl(mask); 838 gen_helper_probe_pkt_scalar_hvx_stores(cpu_env, mask_tcgv, mem_idx); 839 } 840 } else if (has_store_s0 && has_store_s1) { 841 /* 842 * process_store_log will execute the slot 1 store first, 843 * so we only have to probe the store in slot 0 844 */ 845 int args = 0; 846 args = 847 FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, ctx->mem_idx); 848 if (slot_is_predicated(pkt, 0)) { 849 args = 850 FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED, 1); 851 } 852 TCGv args_tcgv = tcg_constant_tl(args); 853 gen_helper_probe_pkt_scalar_store_s0(cpu_env, args_tcgv); 854 } 855 856 process_store_log(ctx); 857 858 gen_reg_writes(ctx); 859 gen_pred_writes(ctx); 860 if (pkt->pkt_has_hvx) { 861 gen_commit_hvx(ctx); 862 } 863 update_exec_counters(ctx); 864 if (HEX_DEBUG) { 865 TCGv has_st0 = 866 tcg_constant_tl(pkt->pkt_has_store_s0 && !pkt->pkt_has_dczeroa); 867 TCGv has_st1 = 868 tcg_constant_tl(pkt->pkt_has_store_s1 && !pkt->pkt_has_dczeroa); 869 870 /* Handy place to set a breakpoint at the end of execution */ 871 gen_helper_debug_commit_end(cpu_env, has_st0, has_st1); 872 } 873 874 if (pkt->vhist_insn != NULL) { 875 ctx->pre_commit = false; 876 ctx->insn = pkt->vhist_insn; 877 pkt->vhist_insn->generate(ctx); 878 } 879 880 if (pkt->pkt_has_cof) { 881 gen_end_tb(ctx); 882 } 883 } 884 885 static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx) 886 { 887 uint32_t words[PACKET_WORDS_MAX]; 888 int nwords; 889 Packet pkt; 890 int i; 891 892 nwords = read_packet_words(env, ctx, words); 893 if (!nwords) { 894 gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET); 895 return; 896 } 897 898 if (decode_packet(nwords, words, &pkt, false) > 0) { 899 pkt.pc = ctx->base.pc_next; 900 HEX_DEBUG_PRINT_PKT(&pkt); 901 ctx->pkt = &pkt; 902 gen_start_packet(ctx); 903 for (i = 0; i < pkt.num_insns; i++) { 904 ctx->insn = &pkt.insn[i]; 905 gen_insn(ctx); 906 } 907 gen_commit_packet(ctx); 908 ctx->base.pc_next += pkt.encod_pkt_size_in_bytes; 909 } else { 910 gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET); 911 } 912 } 913 914 static void hexagon_tr_init_disas_context(DisasContextBase *dcbase, 915 CPUState *cs) 916 { 917 DisasContext *ctx = container_of(dcbase, DisasContext, base); 918 uint32_t hex_flags = dcbase->tb->flags; 919 920 ctx->mem_idx = MMU_USER_IDX; 921 ctx->num_packets = 0; 922 ctx->num_insns = 0; 923 ctx->num_hvx_insns = 0; 924 ctx->branch_cond = TCG_COND_NEVER; 925 ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP); 926 } 927 928 static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu) 929 { 930 } 931 932 static void hexagon_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) 933 { 934 DisasContext *ctx = container_of(dcbase, DisasContext, base); 935 936 tcg_gen_insn_start(ctx->base.pc_next); 937 } 938 939 static bool pkt_crosses_page(CPUHexagonState *env, DisasContext *ctx) 940 { 941 target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK; 942 bool found_end = false; 943 int nwords; 944 945 for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) { 946 uint32_t word = cpu_ldl_code(env, 947 ctx->base.pc_next + nwords * sizeof(uint32_t)); 948 found_end = is_packet_end(word); 949 } 950 uint32_t next_ptr = ctx->base.pc_next + nwords * sizeof(uint32_t); 951 return found_end && next_ptr - page_start >= TARGET_PAGE_SIZE; 952 } 953 954 static void hexagon_tr_translate_packet(DisasContextBase *dcbase, CPUState *cpu) 955 { 956 DisasContext *ctx = container_of(dcbase, DisasContext, base); 957 CPUHexagonState *env = cpu->env_ptr; 958 959 decode_and_translate_packet(env, ctx); 960 961 if (ctx->base.is_jmp == DISAS_NEXT) { 962 target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK; 963 target_ulong bytes_max = PACKET_WORDS_MAX * sizeof(target_ulong); 964 965 if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE || 966 (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE - bytes_max && 967 pkt_crosses_page(env, ctx))) { 968 ctx->base.is_jmp = DISAS_TOO_MANY; 969 } 970 971 /* 972 * The CPU log is used to compare against LLDB single stepping, 973 * so end the TLB after every packet. 974 */ 975 HexagonCPU *hex_cpu = env_archcpu(env); 976 if (hex_cpu->lldb_compat && qemu_loglevel_mask(CPU_LOG_TB_CPU)) { 977 ctx->base.is_jmp = DISAS_TOO_MANY; 978 } 979 } 980 } 981 982 static void hexagon_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) 983 { 984 DisasContext *ctx = container_of(dcbase, DisasContext, base); 985 986 switch (ctx->base.is_jmp) { 987 case DISAS_TOO_MANY: 988 gen_exec_counters(ctx); 989 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next); 990 tcg_gen_exit_tb(NULL, 0); 991 break; 992 case DISAS_NORETURN: 993 break; 994 default: 995 g_assert_not_reached(); 996 } 997 } 998 999 static void hexagon_tr_disas_log(const DisasContextBase *dcbase, 1000 CPUState *cpu, FILE *logfile) 1001 { 1002 fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first)); 1003 target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size); 1004 } 1005 1006 1007 static const TranslatorOps hexagon_tr_ops = { 1008 .init_disas_context = hexagon_tr_init_disas_context, 1009 .tb_start = hexagon_tr_tb_start, 1010 .insn_start = hexagon_tr_insn_start, 1011 .translate_insn = hexagon_tr_translate_packet, 1012 .tb_stop = hexagon_tr_tb_stop, 1013 .disas_log = hexagon_tr_disas_log, 1014 }; 1015 1016 void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, 1017 target_ulong pc, void *host_pc) 1018 { 1019 DisasContext ctx; 1020 1021 translator_loop(cs, tb, max_insns, pc, host_pc, 1022 &hexagon_tr_ops, &ctx.base); 1023 } 1024 1025 #define NAME_LEN 64 1026 static char new_value_names[TOTAL_PER_THREAD_REGS][NAME_LEN]; 1027 static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN]; 1028 static char new_pred_value_names[NUM_PREGS][NAME_LEN]; 1029 static char store_addr_names[STORES_MAX][NAME_LEN]; 1030 static char store_width_names[STORES_MAX][NAME_LEN]; 1031 static char store_val32_names[STORES_MAX][NAME_LEN]; 1032 static char store_val64_names[STORES_MAX][NAME_LEN]; 1033 static char vstore_addr_names[VSTORES_MAX][NAME_LEN]; 1034 static char vstore_size_names[VSTORES_MAX][NAME_LEN]; 1035 static char vstore_pending_names[VSTORES_MAX][NAME_LEN]; 1036 1037 void hexagon_translate_init(void) 1038 { 1039 int i; 1040 1041 opcode_init(); 1042 1043 for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) { 1044 hex_gpr[i] = tcg_global_mem_new(cpu_env, 1045 offsetof(CPUHexagonState, gpr[i]), 1046 hexagon_regnames[i]); 1047 1048 snprintf(new_value_names[i], NAME_LEN, "new_%s", hexagon_regnames[i]); 1049 hex_new_value[i] = tcg_global_mem_new(cpu_env, 1050 offsetof(CPUHexagonState, new_value[i]), 1051 new_value_names[i]); 1052 1053 if (HEX_DEBUG) { 1054 snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s", 1055 hexagon_regnames[i]); 1056 hex_reg_written[i] = tcg_global_mem_new(cpu_env, 1057 offsetof(CPUHexagonState, reg_written[i]), 1058 reg_written_names[i]); 1059 } 1060 } 1061 for (i = 0; i < NUM_PREGS; i++) { 1062 hex_pred[i] = tcg_global_mem_new(cpu_env, 1063 offsetof(CPUHexagonState, pred[i]), 1064 hexagon_prednames[i]); 1065 1066 snprintf(new_pred_value_names[i], NAME_LEN, "new_pred_%s", 1067 hexagon_prednames[i]); 1068 hex_new_pred_value[i] = tcg_global_mem_new(cpu_env, 1069 offsetof(CPUHexagonState, new_pred_value[i]), 1070 new_pred_value_names[i]); 1071 } 1072 hex_pred_written = tcg_global_mem_new(cpu_env, 1073 offsetof(CPUHexagonState, pred_written), "pred_written"); 1074 hex_this_PC = tcg_global_mem_new(cpu_env, 1075 offsetof(CPUHexagonState, this_PC), "this_PC"); 1076 hex_slot_cancelled = tcg_global_mem_new(cpu_env, 1077 offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled"); 1078 hex_branch_taken = tcg_global_mem_new(cpu_env, 1079 offsetof(CPUHexagonState, branch_taken), "branch_taken"); 1080 hex_pkt_has_store_s1 = tcg_global_mem_new(cpu_env, 1081 offsetof(CPUHexagonState, pkt_has_store_s1), "pkt_has_store_s1"); 1082 hex_dczero_addr = tcg_global_mem_new(cpu_env, 1083 offsetof(CPUHexagonState, dczero_addr), "dczero_addr"); 1084 hex_llsc_addr = tcg_global_mem_new(cpu_env, 1085 offsetof(CPUHexagonState, llsc_addr), "llsc_addr"); 1086 hex_llsc_val = tcg_global_mem_new(cpu_env, 1087 offsetof(CPUHexagonState, llsc_val), "llsc_val"); 1088 hex_llsc_val_i64 = tcg_global_mem_new_i64(cpu_env, 1089 offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64"); 1090 for (i = 0; i < STORES_MAX; i++) { 1091 snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i); 1092 hex_store_addr[i] = tcg_global_mem_new(cpu_env, 1093 offsetof(CPUHexagonState, mem_log_stores[i].va), 1094 store_addr_names[i]); 1095 1096 snprintf(store_width_names[i], NAME_LEN, "store_width_%d", i); 1097 hex_store_width[i] = tcg_global_mem_new(cpu_env, 1098 offsetof(CPUHexagonState, mem_log_stores[i].width), 1099 store_width_names[i]); 1100 1101 snprintf(store_val32_names[i], NAME_LEN, "store_val32_%d", i); 1102 hex_store_val32[i] = tcg_global_mem_new(cpu_env, 1103 offsetof(CPUHexagonState, mem_log_stores[i].data32), 1104 store_val32_names[i]); 1105 1106 snprintf(store_val64_names[i], NAME_LEN, "store_val64_%d", i); 1107 hex_store_val64[i] = tcg_global_mem_new_i64(cpu_env, 1108 offsetof(CPUHexagonState, mem_log_stores[i].data64), 1109 store_val64_names[i]); 1110 } 1111 for (int i = 0; i < VSTORES_MAX; i++) { 1112 snprintf(vstore_addr_names[i], NAME_LEN, "vstore_addr_%d", i); 1113 hex_vstore_addr[i] = tcg_global_mem_new(cpu_env, 1114 offsetof(CPUHexagonState, vstore[i].va), 1115 vstore_addr_names[i]); 1116 1117 snprintf(vstore_size_names[i], NAME_LEN, "vstore_size_%d", i); 1118 hex_vstore_size[i] = tcg_global_mem_new(cpu_env, 1119 offsetof(CPUHexagonState, vstore[i].size), 1120 vstore_size_names[i]); 1121 1122 snprintf(vstore_pending_names[i], NAME_LEN, "vstore_pending_%d", i); 1123 hex_vstore_pending[i] = tcg_global_mem_new(cpu_env, 1124 offsetof(CPUHexagonState, vstore_pending[i]), 1125 vstore_pending_names[i]); 1126 } 1127 } 1128