Lines Matching +full:cross +full:- +full:win64 +full:- +full:system
94 /* The Win64 ABI has xmm6-xmm15 as callee-saves, and we do not save
95 any of them. Therefore only allow xmm0-xmm5 to be allocated. */
189 value -= (uintptr_t)tcg_splitwx_to_rx(code_ptr);
198 value -= (uintptr_t)tcg_splitwx_to_rx(code_ptr);
235 * TESTQ -> TESTL (uint32_t)
236 * TESTQ -> BT (is_power_of_2)
506 /* Group 1 opcode extensions for 0x80-0x83.
517 /* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
540 #define JCC_JMP (-1)
582 /* We should never be asking for both 16 and 64-bit operation. */
643 the 32-bit compilation paths. This method works with all versions of gcc,
663 VEX.W, VEX.B, VEX.X, or an m-mmmm field other than P_EXT. */
674 /* VEX.m-mmmm */
709 /* The entire 4-byte evex prefix; with R' and V' set. */
782 We handle either RM or INDEX missing with a negative value. In 64-bit
793 /* Try for a rip-relative addressing mode. This has replaced
794 the 32-bit-mode absolute addressing encoding. */
795 intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
796 intptr_t disp = offset - pc;
805 rip-relative addressing. */
844 field indicates no index register. In 64-bit mode, the REX.X
882 tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
888 tcg_out_vex_modrm_sib_offset(s, opc, r, v, rm, -1, 0, offset);
895 /* Absolute for 32-bit, pc-relative for 64-bit. */
904 /* Absolute for 32-bit, pc-relative for 64-bit. */
1042 if (arg == -1) {
1053 new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
1063 new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
1065 new_pool_l2(s, R_386_32, s->code_ptr - 4, 0, arg, arg >> 32);
1077 if (arg == -1) {
1085 new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
1087 new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
1096 if (arg == 0 && !s->carry_live) {
1111 /* Try a 7 byte pc-relative lea before the 10 byte movq. */
1112 diff = tcg_pcrel_diff(s, (const void *)arg) - 7;
1174 store-load ordering. Experimentally, "lock orl $0,0(%esp)" is
1211 /* There is no instruction that can validate 8-byte alignment. */
1218 * and stores use a 16-byte aligned offset. Validate that the
1226 * The gvec infrastructure only requires 16-byte alignment,
1256 /* There is no instruction that can validate 8-byte alignment. */
1263 * and stores use a 16-byte aligned offset. Validate that the
1278 * The gvec infrastructure only requires 16-byte alignment,
1376 /* 32-bit mov zero extends. */
1414 rexw = c & -8;
1427 if (val == 1 || val == -1) {
1431 * The single-byte increment encodings are re-tasked
1443 * Facilitate using an 8-bit immediate. Carry is inverted
1447 val = -128;
1459 /* AND with no high bits set can use a 32-bit operation. */
1510 if (l->has_value) {
1511 val = tcg_pcrel_diff(s, l->u.value_ptr);
1512 val1 = val - 2;
1514 if (opc == -1) {
1522 if (opc == -1) {
1524 tcg_out32(s, val - 5);
1527 tcg_out32(s, val - 6);
1531 if (opc == -1) {
1536 tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
1537 s->code_ptr += 1;
1539 if (opc == -1) {
1544 tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
1545 s->code_ptr += 4;
1744 /* If arg2 is -1, convert to LTU/GEU vs 1. */
1771 * Relying on the carry bit, use SBB to produce -1 if LTU, 0 if GEU.
1777 /* X - X - C = -C = (C ? -1 : 0) */
1780 /* ~(C ? -1 : 0) = (C ? 0 : -1) */
1783 /* (C ? -1 : 0) + 1 = (C ? 0 : 1) */
1786 /* -(C ? -1 : 0) = (C ? 1 : 0) */
1813 * The XOR breaks any false dependency for the low-byte write to dest,
1940 intptr_t disp = tcg_pcrel_diff(s, dest) - 5;
1946 /* rip-relative addressing into the constant pool.
1949 be able to re-use the pool constant for more calls. */
1952 new_pool_label(s, (uintptr_t)dest, R_386_PC32, s->code_ptr, -4);
1963 if (TCG_TARGET_REG_BITS == 32 && info->out_kind == TCG_CALL_RET_BY_REF) {
2019 * Reject 16-byte memop with 16-byte atomicity, i.e. VMOVDQA,
2020 * but do allow a pair of 64-bit operations, i.e. MOVBEQ.
2031 * Even then, a scratch is only needed for l->raddr. Rather than expose
2032 * a general-purpose scratch when we don't actually know it's available,
2041 tcg_out_movi(s, TCG_TYPE_PTR, arg, (uintptr_t)l->raddr);
2080 MemOp opc = get_memop(l->oi);
2081 tcg_insn_unit **label_ptr = &l->label_ptr[0];
2084 tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
2086 tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
2093 tcg_out_jmp(s, l->raddr);
2102 MemOp opc = get_memop(l->oi);
2103 tcg_insn_unit **label_ptr = &l->label_ptr[0];
2106 tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
2108 tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
2114 tcg_out_jmp(s, l->raddr);
2120 .index = -1
2171 h->index = TCG_REG_L0;
2172 h->ofs = 0;
2173 h->seg = 0;
2177 h->base = addr;
2178 h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
2179 a_mask = (1 << h->aa.align) - 1;
2188 unsigned s_mask = (1 << s_bits) - 1;
2193 ldst->is_ld = is_ld;
2194 ldst->oi = oi;
2195 ldst->addr_reg = addr;
2198 ttype = s->addr_type;
2209 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
2220 * check that we don't cross pages for the complete access.
2226 addr, s_mask - a_mask);
2237 ldst->label_ptr[0] = s->code_ptr;
2238 s->code_ptr += 4;
2247 ldst->is_ld = is_ld;
2248 ldst->oi = oi;
2249 ldst->addr_reg = addr;
2254 ldst->label_ptr[0] = s->code_ptr;
2255 s->code_ptr += 4;
2268 /* Do big-endian loads with movbe. */
2286 /* There is no extending movbe; only low 16-bits are modified. */
2356 * Without 16-byte atomicity, use integer regs.
2382 * With 16-byte atomicity, a vector load is required.
2383 * If we already have 16-byte alignment, then VMOVDQA always works.
2430 tcg_out_qemu_ld_direct(s, data, -1, h, type, get_memop(oi));
2433 ldst->type = type;
2434 ldst->datalo_reg = data;
2435 ldst->datahi_reg = -1;
2436 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2455 ldst->type = type;
2456 ldst->datalo_reg = datalo;
2457 ldst->datahi_reg = datahi;
2458 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2474 * Do big-endian stores with movbe or system-mode.
2475 * User-only without movbe will have its swapping done generically.
2519 * Without 16-byte atomicity, use integer regs.
2536 * With 16-byte atomicity, a vector store is required.
2537 * If we already have 16-byte alignment, then VMOVDQA always works.
2584 tcg_out_qemu_st_direct(s, data, -1, h, get_memop(oi));
2587 ldst->type = type;
2588 ldst->datalo_reg = data;
2589 ldst->datahi_reg = -1;
2590 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2617 ldst->type = type;
2618 ldst->datalo_reg = datalo;
2619 ldst->datahi_reg = datahi;
2620 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2646 int gap = QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4) - s->code_ptr;
2648 tcg_out_nopn(s, gap - 1);
2666 uintptr_t addr = tb->jmp_target_addr[n];
2667 qatomic_set((int32_t *)jmp_rw, addr - (jmp_rx + 4));
2694 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, -1, 0, a2);
2773 /* Implement 0 + 0 + C with -(x - x - c). */
2847 /* Since we have destroyed the flags from BSR, we have to re-test. */
3159 /* For small constant 3-operand shift, use LEA. */
3162 /* shl $1,a1,a0 -> lea (a1,a1),a0 */
3165 /* shl $n,a1,a0 -> lea 0(,a1,n),a0 */
3166 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, -1, a1, a2, 0);
3286 /* Output must be sign-extended. */
3295 /* Output must be zero-extended, but input isn't. */
3409 /* This is a 32-bit zero-extending right shift. */
3415 * On the off-chance that we can use the high-byte registers.
3417 * would have gotten from the normal tcg-op.c expansion.
3659 /* avx2 does not have 64-bit min/max; adjusted during expand. */
3751 tcg_out_dupi_vec(s, type, vece, TCG_TMP_VEC, -1);
3765 /* Swap to place constant in V4 to take advantage of zero-masking. */
3807 tcg_out8(s, (TCG_TMP_VEC - TCG_REG_XMM0) << 4);
3994 /* First merge the two 32-bit inputs to a single 64-bit element. */
3996 /* Then replicate the 64-bit elements across the rest of the vector. */
4238 return -1;
4241 return have_avx512vl && vece >= MO_32 ? 1 : -1;
4246 return vece == MO_8 ? -1 : 1;
4251 return -1;
4263 return type >= TCG_TYPE_V256 ? -1 : 0;
4280 return vece >= MO_16 ? -1 : 0;
4306 return have_avx512vbmi2 ? -1 : 0;
4309 return have_avx512vl ? 1 : have_avx2 ? -1 : 0;
4316 return -1;
4362 /* Unpack to 16-bit, shift, and repack. */
4382 * 32-bit shift and overwriting the high half of a 64-bit logical
4384 * does not, so we have to bound the smaller shift -- we get the
4394 * the sign-extend, shift and merge.
4399 tcg_gen_shli_vec(MO_64, t1, t1, 64 - imm);
4423 tcg_gen_shri_vec(vece, v0, v1, (8 << vece) - imm);
4472 tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1);
4493 * This leaves the 8-bit result, x * y, with 8 bits of right padding.
4553 * Without AVX512, there are no 64-bit unsigned comparisons.
4562 TCGv_vec t3 = tcg_constant_vec(type, vece, 1ull << ((8 << vece) - 1));
4686 + TCG_TARGET_STACK_ALIGN - 1) \
4687 & ~(TCG_TARGET_STACK_ALIGN - 1))
4697 stack_addend = FRAME_SIZE - PUSH_SIZE;
4717 tcg_regset_set_reg(s->reserved_regs, x86_guest_base.index);
4724 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
4731 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
4737 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
4740 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
4744 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
4751 for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
4796 s->reserved_regs = 0;
4797 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
4798 tcg_regset_set_reg(s->reserved_regs, TCG_TMP_VEC);
4801 tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM6);
4802 tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM7);
4803 tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM8);
4804 tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM9);
4805 tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM10);
4806 tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM11);
4807 tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM12);
4808 tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM13);
4809 tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM14);
4810 tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM15);
4828 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
4829 .h.cie.id = -1,
4832 .h.cie.data_align = 0x78, /* sleb128 -8 */
4836 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
4844 0x90, 1, /* DW_CFA_offset, %rip, -8 */
4846 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
4847 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
4848 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
4849 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
4850 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
4851 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
4857 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
4858 .h.cie.id = -1,
4861 .h.cie.data_align = 0x7c, /* sleb128 -4 */
4865 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
4873 0x88, 1, /* DW_CFA_offset, %eip, -4 */
4875 0x85, 2, /* DW_CFA_offset, %ebp, -8 */
4876 0x83, 3, /* DW_CFA_offset, %ebx, -12 */
4877 0x86, 4, /* DW_CFA_offset, %esi, -16 */
4878 0x87, 5, /* DW_CFA_offset, %edi, -20 */