Lines Matching "cross-i686-system"
94 /* The Win64 ABI has xmm6-xmm15 as callee-saves, and we do not save
95 any of them. Therefore only allow xmm0-xmm5 to be allocated. */
189 value -= (uintptr_t)tcg_splitwx_to_rx(code_ptr);
198 value -= (uintptr_t)tcg_splitwx_to_rx(code_ptr);
235 * TESTQ -> TESTL (uint32_t)
236 * TESTQ -> BT (is_power_of_2)
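The two TESTQ rewrites above boil down to simple mask properties, sketched below with ad hoc helpers (is_power_of_2 and test_zf are illustrative, not the emitter's code): a mask that fits in 32 bits yields the same ZF from a 32-bit TEST, and a single-bit mask can be tested with BT instead.

    /* Illustrative sketch of the narrowing conditions behind
     * "TESTQ -> TESTL (uint32_t)" and "TESTQ -> BT (is_power_of_2)". */
    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    static bool is_power_of_2(uint64_t m)
    {
        return m != 0 && (m & (m - 1)) == 0;
    }

    /* ZF after "test reg, mask" is set iff (reg & mask) == 0. */
    static bool test_zf(uint64_t reg, uint64_t mask)
    {
        return (reg & mask) == 0;
    }

    int main(void)
    {
        uint64_t reg = 0x8000000000000001ull;

        /* Mask fits in 32 bits: a 32-bit TEST sees exactly the bits
           the mask selects, so ZF is unchanged. */
        uint64_t m32 = 0x00000001;
        assert(m32 == (uint32_t)m32);
        assert(test_zf(reg, m32) == test_zf((uint32_t)reg, (uint32_t)m32));

        /* Single-bit mask: BT copies that one bit into CF, so
           "bit clear" matches "TEST sets ZF". */
        uint64_t m1 = 1ull << 63;
        assert(is_power_of_2(m1));
        bool cf = (reg >> 63) & 1;          /* what BT reg, 63 would produce */
        assert((cf == false) == test_zf(reg, m1));
        return 0;
    }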
505 /* Group 1 opcode extensions for 0x80-0x83.
516 /* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
539 #define JCC_JMP (-1)
581 /* We should never be asking for both 16-bit and 64-bit operation.
642 the 32-bit compilation paths. This method works with all versions of gcc,
662 VEX.W, VEX.B, VEX.X, or an m-mmmm field other than P_EXT. */
673 /* VEX.m-mmmm */
708 /* The entire 4-byte evex prefix; with R' and V' set. */
781 We handle either RM and INDEX missing with a negative value. In 64-bit
792 /* Try for a rip-relative addressing mode. This has replaced
793 the 32-bit-mode absolute addressing encoding. */
794 intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
795 intptr_t disp = offset - pc;
804 rip-relative addressing. */
843 field indicates no index register. In 64-bit mode, the REX.X
881 tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
887 tcg_out_vex_modrm_sib_offset(s, opc, r, v, rm, -1, 0, offset);
894 /* Absolute for 32-bit, pc-relative for 64-bit. */
903 /* Absolute for 32-bit, pc-relative for 64-bit. */
1041 if (arg == -1) {
1052 new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
1062 new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
1064 new_pool_l2(s, R_386_32, s->code_ptr - 4, 0, arg, arg >> 32);
1076 if (arg == -1) {
1084 new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
1086 new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
1110 /* Try a 7 byte pc-relative lea before the 10 byte movq. */
1111 diff = tcg_pcrel_diff(s, (const void *)arg) - 7;
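The pool labels and the "7 byte pc-relative lea before the 10 byte movq" line suggest a size-ordered strategy for materializing a 64-bit constant. A hedged sketch of that decision, with a hypothetical pick_movi helper rather than the real emitter:

    /* Sketch of the constant-materialization order suggested above;
     * pick_movi and the enum are hypothetical, not QEMU code. */
    #include <stdint.h>

    enum movi_kind { MOVI_ZERO32, MOVI_SIGN32, MOVI_LEA_RIP, MOVI_MOVABS };

    /* pcrel_diff is the constant (viewed as an address) minus the
       current code pointer. */
    static enum movi_kind pick_movi(int64_t arg, intptr_t pcrel_diff)
    {
        if (arg == (uint32_t)arg) {
            return MOVI_ZERO32;          /* 5-byte movl, zero-extends */
        }
        if (arg == (int32_t)arg) {
            return MOVI_SIGN32;          /* 7-byte movq, sign-extended imm32 */
        }
        /* "Try a 7 byte pc-relative lea before the 10 byte movq":
           the rel32 is measured from the end of the 7-byte insn. */
        if (pcrel_diff - 7 == (int32_t)(pcrel_diff - 7)) {
            return MOVI_LEA_RIP;         /* lea disp(%rip), %reg */
        }
        return MOVI_MOVABS;              /* 10-byte movabsq $imm64 */
    }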
1173 store-load ordering. Experimentally, "lock orl $0,0(%esp)" is
1210 /* There is no instruction that can validate 8-byte alignment. */
1217 * and stores use a 16-byte aligned offset. Validate that the
1225 * The gvec infrastructure only requires 16-byte alignment,
1255 /* There is no instruction that can validate 8-byte alignment. */
1262 * and stores use a 16-byte aligned offset. Validate that the
1277 * The gvec infrastructure only requires 16-byte alignment,
1375 /* 32-bit mov zero extends. */
1413 rexw = c & -8;
1426 if (val == 1 || val == -1) {
1430 * The single-byte increment encodings are re-tasked
1442 * Facilitate using an 8-bit immediate. Carry is inverted
1446 val = -128;
1458 /* AND with no high bits set can use a 32-bit operation. */
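The fragments above all shrink the immediate encoding: ±1 can use inc/dec, values in signed-byte range use the imm8 form, and +128 (which just misses imm8) can be rewritten as a subtract of -128; the "Carry is inverted" line appears to record that this rewrite flips the carry, so it is presumably only done when the carry output is unused. A small check of the +128 trick, sketch only:

    /* Check of the imm8 trick hinted at by "val = -128":
     * add reg, 128 == sub reg, -128, and -128 fits in a signed byte
     * while +128 does not. */
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        for (uint64_t reg = 0; reg < 1024; reg += 7) {
            assert(reg + 128 == reg - (uint64_t)(int64_t)-128);
        }
        assert(128 > INT8_MAX);      /* +128 would need an imm32 */
        assert(-128 >= INT8_MIN);    /* -128 still fits in imm8 */
        return 0;
    }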
1509 if (l->has_value) {
1510 val = tcg_pcrel_diff(s, l->u.value_ptr);
1511 val1 = val - 2;
1513 if (opc == -1) {
1521 if (opc == -1) {
1523 tcg_out32(s, val - 5);
1526 tcg_out32(s, val - 6);
1530 if (opc == -1) {
1535 tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
1536 s->code_ptr += 1;
1538 if (opc == -1) {
1543 tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
1544 s->code_ptr += 4;
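The "val - 2", "val - 5" and "val - 6" adjustments above are the usual rel8/rel32 corrections: a branch displacement is measured from the end of the instruction, which is 2 bytes for the short forms, 5 for jmp rel32, and 6 for the two-byte-opcode jcc rel32; when the target is not yet known, the lines above fall back to emitting a relocation instead. A hedged sketch of the length choice (branch_length is hypothetical):

    /* Sketch of the short-vs-long branch choice implied by
     * "val - 2" / "val - 5" / "val - 6".  Not the QEMU code. */
    #include <stdbool.h>
    #include <stdint.h>

    /* 'dist' is target minus the address where the branch starts. */
    static int branch_length(intptr_t dist, bool is_jcc)
    {
        intptr_t rel8 = dist - 2;                 /* both short forms are 2 bytes */
        if (rel8 == (int8_t)rel8) {
            return 2;                             /* EB rel8 or 7x rel8 */
        }
        if (is_jcc) {
            /* 0F 8x rel32: displacement measured past 6 bytes. */
            return (dist - 6 == (int32_t)(dist - 6)) ? 6 : -1;
        }
        /* E9 rel32: displacement measured past 5 bytes. */
        return (dist - 5 == (int32_t)(dist - 5)) ? 5 : -1;
    }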
1707 /* If arg2 is -1, convert to LTU/GEU vs 1. */
1734 * Relying on the carry bit, use SBB to produce -1 if LTU, 0 if GEU.
1740 /* X - X - C = -C = (C ? -1 : 0) */
1743 /* ~(C ? -1 : 0) = (C ? 0 : -1) */
1746 /* (C ? -1 : 0) + 1 = (C ? 0 : 1) */
1749 /* -(C ? -1 : 0) = (C ? 1 : 0) */
1776 * The XOR breaks any false dependency for the low-byte write to dest,
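The SBB identities quoted above are plain two's-complement facts, verified below for both carry values; the XOR remark on the last line is about zeroing the destination so the byte-wide SETcc write does not create a false dependency. Sketch only:

    /* Verify the setcond identities quoted above for C in {0, 1}:
     *   sbb r,r ->  X - X - C = -C        = (C ? -1 : 0)
     *   not     -> ~(C ? -1 : 0)          = (C ? 0 : -1)
     *   inc     ->  (C ? -1 : 0) + 1      = (C ? 0 : 1)
     *   neg     -> -(C ? -1 : 0)          = (C ? 1 : 0)
     */
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        for (int c = 0; c <= 1; c++) {
            int64_t x = 12345;                  /* any value: it cancels */
            int64_t sbb = x - x - c;            /* what "sbb r,r" leaves */
            assert(sbb == (c ? -1 : 0));
            assert(~sbb == (c ? 0 : -1));
            assert(sbb + 1 == (c ? 0 : 1));
            assert(-sbb == (c ? 1 : 0));
        }
        return 0;
    }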
1892 /* Since we have destroyed the flags from BSR, we have to re-test. */
1900 intptr_t disp = tcg_pcrel_diff(s, dest) - 5;
1906 /* rip-relative addressing into the constant pool.
1909 be able to re-use the pool constant for more calls. */
1912 new_pool_label(s, (uintptr_t)dest, R_386_PC32, s->code_ptr, -4);
1923 if (TCG_TARGET_REG_BITS == 32 && info->out_kind == TCG_CALL_RET_BY_REF) {
1979 * Reject 16-byte memop with 16-byte atomicity, i.e. VMOVDQA,
1980 * but do allow a pair of 64-bit operations, i.e. MOVBEQ.
1987 * Because i686 has no register parameters and because x86_64 has xchg
1991 * Even then, a scratch is only needed for l->raddr. Rather than expose
1992 * a general-purpose scratch when we don't actually know it's available,
2001 tcg_out_movi(s, TCG_TYPE_PTR, arg, (uintptr_t)l->raddr);
2040 MemOp opc = get_memop(l->oi);
2041 tcg_insn_unit **label_ptr = &l->label_ptr[0];
2044 tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
2046 tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
2053 tcg_out_jmp(s, l->raddr);
2062 MemOp opc = get_memop(l->oi);
2063 tcg_insn_unit **label_ptr = &l->label_ptr[0];
2066 tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
2068 tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
2074 tcg_out_jmp(s, l->raddr);
2080 .index = -1
2131 h->index = TCG_REG_L0;
2132 h->ofs = 0;
2133 h->seg = 0;
2137 h->base = addr;
2138 h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
2139 a_mask = (1 << h->aa.align) - 1;
2148 unsigned s_mask = (1 << s_bits) - 1;
2153 ldst->is_ld = is_ld;
2154 ldst->oi = oi;
2155 ldst->addr_reg = addr;
2158 ttype = s->addr_type;
2162 if (s->page_bits + s->tlb_dyn_max_bits > 32) {
2171 s->page_bits - CPU_TLB_ENTRY_BITS);
2182 * check that we don't cross pages for the complete access.
2188 addr, s_mask - a_mask);
2190 tlb_mask = s->page_mask | a_mask;
2199 ldst->label_ptr[0] = s->code_ptr;
2200 s->code_ptr += 4;
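The a_mask/s_mask/tlb_mask lines above fold the alignment check and the page-crossing check into one AND-and-compare: adding s_mask - a_mask nudges any access that would spill into the next page past the page boundary, and keeping the a_mask bits in the mask catches misalignment. A standalone model of that test, assuming 4KiB pages, a_bits < s_bits (the case where the extra add is emitted), and hypothetical names:

    /* Model of the combined alignment + page-cross test suggested by
     * "addr, s_mask - a_mask" and "tlb_mask = s->page_mask | a_mask". */
    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define PAGE_BITS 12
    #define PAGE_MASK (~((UINT64_C(1) << PAGE_BITS) - 1))

    /* True if the access is aligned to (1 << a_bits) and does not cross
     * a page; assumes a_bits < s_bits, where the add above is emitted.
     * The real code compares against the TLB comparator, which holds
     * the page address of the cached page. */
    static bool fast_path_ok(uint64_t addr, unsigned a_bits, unsigned s_bits)
    {
        uint64_t a_mask = (UINT64_C(1) << a_bits) - 1;
        uint64_t s_mask = (UINT64_C(1) << s_bits) - 1;
        uint64_t probe = addr + (s_mask - a_mask);
        uint64_t tlb_mask = PAGE_MASK | a_mask;
        return (probe & tlb_mask) == (addr & PAGE_MASK);
    }

    int main(void)
    {
        /* 8-byte access, 4-byte alignment required. */
        assert(fast_path_ok(0x0ff8, 2, 3));     /* aligned, inside the page */
        assert(!fast_path_ok(0x0ffc, 2, 3));    /* aligned, but crosses into 0x1000 */
        assert(!fast_path_ok(0x0ff9, 2, 3));    /* misaligned */
        return 0;
    }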
2209 ldst->is_ld = is_ld;
2210 ldst->oi = oi;
2211 ldst->addr_reg = addr;
2216 ldst->label_ptr[0] = s->code_ptr;
2217 s->code_ptr += 4;
2230 /* Do big-endian loads with movbe. */
2248 /* There is no extending movbe; only low 16-bits are modified. */
2318 * Without 16-byte atomicity, use integer regs.
2344 * With 16-byte atomicity, a vector load is required.
2345 * If we already have 16-byte alignment, then VMOVDQA always works.
2395 ldst->type = data_type;
2396 ldst->datalo_reg = datalo;
2397 ldst->datahi_reg = datahi;
2398 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2409 * Do big-endian stores here when movbe is available, or in system mode.
2410 * User-only without movbe will have its swapping done generically.
2454 * Without 16-byte atomicity, use integer regs.
2471 * With 16-byte atomicity, a vector store is required.
2472 * If we already have 16-byte alignment, then VMOVDQA always works.
2522 ldst->type = data_type;
2523 ldst->datalo_reg = datalo;
2524 ldst->datahi_reg = datahi;
2525 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2546 int gap = QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4) - s->code_ptr;
2548 tcg_out_nopn(s, gap - 1);
2560 uintptr_t addr = tb->jmp_target_addr[n];
2561 qatomic_set((int32_t *)jmp_rw, addr - (jmp_rx + 4));
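The gap computation and the qatomic_set above keep the 4-byte jump displacement (which begins one byte after the jmp opcode) 4-byte aligned so it can be retargeted with a single aligned 32-bit store, whose value is the target minus the address just past the displacement. A hedged model of both calculations:

    /* Model of the goto_tb alignment and patch arithmetic above;
     * addresses are plain integers here, not code buffers. */
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* Padding: make code_ptr + 1 (start of the rel32) 4-byte aligned. */
        for (uintptr_t code_ptr = 0x1000; code_ptr < 0x1008; code_ptr++) {
            uintptr_t aligned = (code_ptr + 1 + 3) & ~(uintptr_t)3;
            uintptr_t gap = aligned - code_ptr;         /* 1..4 */
            uintptr_t jmp_start = code_ptr + (gap - 1); /* after gap-1 nop bytes */
            assert(((jmp_start + 1) & 3) == 0);         /* rel32 is aligned */
        }

        /* Patching: rel32 = target - (address of the byte after the rel32). */
        uintptr_t jmp_rx = 0x2004;                      /* address of the rel32 */
        uintptr_t target = 0x3000;
        int32_t rel32 = (int32_t)(target - (jmp_rx + 4));
        assert(jmp_rx + 4 + rel32 == target);
        return 0;
    }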
2597 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
2604 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
2646 /* For 3-operand addition, use LEA. */
2650 c3 = a2, a2 = -1;
2716 /* For small constant 3-operand shift, use LEA. */
2717 if (const_a2 && a0 != a1 && (a2 - 1) < 3) {
2718 if (a2 - 1 == 0) {
2719 /* shl $1,a1,a0 -> lea (a1,a1),a0 */
2722 /* shl $n,a1,a0 -> lea 0(,a1,n),a0 */
2723 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, -1, a1, a2, 0);
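The two rewrites quoted above work because LEA computes base + index*scale + disp without touching flags: a shift by 1 becomes lea (a1,a1), shifts by 2 or 3 use scale 4 or 8, and the earlier 3-operand add is lea (a1,a2); the "(a2 - 1) < 3" guard is an unsigned trick that accepts exactly 1..3. A quick check of the arithmetic:

    /* Check the lea-for-shift equivalences quoted above
     * ("shl $n,a1,a0 -> lea 0(,a1,n),a0") for n = 1..3. */
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        for (uint64_t a1 = 0; a1 < 1000; a1 += 13) {
            assert((a1 << 1) == a1 + a1);          /* lea (a1,a1),a0  */
            assert((a1 << 2) == a1 * 4);           /* lea 0(,a1,4),a0 */
            assert((a1 << 3) == a1 * 8);           /* lea 0(,a1,8),a0 */
        }
        /* "(a2 - 1) < 3" as an unsigned compare accepts exactly 1, 2, 3. */
        for (uint32_t a2 = 0; a2 < 10; a2++) {
            assert(((a2 - 1u) < 3u) == (a2 >= 1 && a2 <= 3));
        }
        return 0;
    }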
2787 /* Output must be sign-extended. */
2796 /* Output must be zero-extended, but input isn't. */
2818 tcg_out_qemu_ld(s, a0, -1, a1, a2, TCG_TYPE_I32);
2822 tcg_out_qemu_ld(s, a0, -1, a1, a2, TCG_TYPE_I64);
2834 tcg_out_qemu_st(s, a0, -1, a1, a2, TCG_TYPE_I32);
2838 tcg_out_qemu_st(s, a0, -1, a1, a2, TCG_TYPE_I64);
2948 /* This is a 32-bit zero-extending right shift. */
2961 * On the off-chance that we can use the high-byte registers.
2963 * would have gotten from the normal tcg-op.c expansion.
3095 /* avx2 does not have 64-bit min/max; adjusted during expand. */
3187 tcg_out_dupi_vec(s, type, vece, TCG_TMP_VEC, -1);
3201 /* Swap to place constant in V4 to take advantage of zero-masking. */
3243 tcg_out8(s, (TCG_TMP_VEC - TCG_REG_XMM0) << 4);
3430 /* First merge the two 32-bit inputs to a single 64-bit element. */
3432 /* Then replicate the 64-bit elements across the rest of the vector. */
3850 return -1;
3853 return have_avx512vl && vece >= MO_32 ? 1 : -1;
3858 return vece == MO_8 ? -1 : 1;
3863 return -1;
3875 return type >= TCG_TYPE_V256 ? -1 : 0;
3892 return vece >= MO_16 ? -1 : 0;
3918 return have_avx512vbmi2 ? -1 : 0;
3921 return have_avx512vl ? 1 : have_avx2 ? -1 : 0;
3928 return -1;
3974 /* Unpack to 16-bit, shift, and repack. */
3994 * 32-bit shift and overwriting the high half of a 64-bit logical
3996 * does not, so we have to bound the smaller shift -- we get the
4006 * the sign-extend, shift and merge.
4011 tcg_gen_shli_vec(MO_64, t1, t1, 64 - imm);
4035 tcg_gen_shri_vec(vece, v0, v1, (8 << vece) - imm);
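The "(8 << vece) - imm" shift count above is the complementary half of building a rotate from two shifts, rotl(x, n) == (x << n) | (x >> (w - n)); the earlier "Unpack to 16-bit, shift, and repack" line belongs to the same family of expansions, since SSE/AVX have no byte-granularity shifts. A scalar check of the rotate identity for 8-bit lanes, sketch only:

    /* Verify rotl8(x, n) == (x << n) | (x >> (8 - n)) against a
     * bit-by-bit reference. */
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        for (unsigned x = 0; x < 256; x++) {
            for (unsigned n = 1; n < 8; n++) {
                uint8_t ref = 0;
                for (unsigned i = 0; i < 8; i++) {
                    if (x & (1u << i)) {
                        ref |= (uint8_t)(1u << ((i + n) % 8));
                    }
                }
                uint8_t rot = (uint8_t)((x << n) | (x >> (8 - n)));
                assert(rot == ref);
            }
        }
        return 0;
    }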
4084 tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1);
4105 * This leaves the 8-bit result, x * y, with 8 bits of right padding.
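The "8 bits of right padding" remark is a modular-arithmetic fact: with one byte operand in the low half of a 16-bit lane and the other in the high half, the 16-bit product is exactly the 8-bit product shifted left by 8. A scalar check:

    /* With x in the low byte of a 16-bit lane and y in the high byte,
     * the 16-bit product is (x*y mod 256) << 8, so the byte result sits
     * in the high half with 8 bits of zero padding below it. */
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        for (unsigned x = 0; x < 256; x++) {
            for (unsigned y = 0; y < 256; y++) {
                uint16_t lane_x = (uint16_t)x;            /* 0 | x */
                uint16_t lane_y = (uint16_t)(y << 8);     /* y | 0 */
                uint16_t prod = (uint16_t)(lane_x * lane_y);
                assert((prod >> 8) == (uint8_t)(x * y));
                assert((prod & 0xff) == 0);               /* the padding */
            }
        }
        return 0;
    }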
4165 * Without AVX512, there are no 64-bit unsigned comparisons.
4174 TCGv_vec t3 = tcg_constant_vec(type, vece, 1ull << ((8 << vece) - 1));
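The constant above, 1ull << ((8 << vece) - 1), is the element's sign bit; lacking AVX512's native unsigned compares, an unsigned comparison can be done by flipping that bit in both operands and comparing signed (whether the backend uses exactly this bias form or an umin/cmpeq variant is not visible in these lines). A scalar check of the identity for 8-bit lanes:

    /* Check the sign-bias trick suggested by 1 << (width - 1):
     *   x <u y   iff   (x ^ bias) <s (y ^ bias). */
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        const uint8_t bias = 1u << 7;       /* 1 << ((8 << MO_8) - 1) */
        for (unsigned x = 0; x < 256; x++) {
            for (unsigned y = 0; y < 256; y++) {
                int unsigned_lt = (uint8_t)x < (uint8_t)y;
                int signed_lt   = (int8_t)(x ^ bias) < (int8_t)(y ^ bias);
                assert(unsigned_lt == signed_lt);
            }
        }
        return 0;
    }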
4298 + TCG_TARGET_STACK_ALIGN - 1) \
4299 & ~(TCG_TARGET_STACK_ALIGN - 1))
4309 stack_addend = FRAME_SIZE - PUSH_SIZE;
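The FRAME_SIZE expression above is the standard power-of-two round-up, (x + a - 1) & ~(a - 1), the same shape as the earlier mask computations. A quick self-contained check:

    /* Round x up to a multiple of a, for power-of-two a. */
    #include <assert.h>
    #include <stddef.h>

    static size_t align_up(size_t x, size_t a)
    {
        return (x + a - 1) & ~(a - 1);
    }

    int main(void)
    {
        assert(align_up(0, 16) == 0);
        assert(align_up(1, 16) == 16);
        assert(align_up(16, 16) == 16);
        assert(align_up(17, 16) == 32);
        return 0;
    }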
4329 tcg_regset_set_reg(s->reserved_regs, x86_guest_base.index);
4336 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
4343 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
4349 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
4352 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
4356 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
4363 for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
4408 s->reserved_regs = 0;
4409 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
4410 tcg_regset_set_reg(s->reserved_regs, TCG_TMP_VEC);
4413 tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM6);
4414 tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM7);
4415 tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM8);
4416 tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM9);
4417 tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM10);
4418 tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM11);
4419 tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM12);
4420 tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM13);
4421 tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM14);
4422 tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM15);
4440 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
4441 .h.cie.id = -1,
4444 .h.cie.data_align = 0x78, /* sleb128 -8 */
4448 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
4456 0x90, 1, /* DW_CFA_offset, %rip, -8 */
4458 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
4459 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
4460 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
4461 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
4462 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
4463 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
4469 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
4470 .h.cie.id = -1,
4473 .h.cie.data_align = 0x7c, /* sleb128 -4 */
4477 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
4485 0x88, 1, /* DW_CFA_offset, %eip, -4 */
4487 0x85, 2, /* DW_CFA_offset, %ebp, -8 */
4488 0x83, 3, /* DW_CFA_offset, %ebx, -12 */
4489 0x86, 4, /* DW_CFA_offset, %esi, -16 */
4490 0x87, 5, /* DW_CFA_offset, %edi, -20 */
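The byte pairs above are DW_CFA_offset instructions: the opcode byte is 0x80 | register-number, the operand is a ULEB128 factored offset, and the real offset is that operand times the CIE's data_align (sleb128 0x78 = -8 for the 64-bit frame, 0x7c = -4 for the 32-bit one), which is how 0x90, 1 comes to mean "%rip at CFA-8". A minimal decoder for just this opcode, assuming single-byte LEB values:

    /* Decode the DW_CFA_offset pairs listed above: 0x80 | regno, then a
     * ULEB128 factored offset multiplied by data_align. */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    static int sleb128_1(uint8_t b)              /* one-byte sleb128 */
    {
        return (b & 0x40) ? (int)b - 0x80 : (int)b;
    }

    int main(void)
    {
        const int data_align = sleb128_1(0x78);  /* -8 (64-bit CIE) */
        const uint8_t fde[] = { 0x90, 1, 0x86, 2, 0x83, 3 };

        assert(data_align == -8);
        assert(sleb128_1(0x7c) == -4);           /* 32-bit CIE */

        for (unsigned i = 0; i < sizeof(fde); i += 2) {
            unsigned reg = fde[i] & 0x3f;        /* DW_CFA_offset | reg */
            int off = fde[i + 1] * data_align;   /* factored offset */
            printf("reg %u saved at CFA%d\n", reg, off);
        }
        /* Prints reg 16 (%rip) at CFA-8, reg 6 (%rbp) at CFA-16,
           reg 3 (%rbx) at CFA-24, matching the comments above. */
        return 0;
    }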