diff --git a/host/include/aarch64/host/atomic128-cas.h.inc b/host/include/aarch64/host/atomic128-cas.h.inc index aec27df182..52e98a0bdd 100644 --- a/host/include/aarch64/host/atomic128-cas.h.inc +++ b/host/include/aarch64/host/atomic128-cas.h.inc @@ -67,9 +67,9 @@ static inline Int128 atomic16_fetch_and(Int128 *ptr, Int128 new) "stlxp %w[tmp], %[tmpl], %[tmph], %[mem]\n\t" "cbnz %w[tmp], 0b" : [mem] "+m"(*ptr), [tmp] "=&r"(tmp), - [oldl] "=&r"(oldl), [oldh] "=&r"(oldh) - : [newl] "r"(newl), [newh] "r"(newh), - [tmpl] "r"(tmpl), [tmph] "r"(tmph) + [oldl] "=&r"(oldl), [oldh] "=&r"(oldh), + [tmpl] "=&r"(tmpl), [tmph] "=&r"(tmph) + : [newl] "r"(newl), [newh] "r"(newh) : "memory"); return int128_make128(oldl, oldh); @@ -87,9 +87,9 @@ static inline Int128 atomic16_fetch_or(Int128 *ptr, Int128 new) "stlxp %w[tmp], %[tmpl], %[tmph], %[mem]\n\t" "cbnz %w[tmp], 0b" : [mem] "+m"(*ptr), [tmp] "=&r"(tmp), - [oldl] "=&r"(oldl), [oldh] "=&r"(oldh) - : [newl] "r"(newl), [newh] "r"(newh), - [tmpl] "r"(tmpl), [tmph] "r"(tmph) + [oldl] "=&r"(oldl), [oldh] "=&r"(oldh), + [tmpl] "=&r"(tmpl), [tmph] "=&r"(tmph) + : [newl] "r"(newl), [newh] "r"(newh) : "memory"); return int128_make128(oldl, oldh); diff --git a/host/include/generic/host/atomic128-cas.h.inc b/host/include/generic/host/atomic128-cas.h.inc index 990162c56f..8bf5f47768 100644 --- a/host/include/generic/host/atomic128-cas.h.inc +++ b/host/include/generic/host/atomic128-cas.h.inc @@ -34,39 +34,45 @@ static inline Int128 ATTRIBUTE_ATOMIC128_OPT atomic16_xchg(Int128 *ptr, Int128 new) { __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16); - Int128 old = *ptr_align; + Int128Alias o, n; - while (!__atomic_compare_exchange_n(ptr_align, &old, new, true, + n.s = new; + o.i = *ptr_align; + while (!__atomic_compare_exchange_n(ptr_align, &o.i, n.i, true, __ATOMIC_SEQ_CST, 0)) { continue; } - return old; + return o.s; } static inline Int128 ATTRIBUTE_ATOMIC128_OPT atomic16_fetch_and(Int128 *ptr, Int128 val) { __int128_t *ptr_align = 
__builtin_assume_aligned(ptr, 16); - Int128 old = *ptr_align; + Int128Alias o, v; - while (!__atomic_compare_exchange_n(ptr_align, &old, old & val, true, + v.s = val; + o.i = *ptr_align; + while (!__atomic_compare_exchange_n(ptr_align, &o.i, o.i & v.i, true, __ATOMIC_SEQ_CST, 0)) { continue; } - return old; + return o.s; } static inline Int128 ATTRIBUTE_ATOMIC128_OPT atomic16_fetch_or(Int128 *ptr, Int128 val) { __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16); - Int128 old = *ptr_align; + Int128Alias o, v; - while (!__atomic_compare_exchange_n(ptr_align, &old, old | val, true, + v.s = val; + o.i = *ptr_align; + while (!__atomic_compare_exchange_n(ptr_align, &o.i, o.i | v.i, true, __ATOMIC_SEQ_CST, 0)) { continue; } - return old; + return o.s; } # define HAVE_CMPXCHG128 1 #elif defined(CONFIG_CMPXCHG128) diff --git a/include/tcg/helper-info.h b/include/tcg/helper-info.h index 909fe73afa..49a27e4eae 100644 --- a/include/tcg/helper-info.h +++ b/include/tcg/helper-info.h @@ -10,7 +10,19 @@ #define TCG_HELPER_INFO_H #ifdef CONFIG_TCG_INTERPRETER +/* + * MacOSX 15 uses an old version of libffi which contains + * #if FFI_GO_CLOSURES + * but does not define that in <ffitarget.h>, included from <ffi.h>. + * This was fixed upstream with + * https://github.com/libffi/libffi/commit/c23e9a1c + * We don't care about go closures one way or the other; + * just suppress the warning. + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wundef" #include <ffi.h> +#pragma GCC diagnostic pop #endif #include "tcg-target-reg-bits.h" diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c index 67c15fd4d0..7716c3ad7c 100644 --- a/tcg/tcg-op-ldst.c +++ b/tcg/tcg-op-ldst.c @@ -135,6 +135,30 @@ static void tcg_gen_req_mo(TCGBar type) } } +static TCGTemp *tci_extend_addr(TCGTemp *addr) +{ +#ifdef CONFIG_TCG_INTERPRETER + /* + * 64-bit interpreter requires 64-bit addresses. + * Compare to the extension performed by tcg_out_{ld,st}_helper_args + * for native code generation. 
+ */ + if (TCG_TARGET_REG_BITS == 64 && tcg_ctx->addr_type == TCG_TYPE_I32) { + TCGv_i64 temp = tcg_temp_ebb_new_i64(); + tcg_gen_extu_i32_i64(temp, temp_tcgv_i32(addr)); + return tcgv_i64_temp(temp); + } +#endif + return addr; +} + +static void maybe_free_addr(TCGTemp *addr, TCGTemp *copy) +{ + if (addr != copy) { + tcg_temp_free_internal(copy); + } +} + /* Only required for loads, where value might overlap addr. */ static TCGv_i64 plugin_maybe_preserve_addr(TCGTemp *addr) { @@ -158,23 +182,21 @@ static void plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi, enum qemu_plugin_mem_rw rw) { - if (tcg_ctx->plugin_insn != NULL) { - qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw); + qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw); - if (tcg_ctx->addr_type == TCG_TYPE_I32) { - if (!copy_addr) { - copy_addr = tcg_temp_ebb_new_i64(); - tcg_gen_extu_i32_i64(copy_addr, temp_tcgv_i32(orig_addr)); - } + if (tcg_ctx->addr_type == TCG_TYPE_I32) { + if (!copy_addr) { + copy_addr = tcg_temp_ebb_new_i64(); + tcg_gen_extu_i32_i64(copy_addr, temp_tcgv_i32(orig_addr)); + } + tcg_gen_plugin_mem_cb(copy_addr, info); + tcg_temp_free_i64(copy_addr); + } else { + if (copy_addr) { tcg_gen_plugin_mem_cb(copy_addr, info); tcg_temp_free_i64(copy_addr); } else { - if (copy_addr) { - tcg_gen_plugin_mem_cb(copy_addr, info); - tcg_temp_free_i64(copy_addr); - } else { - tcg_gen_plugin_mem_cb(temp_tcgv_i64(orig_addr), info); - } + tcg_gen_plugin_mem_cb(temp_tcgv_i64(orig_addr), info); } } } @@ -234,6 +256,7 @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr, MemOp orig_memop; MemOpIdx orig_oi, oi; TCGv_i64 copy_addr; + TCGTemp *addr_new; tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); orig_memop = memop = tcg_canonicalize_memop(memop, 0, 0); @@ -248,10 +271,12 @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr, oi = make_memop_idx(memop, idx); } + addr_new = tci_extend_addr(addr); copy_addr = plugin_maybe_preserve_addr(addr); 
- gen_ldst1(INDEX_op_qemu_ld, TCG_TYPE_I32, tcgv_i32_temp(val), addr, oi); + gen_ldst1(INDEX_op_qemu_ld, TCG_TYPE_I32, tcgv_i32_temp(val), addr_new, oi); plugin_gen_mem_callbacks_i32(val, copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R); + maybe_free_addr(addr, addr_new); if ((orig_memop ^ memop) & MO_BSWAP) { switch (orig_memop & MO_SIZE) { @@ -282,6 +307,7 @@ static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr, { TCGv_i32 swap = NULL; MemOpIdx orig_oi, oi; + TCGTemp *addr_new; tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); memop = tcg_canonicalize_memop(memop, 0, 1); @@ -304,8 +330,10 @@ static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr, oi = make_memop_idx(memop, idx); } - gen_ldst1(INDEX_op_qemu_st, TCG_TYPE_I32, tcgv_i32_temp(val), addr, oi); + addr_new = tci_extend_addr(addr); + gen_ldst1(INDEX_op_qemu_st, TCG_TYPE_I32, tcgv_i32_temp(val), addr_new, oi); plugin_gen_mem_callbacks_i32(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W); + maybe_free_addr(addr, addr_new); if (swap) { tcg_temp_free_i32(swap); @@ -326,6 +354,7 @@ static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr, MemOp orig_memop; MemOpIdx orig_oi, oi; TCGv_i64 copy_addr; + TCGTemp *addr_new; if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) { tcg_gen_qemu_ld_i32_int(TCGV_LOW(val), addr, idx, memop); @@ -350,10 +379,12 @@ static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr, oi = make_memop_idx(memop, idx); } + addr_new = tci_extend_addr(addr); copy_addr = plugin_maybe_preserve_addr(addr); - gen_ld_i64(val, addr, oi); + gen_ld_i64(val, addr_new, oi); plugin_gen_mem_callbacks_i64(val, copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R); + maybe_free_addr(addr, addr_new); if ((orig_memop ^ memop) & MO_BSWAP) { int flags = (orig_memop & MO_SIGN @@ -388,6 +419,7 @@ static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr, { TCGv_i64 swap = NULL; MemOpIdx orig_oi, oi; + TCGTemp *addr_new; if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) { 
tcg_gen_qemu_st_i32_int(TCGV_LOW(val), addr, idx, memop); @@ -418,8 +450,10 @@ static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr, oi = make_memop_idx(memop, idx); } - gen_st_i64(val, addr, oi); + addr_new = tci_extend_addr(addr); + gen_st_i64(val, addr_new, oi); plugin_gen_mem_callbacks_i64(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W); + maybe_free_addr(addr, addr_new); if (swap) { tcg_temp_free_i64(swap); @@ -530,6 +564,7 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr, { MemOpIdx orig_oi; TCGv_i64 ext_addr = NULL; + TCGTemp *addr_new; check_max_alignment(memop_alignment_bits(memop)); tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); @@ -557,8 +592,10 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr, hi = TCGV128_HIGH(val); } + addr_new = tci_extend_addr(addr); gen_ldst2(INDEX_op_qemu_ld2, TCG_TYPE_I128, tcgv_i64_temp(lo), - tcgv_i64_temp(hi), addr, oi); + tcgv_i64_temp(hi), addr_new, oi); + maybe_free_addr(addr, addr_new); if (need_bswap) { tcg_gen_bswap64_i64(lo, lo); @@ -586,7 +623,9 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr, y = TCGV128_LOW(val); } - gen_ld_i64(x, addr, make_memop_idx(mop[0], idx)); + addr_new = tci_extend_addr(addr); + gen_ld_i64(x, addr_new, make_memop_idx(mop[0], idx)); + maybe_free_addr(addr, addr_new); if (need_bswap) { tcg_gen_bswap64_i64(x, x); @@ -602,7 +641,9 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr, addr_p8 = tcgv_i64_temp(t); } - gen_ld_i64(y, addr_p8, make_memop_idx(mop[1], idx)); + addr_new = tci_extend_addr(addr_p8); + gen_ld_i64(y, addr_new, make_memop_idx(mop[1], idx)); + maybe_free_addr(addr_p8, addr_new); tcg_temp_free_internal(addr_p8); if (need_bswap) { @@ -636,6 +677,7 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr, { MemOpIdx orig_oi; TCGv_i64 ext_addr = NULL; + TCGTemp *addr_new; check_max_alignment(memop_alignment_bits(memop)); tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST); @@ -666,8 +708,10 @@ 
static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr, hi = TCGV128_HIGH(val); } + addr_new = tci_extend_addr(addr); gen_ldst2(INDEX_op_qemu_st2, TCG_TYPE_I128, - tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi); + tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr_new, oi); + maybe_free_addr(addr, addr_new); if (need_bswap) { tcg_temp_free_i64(lo); @@ -694,7 +738,9 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr, x = b; } - gen_st_i64(x, addr, make_memop_idx(mop[0], idx)); + addr_new = tci_extend_addr(addr); + gen_st_i64(x, addr_new, make_memop_idx(mop[0], idx)); + maybe_free_addr(addr, addr_new); if (tcg_ctx->addr_type == TCG_TYPE_I32) { TCGv_i32 t = tcg_temp_ebb_new_i32(); @@ -706,13 +752,15 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr, addr_p8 = tcgv_i64_temp(t); } + addr_new = tci_extend_addr(addr_p8); if (b) { tcg_gen_bswap64_i64(b, y); - gen_st_i64(b, addr_p8, make_memop_idx(mop[1], idx)); + gen_st_i64(b, addr_new, make_memop_idx(mop[1], idx)); tcg_temp_free_i64(b); } else { - gen_st_i64(y, addr_p8, make_memop_idx(mop[1], idx)); + gen_st_i64(y, addr_new, make_memop_idx(mop[1], idx)); } + maybe_free_addr(addr_p8, addr_new); tcg_temp_free_internal(addr_p8); } else { if (tcg_ctx->addr_type == TCG_TYPE_I32) { diff --git a/tcg/tci.c b/tcg/tci.c index 700e672616..e15d4e8e08 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -794,12 +794,24 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, taddr = regs[r1]; regs[r0] = tci_qemu_ld(env, taddr, oi, tb_ptr); break; + case INDEX_op_tci_qemu_ld_rrr: + tci_args_rrr(insn, &r0, &r1, &r2); + taddr = regs[r1]; + oi = regs[r2]; + regs[r0] = tci_qemu_ld(env, taddr, oi, tb_ptr); + break; case INDEX_op_qemu_st: tci_args_rrm(insn, &r0, &r1, &oi); taddr = regs[r1]; tci_qemu_st(env, taddr, regs[r0], oi, tb_ptr); break; + case INDEX_op_tci_qemu_st_rrr: + tci_args_rrr(insn, &r0, &r1, &r2); + taddr = regs[r1]; + oi = regs[r2]; + tci_qemu_st(env, taddr, regs[r0], oi, tb_ptr); + 
break; case INDEX_op_qemu_ld2: tcg_debug_assert(TCG_TARGET_REG_BITS == 32); @@ -1050,6 +1062,13 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) op_name, str_r(r0), str_r(r1), oi); break; + case INDEX_op_tci_qemu_ld_rrr: + case INDEX_op_tci_qemu_st_rrr: + tci_args_rrr(insn, &r0, &r1, &r2); + info->fprintf_func(info->stream, "%-12s %s, %s, %s", + op_name, str_r(r0), str_r(r1), str_r(r2)); + break; + case INDEX_op_qemu_ld2: case INDEX_op_qemu_st2: tci_args_rrrr(insn, &r0, &r1, &r2, &r3); diff --git a/tcg/tci/tcg-target-opc.h.inc b/tcg/tci/tcg-target-opc.h.inc index 4eb32ed736..f8bfffc125 100644 --- a/tcg/tci/tcg-target-opc.h.inc +++ b/tcg/tci/tcg-target-opc.h.inc @@ -13,3 +13,5 @@ DEF(tci_rotl32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_rotr32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_setcond32, 1, 2, 1, TCG_OPF_NOT_PRESENT) DEF(tci_movcond32, 1, 2, 1, TCG_OPF_NOT_PRESENT) +DEF(tci_qemu_ld_rrr, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_qemu_st_rrr, 0, 3, 0, TCG_OPF_NOT_PRESENT) diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 35c66a4836..532f87262c 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -1188,7 +1188,12 @@ static const TCGOutOpStore outop_st = { static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data, TCGReg addr, MemOpIdx oi) { - tcg_out_op_rrm(s, INDEX_op_qemu_ld, data, addr, oi); + if (oi & ~0xffff) { + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, oi); + tcg_out_op_rrr(s, INDEX_op_tci_qemu_ld_rrr, data, addr, TCG_REG_TMP); + } else { + tcg_out_op_rrm(s, INDEX_op_qemu_ld, data, addr, oi); + } } static const TCGOutOpQemuLdSt outop_qemu_ld = { @@ -1213,7 +1218,12 @@ static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data, TCGReg addr, MemOpIdx oi) { - tcg_out_op_rrm(s, INDEX_op_qemu_st, data, addr, oi); + if (oi & ~0xffff) { + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, oi); + tcg_out_op_rrr(s, INDEX_op_tci_qemu_st_rrr, data, addr, TCG_REG_TMP); 
+ } else { + tcg_out_op_rrm(s, INDEX_op_qemu_st, data, addr, oi); + } } static const TCGOutOpQemuLdSt outop_qemu_st = {