tcg: fixes for tci

host: fixes for 128-bit atomics
 -----BEGIN PGP SIGNATURE-----
 
 iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmkzBDEdHHJpY2hhcmQu
 aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV/YMggAgY0+rpQulo7k+fEo
 RP7cLweKSu8aahFvt304qyNGAWlGzBQwJSKWUfFyyMxh6FhO9iEsjkodArjDcK/J
 fou3pz4UmU/feMwVxFuRpCDEEKgpcpxgwj7XJFh96L4VFZ8OrHeuPG5KU5IA/vyy
 eHIzU8M50rejmKCmOL8FDGshWZdXkrgBp3ShIlqlVEb9HpuSFrti0Wh2euVUV67Y
 xG1F4iU5RVNW8OcGz5asLgwaNB7pK/v/FVDxR9rEAoiM9gZhV912fkogmVXTniTk
 rjTYR0k6d49EZ3+M4sUx2v2Nl+6O4wGUFWERU4vHmtUpv1F1UjqxOE3JWDeU2L0c
 3q9k2Q==
 =M1lm
 -----END PGP SIGNATURE-----

Merge tag 'pull-tcg-20251205' of https://gitlab.com/rth7680/qemu into staging

tcg: fixes for tci
host: fixes for 128-bit atomics

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmkzBDEdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV/YMggAgY0+rpQulo7k+fEo
# RP7cLweKSu8aahFvt304qyNGAWlGzBQwJSKWUfFyyMxh6FhO9iEsjkodArjDcK/J
# fou3pz4UmU/feMwVxFuRpCDEEKgpcpxgwj7XJFh96L4VFZ8OrHeuPG5KU5IA/vyy
# eHIzU8M50rejmKCmOL8FDGshWZdXkrgBp3ShIlqlVEb9HpuSFrti0Wh2euVUV67Y
# xG1F4iU5RVNW8OcGz5asLgwaNB7pK/v/FVDxR9rEAoiM9gZhV912fkogmVXTniTk
# rjTYR0k6d49EZ3+M4sUx2v2Nl+6O4wGUFWERU4vHmtUpv1F1UjqxOE3JWDeU2L0c
# 3q9k2Q==
# =M1lm
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 05 Dec 2025 10:11:29 AM CST
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate]

* tag 'pull-tcg-20251205' of https://gitlab.com/rth7680/qemu:
  include/aarch64/host: Fix atomic16_fetch_{and,or}
  include/generic/host: Fix atomic128-cas.h.inc for Int128 structure
  tcg/tci: Disable -Wundef FFI_GO_CLOSURES warning
  tcg: Remove duplicate test from plugin_gen_mem_callbacks
  tcg/tci: Introduce INDEX_op_tci_qemu_{ld,st}_rrr
  tcg: Zero extend 32-bit addresses for TCI

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2025-12-05 10:20:51 -06:00
commit 47b6038187
7 changed files with 138 additions and 41 deletions

View file

@ -67,9 +67,9 @@ static inline Int128 atomic16_fetch_and(Int128 *ptr, Int128 new)
"stlxp %w[tmp], %[tmpl], %[tmph], %[mem]\n\t"
"cbnz %w[tmp], 0b"
: [mem] "+m"(*ptr), [tmp] "=&r"(tmp),
[oldl] "=&r"(oldl), [oldh] "=&r"(oldh)
: [newl] "r"(newl), [newh] "r"(newh),
[tmpl] "r"(tmpl), [tmph] "r"(tmph)
[oldl] "=&r"(oldl), [oldh] "=&r"(oldh),
[tmpl] "=&r"(tmpl), [tmph] "=&r"(tmph)
: [newl] "r"(newl), [newh] "r"(newh)
: "memory");
return int128_make128(oldl, oldh);
@ -87,9 +87,9 @@ static inline Int128 atomic16_fetch_or(Int128 *ptr, Int128 new)
"stlxp %w[tmp], %[tmpl], %[tmph], %[mem]\n\t"
"cbnz %w[tmp], 0b"
: [mem] "+m"(*ptr), [tmp] "=&r"(tmp),
[oldl] "=&r"(oldl), [oldh] "=&r"(oldh)
: [newl] "r"(newl), [newh] "r"(newh),
[tmpl] "r"(tmpl), [tmph] "r"(tmph)
[oldl] "=&r"(oldl), [oldh] "=&r"(oldh),
[tmpl] "=&r"(tmpl), [tmph] "=&r"(tmph)
: [newl] "r"(newl), [newh] "r"(newh)
: "memory");
return int128_make128(oldl, oldh);

View file

@ -34,39 +34,45 @@ static inline Int128 ATTRIBUTE_ATOMIC128_OPT
atomic16_xchg(Int128 *ptr, Int128 new)
{
__int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
Int128 old = *ptr_align;
Int128Alias o, n;
while (!__atomic_compare_exchange_n(ptr_align, &old, new, true,
n.s = new;
o.i = *ptr_align;
while (!__atomic_compare_exchange_n(ptr_align, &o.i, n.i, true,
__ATOMIC_SEQ_CST, 0)) {
continue;
}
return old;
return o.s;
}
static inline Int128 ATTRIBUTE_ATOMIC128_OPT
atomic16_fetch_and(Int128 *ptr, Int128 val)
{
__int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
Int128 old = *ptr_align;
Int128Alias o, v;
while (!__atomic_compare_exchange_n(ptr_align, &old, old & val, true,
v.s = val;
o.i = *ptr_align;
while (!__atomic_compare_exchange_n(ptr_align, &o.i, o.i & v.i, true,
__ATOMIC_SEQ_CST, 0)) {
continue;
}
return old;
return o.s;
}
static inline Int128 ATTRIBUTE_ATOMIC128_OPT
atomic16_fetch_or(Int128 *ptr, Int128 val)
{
__int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
Int128 old = *ptr_align;
Int128Alias o, v;
while (!__atomic_compare_exchange_n(ptr_align, &old, old | val, true,
v.s = val;
o.i = *ptr_align;
while (!__atomic_compare_exchange_n(ptr_align, &o.i, o.i | v.i, true,
__ATOMIC_SEQ_CST, 0)) {
continue;
}
return old;
return o.s;
}
# define HAVE_CMPXCHG128 1
#elif defined(CONFIG_CMPXCHG128)

View file

@ -10,7 +10,19 @@
#define TCG_HELPER_INFO_H
#ifdef CONFIG_TCG_INTERPRETER
/*
* MacOSX 15 uses an old version of libffi which contains
* #if FFI_GO_CLOSURES
* but does not define that in <ffitarget.h>, included from <ffi.h>.
* This was fixed upstream with
* https://github.com/libffi/libffi/commit/c23e9a1c
* We don't care about go closures one way or the other;
* just suppress the warning.
*/
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wundef"
#include <ffi.h>
#pragma GCC diagnostic pop
#endif
#include "tcg-target-reg-bits.h"

View file

@ -135,6 +135,30 @@ static void tcg_gen_req_mo(TCGBar type)
}
}
/*
 * Return ADDR widened to a 64-bit temp when the TCI interpreter requires
 * it; otherwise return ADDR unchanged.  When a new temp is created here,
 * the caller releases it afterwards via maybe_free_addr(addr, result).
 */
static TCGTemp *tci_extend_addr(TCGTemp *addr)
{
#ifdef CONFIG_TCG_INTERPRETER
/*
 * 64-bit interpreter requires 64-bit addresses.
 * Compare to the extension performed by tcg_out_{ld,st}_helper_args
 * for native code generation.
 */
if (TCG_TARGET_REG_BITS == 64 && tcg_ctx->addr_type == TCG_TYPE_I32) {
/* Zero-extend the 32-bit guest address into a fresh 64-bit temp. */
TCGv_i64 temp = tcg_temp_ebb_new_i64();
tcg_gen_extu_i32_i64(temp, temp_tcgv_i32(addr));
return tcgv_i64_temp(temp);
}
#endif
return addr;
}
/*
 * Release COPY if it is a temporary distinct from the original ADDR,
 * i.e. if tci_extend_addr() allocated a new temp for the extension.
 * A no-op when COPY is ADDR itself.
 */
static void maybe_free_addr(TCGTemp *addr, TCGTemp *copy)
{
if (addr != copy) {
tcg_temp_free_internal(copy);
}
}
/* Only required for loads, where value might overlap addr. */
static TCGv_i64 plugin_maybe_preserve_addr(TCGTemp *addr)
{
@ -158,23 +182,21 @@ static void
plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi,
enum qemu_plugin_mem_rw rw)
{
if (tcg_ctx->plugin_insn != NULL) {
qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
if (tcg_ctx->addr_type == TCG_TYPE_I32) {
if (!copy_addr) {
copy_addr = tcg_temp_ebb_new_i64();
tcg_gen_extu_i32_i64(copy_addr, temp_tcgv_i32(orig_addr));
}
if (tcg_ctx->addr_type == TCG_TYPE_I32) {
if (!copy_addr) {
copy_addr = tcg_temp_ebb_new_i64();
tcg_gen_extu_i32_i64(copy_addr, temp_tcgv_i32(orig_addr));
}
tcg_gen_plugin_mem_cb(copy_addr, info);
tcg_temp_free_i64(copy_addr);
} else {
if (copy_addr) {
tcg_gen_plugin_mem_cb(copy_addr, info);
tcg_temp_free_i64(copy_addr);
} else {
if (copy_addr) {
tcg_gen_plugin_mem_cb(copy_addr, info);
tcg_temp_free_i64(copy_addr);
} else {
tcg_gen_plugin_mem_cb(temp_tcgv_i64(orig_addr), info);
}
tcg_gen_plugin_mem_cb(temp_tcgv_i64(orig_addr), info);
}
}
}
@ -234,6 +256,7 @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
MemOp orig_memop;
MemOpIdx orig_oi, oi;
TCGv_i64 copy_addr;
TCGTemp *addr_new;
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
orig_memop = memop = tcg_canonicalize_memop(memop, 0, 0);
@ -248,10 +271,12 @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
oi = make_memop_idx(memop, idx);
}
addr_new = tci_extend_addr(addr);
copy_addr = plugin_maybe_preserve_addr(addr);
gen_ldst1(INDEX_op_qemu_ld, TCG_TYPE_I32, tcgv_i32_temp(val), addr, oi);
gen_ldst1(INDEX_op_qemu_ld, TCG_TYPE_I32, tcgv_i32_temp(val), addr_new, oi);
plugin_gen_mem_callbacks_i32(val, copy_addr, addr, orig_oi,
QEMU_PLUGIN_MEM_R);
maybe_free_addr(addr, addr_new);
if ((orig_memop ^ memop) & MO_BSWAP) {
switch (orig_memop & MO_SIZE) {
@ -282,6 +307,7 @@ static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr,
{
TCGv_i32 swap = NULL;
MemOpIdx orig_oi, oi;
TCGTemp *addr_new;
tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
memop = tcg_canonicalize_memop(memop, 0, 1);
@ -304,8 +330,10 @@ static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr,
oi = make_memop_idx(memop, idx);
}
gen_ldst1(INDEX_op_qemu_st, TCG_TYPE_I32, tcgv_i32_temp(val), addr, oi);
addr_new = tci_extend_addr(addr);
gen_ldst1(INDEX_op_qemu_st, TCG_TYPE_I32, tcgv_i32_temp(val), addr_new, oi);
plugin_gen_mem_callbacks_i32(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
maybe_free_addr(addr, addr_new);
if (swap) {
tcg_temp_free_i32(swap);
@ -326,6 +354,7 @@ static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
MemOp orig_memop;
MemOpIdx orig_oi, oi;
TCGv_i64 copy_addr;
TCGTemp *addr_new;
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
tcg_gen_qemu_ld_i32_int(TCGV_LOW(val), addr, idx, memop);
@ -350,10 +379,12 @@ static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
oi = make_memop_idx(memop, idx);
}
addr_new = tci_extend_addr(addr);
copy_addr = plugin_maybe_preserve_addr(addr);
gen_ld_i64(val, addr, oi);
gen_ld_i64(val, addr_new, oi);
plugin_gen_mem_callbacks_i64(val, copy_addr, addr, orig_oi,
QEMU_PLUGIN_MEM_R);
maybe_free_addr(addr, addr_new);
if ((orig_memop ^ memop) & MO_BSWAP) {
int flags = (orig_memop & MO_SIGN
@ -388,6 +419,7 @@ static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
{
TCGv_i64 swap = NULL;
MemOpIdx orig_oi, oi;
TCGTemp *addr_new;
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
tcg_gen_qemu_st_i32_int(TCGV_LOW(val), addr, idx, memop);
@ -418,8 +450,10 @@ static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
oi = make_memop_idx(memop, idx);
}
gen_st_i64(val, addr, oi);
addr_new = tci_extend_addr(addr);
gen_st_i64(val, addr_new, oi);
plugin_gen_mem_callbacks_i64(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
maybe_free_addr(addr, addr_new);
if (swap) {
tcg_temp_free_i64(swap);
@ -530,6 +564,7 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
{
MemOpIdx orig_oi;
TCGv_i64 ext_addr = NULL;
TCGTemp *addr_new;
check_max_alignment(memop_alignment_bits(memop));
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
@ -557,8 +592,10 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
hi = TCGV128_HIGH(val);
}
addr_new = tci_extend_addr(addr);
gen_ldst2(INDEX_op_qemu_ld2, TCG_TYPE_I128, tcgv_i64_temp(lo),
tcgv_i64_temp(hi), addr, oi);
tcgv_i64_temp(hi), addr_new, oi);
maybe_free_addr(addr, addr_new);
if (need_bswap) {
tcg_gen_bswap64_i64(lo, lo);
@ -586,7 +623,9 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
y = TCGV128_LOW(val);
}
gen_ld_i64(x, addr, make_memop_idx(mop[0], idx));
addr_new = tci_extend_addr(addr);
gen_ld_i64(x, addr_new, make_memop_idx(mop[0], idx));
maybe_free_addr(addr, addr_new);
if (need_bswap) {
tcg_gen_bswap64_i64(x, x);
@ -602,7 +641,9 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
addr_p8 = tcgv_i64_temp(t);
}
gen_ld_i64(y, addr_p8, make_memop_idx(mop[1], idx));
addr_new = tci_extend_addr(addr_p8);
gen_ld_i64(y, addr_new, make_memop_idx(mop[1], idx));
maybe_free_addr(addr_p8, addr_new);
tcg_temp_free_internal(addr_p8);
if (need_bswap) {
@ -636,6 +677,7 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
{
MemOpIdx orig_oi;
TCGv_i64 ext_addr = NULL;
TCGTemp *addr_new;
check_max_alignment(memop_alignment_bits(memop));
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
@ -666,8 +708,10 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
hi = TCGV128_HIGH(val);
}
addr_new = tci_extend_addr(addr);
gen_ldst2(INDEX_op_qemu_st2, TCG_TYPE_I128,
tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi);
tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr_new, oi);
maybe_free_addr(addr, addr_new);
if (need_bswap) {
tcg_temp_free_i64(lo);
@ -694,7 +738,9 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
x = b;
}
gen_st_i64(x, addr, make_memop_idx(mop[0], idx));
addr_new = tci_extend_addr(addr);
gen_st_i64(x, addr_new, make_memop_idx(mop[0], idx));
maybe_free_addr(addr, addr_new);
if (tcg_ctx->addr_type == TCG_TYPE_I32) {
TCGv_i32 t = tcg_temp_ebb_new_i32();
@ -706,13 +752,15 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
addr_p8 = tcgv_i64_temp(t);
}
addr_new = tci_extend_addr(addr_p8);
if (b) {
tcg_gen_bswap64_i64(b, y);
gen_st_i64(b, addr_p8, make_memop_idx(mop[1], idx));
gen_st_i64(b, addr_new, make_memop_idx(mop[1], idx));
tcg_temp_free_i64(b);
} else {
gen_st_i64(y, addr_p8, make_memop_idx(mop[1], idx));
gen_st_i64(y, addr_new, make_memop_idx(mop[1], idx));
}
maybe_free_addr(addr_p8, addr_new);
tcg_temp_free_internal(addr_p8);
} else {
if (tcg_ctx->addr_type == TCG_TYPE_I32) {

View file

@ -794,12 +794,24 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
taddr = regs[r1];
regs[r0] = tci_qemu_ld(env, taddr, oi, tb_ptr);
break;
case INDEX_op_tci_qemu_ld_rrr:
tci_args_rrr(insn, &r0, &r1, &r2);
taddr = regs[r1];
oi = regs[r2];
regs[r0] = tci_qemu_ld(env, taddr, oi, tb_ptr);
break;
case INDEX_op_qemu_st:
tci_args_rrm(insn, &r0, &r1, &oi);
taddr = regs[r1];
tci_qemu_st(env, taddr, regs[r0], oi, tb_ptr);
break;
case INDEX_op_tci_qemu_st_rrr:
tci_args_rrr(insn, &r0, &r1, &r2);
taddr = regs[r1];
oi = regs[r2];
tci_qemu_st(env, taddr, regs[r0], oi, tb_ptr);
break;
case INDEX_op_qemu_ld2:
tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
@ -1050,6 +1062,13 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
op_name, str_r(r0), str_r(r1), oi);
break;
case INDEX_op_tci_qemu_ld_rrr:
case INDEX_op_tci_qemu_st_rrr:
tci_args_rrr(insn, &r0, &r1, &r2);
info->fprintf_func(info->stream, "%-12s %s, %s, %s",
op_name, str_r(r0), str_r(r1), str_r(r2));
break;
case INDEX_op_qemu_ld2:
case INDEX_op_qemu_st2:
tci_args_rrrr(insn, &r0, &r1, &r2, &r3);

View file

@ -13,3 +13,5 @@ DEF(tci_rotl32, 1, 2, 0, TCG_OPF_NOT_PRESENT)
DEF(tci_rotr32, 1, 2, 0, TCG_OPF_NOT_PRESENT)
DEF(tci_setcond32, 1, 2, 1, TCG_OPF_NOT_PRESENT)
DEF(tci_movcond32, 1, 2, 1, TCG_OPF_NOT_PRESENT)
DEF(tci_qemu_ld_rrr, 1, 2, 0, TCG_OPF_NOT_PRESENT)
DEF(tci_qemu_st_rrr, 0, 3, 0, TCG_OPF_NOT_PRESENT)

View file

@ -1188,7 +1188,12 @@ static const TCGOutOpStore outop_st = {
static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data,
TCGReg addr, MemOpIdx oi)
{
tcg_out_op_rrm(s, INDEX_op_qemu_ld, data, addr, oi);
if (oi & ~0xffff) {
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, oi);
tcg_out_op_rrr(s, INDEX_op_tci_qemu_ld_rrr, data, addr, TCG_REG_TMP);
} else {
tcg_out_op_rrm(s, INDEX_op_qemu_ld, data, addr, oi);
}
}
static const TCGOutOpQemuLdSt outop_qemu_ld = {
@ -1213,7 +1218,12 @@ static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = {
static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data,
TCGReg addr, MemOpIdx oi)
{
tcg_out_op_rrm(s, INDEX_op_qemu_st, data, addr, oi);
if (oi & ~0xffff) {
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, oi);
tcg_out_op_rrr(s, INDEX_op_tci_qemu_st_rrr, data, addr, TCG_REG_TMP);
} else {
tcg_out_op_rrm(s, INDEX_op_qemu_st, data, addr, oi);
}
}
static const TCGOutOpQemuLdSt outop_qemu_st = {