From f86e8f3d138de112d66ec28792e0fd303bf129ca Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Thu, 3 Mar 2022 15:57:10 +0000
Subject: tcg: Add host memory barriers to cpu_ldst.h interfaces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bring the helpers into line with the rest of tcg in respecting
guest memory ordering.

Reviewed-by: Philippe Mathieu-Daudé
Signed-off-by: Richard Henderson
---
 accel/tcg/cputlb.c    | 10 ++++++++++
 accel/tcg/internal.h  | 34 ++++++++++++++++++++++++++++++++++
 accel/tcg/user-exec.c | 10 ++++++++++
 3 files changed, 54 insertions(+)

(limited to 'accel')

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index e02cfc550e..5666a8e23a 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -2339,6 +2339,7 @@ static uint8_t do_ld1_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi,
     MMULookupLocals l;
     bool crosspage;
 
+    cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
     tcg_debug_assert(!crosspage);
 
@@ -2360,6 +2361,7 @@ static uint16_t do_ld2_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi,
     uint16_t ret;
     uint8_t a, b;
 
+    cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
     if (likely(!crosspage)) {
         return do_ld_2(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
@@ -2390,6 +2392,7 @@ static uint32_t do_ld4_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi,
     bool crosspage;
     uint32_t ret;
 
+    cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
     if (likely(!crosspage)) {
         return do_ld_4(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
@@ -2417,6 +2420,7 @@ static uint64_t do_ld8_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi,
     bool crosspage;
     uint64_t ret;
 
+    cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
     if (likely(!crosspage)) {
         return do_ld_8(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
@@ -2469,6 +2473,7 @@ static Int128 do_ld16_mmu(CPUArchState *env, vaddr addr,
     Int128 ret;
     int first;
 
+    cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD, &l);
     if (likely(!crosspage)) {
         /* Perform the load host endian. */
@@ -2802,6 +2807,7 @@ void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
     bool crosspage;
 
     tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_8);
+    cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
     tcg_debug_assert(!crosspage);
 
@@ -2815,6 +2821,7 @@ static void do_st2_mmu(CPUArchState *env, vaddr addr, uint16_t val,
     bool crosspage;
     uint8_t a, b;
 
+    cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
     if (likely(!crosspage)) {
         do_st_2(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
@@ -2843,6 +2850,7 @@ static void do_st4_mmu(CPUArchState *env, vaddr addr, uint32_t val,
     MMULookupLocals l;
     bool crosspage;
 
+    cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
     if (likely(!crosspage)) {
         do_st_4(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
@@ -2870,6 +2878,7 @@ static void do_st8_mmu(CPUArchState *env, vaddr addr, uint64_t val,
     MMULookupLocals l;
     bool crosspage;
 
+    cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
     if (likely(!crosspage)) {
         do_st_8(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
@@ -2899,6 +2908,7 @@ static void do_st16_mmu(CPUArchState *env, vaddr addr, Int128 val,
     uint64_t a, b;
     int first;
 
+    cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
     if (likely(!crosspage)) {
         /* Swap to host endian if necessary, then store. */
diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
index 91f308bdfa..650c3ac53f 100644
--- a/accel/tcg/internal.h
+++ b/accel/tcg/internal.h
@@ -78,4 +78,38 @@ extern int64_t max_advance;
 
 extern bool one_insn_per_tb;
 
+/**
+ * tcg_req_mo:
+ * @type: TCGBar
+ *
+ * Filter @type to the barrier that is required for the guest
+ * memory ordering vs the host memory ordering.  A non-zero
+ * result indicates that some barrier is required.
+ *
+ * If TCG_GUEST_DEFAULT_MO is not defined, assume that the
+ * guest requires strict ordering.
+ *
+ * This is a macro so that it's constant even without optimization.
+ */
+#ifdef TCG_GUEST_DEFAULT_MO
+# define tcg_req_mo(type) \
+    ((type) & TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO)
+#else
+# define tcg_req_mo(type) ((type) & ~TCG_TARGET_DEFAULT_MO)
+#endif
+
+/**
+ * cpu_req_mo:
+ * @type: TCGBar
+ *
+ * If tcg_req_mo indicates a barrier for @type is required
+ * for the guest memory model, issue a host memory barrier.
+ */
+#define cpu_req_mo(type)          \
+    do {                          \
+        if (tcg_req_mo(type)) {   \
+            smp_mb();             \
+        }                         \
+    } while (0)
+
 #endif /* ACCEL_TCG_INTERNAL_H */
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index f8b16d6ab8..8fbcbf9771 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -914,6 +914,7 @@ static uint8_t do_ld1_mmu(CPUArchState *env, abi_ptr addr,
     uint8_t ret;
 
     tcg_debug_assert((mop & MO_SIZE) == MO_8);
+    cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
     ret = ldub_p(haddr);
     clear_helper_retaddr();
@@ -947,6 +948,7 @@ static uint16_t do_ld2_mmu(CPUArchState *env, abi_ptr addr,
     uint16_t ret;
 
     tcg_debug_assert((mop & MO_SIZE) == MO_16);
+    cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
     ret = load_atom_2(env, ra, haddr, mop);
     clear_helper_retaddr();
@@ -984,6 +986,7 @@ static uint32_t do_ld4_mmu(CPUArchState *env, abi_ptr addr,
     uint32_t ret;
 
     tcg_debug_assert((mop & MO_SIZE) == MO_32);
+    cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
     ret = load_atom_4(env, ra, haddr, mop);
     clear_helper_retaddr();
@@ -1021,6 +1024,7 @@ static uint64_t do_ld8_mmu(CPUArchState *env, abi_ptr addr,
     uint64_t ret;
 
     tcg_debug_assert((mop & MO_SIZE) == MO_64);
+    cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
     ret = load_atom_8(env, ra, haddr, mop);
     clear_helper_retaddr();
@@ -1052,6 +1056,7 @@ static Int128 do_ld16_mmu(CPUArchState *env, abi_ptr addr,
     Int128 ret;
 
     tcg_debug_assert((mop & MO_SIZE) == MO_128);
+    cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
     ret = load_atom_16(env, ra, haddr, mop);
     clear_helper_retaddr();
@@ -1087,6 +1092,7 @@ static void do_st1_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
     void *haddr;
 
     tcg_debug_assert((mop & MO_SIZE) == MO_8);
+    cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
     stb_p(haddr, val);
     clear_helper_retaddr();
@@ -1111,6 +1117,7 @@ static void do_st2_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
     void *haddr;
 
     tcg_debug_assert((mop & MO_SIZE) == MO_16);
+    cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
 
     if (mop & MO_BSWAP) {
@@ -1139,6 +1146,7 @@ static void do_st4_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
     void *haddr;
 
     tcg_debug_assert((mop & MO_SIZE) == MO_32);
+    cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
 
     if (mop & MO_BSWAP) {
@@ -1167,6 +1175,7 @@ static void do_st8_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
     void *haddr;
 
     tcg_debug_assert((mop & MO_SIZE) == MO_64);
+    cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
 
     if (mop & MO_BSWAP) {
@@ -1195,6 +1204,7 @@ static void do_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
     void *haddr;
 
     tcg_debug_assert((mop & MO_SIZE) == MO_128);
+    cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
 
     if (mop & MO_BSWAP) {
--
cgit v1.2.3
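Aside: a minimal standalone sketch of the tcg_req_mo()/cpu_req_mo() filtering
added by this patch, assuming placeholder constants rather than the real QEMU
definitions (in QEMU the bits come from TCGBar in tcg/tcg.h, the guest value
from TCG_GUEST_DEFAULT_MO in the guest's cpu.h, and the host value from
TCG_TARGET_DEFAULT_MO in the host's tcg-target.h); GUEST_MO, HOST_MO and
req_mo() below are illustrative names only:

    /* Sketch of the barrier filtering; constants are illustrative
     * placeholders, not the real QEMU definitions. */
    #include <stdio.h>

    #define TCG_MO_LD_LD 0x01   /* earlier load ordered before later load  */
    #define TCG_MO_ST_LD 0x02   /* earlier store ordered before later load */
    #define TCG_MO_LD_ST 0x04   /* earlier load ordered before later store */
    #define TCG_MO_ST_ST 0x08   /* earlier store ordered before later store */
    #define TCG_MO_ALL   0x0f

    /* Example guest: x86-like, everything ordered except store->load. */
    #define GUEST_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
    /* Example host: weakly ordered, guarantees nothing by default. */
    #define HOST_MO  0

    /* Keep only the orderings the guest needs and the host lacks. */
    #define req_mo(type) ((type) & GUEST_MO & ~HOST_MO)

    int main(void)
    {
        /* Same bits a load helper passes to cpu_req_mo(). */
        unsigned need = req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);

        if (need) {
            /* Here cpu_req_mo() would issue the host barrier (smp_mb). */
            printf("host barrier needed, remaining bits 0x%x\n", need);
        } else {
            printf("host ordering already sufficient, no barrier\n");
        }
        return 0;
    }

With these placeholder values the load case keeps TCG_MO_LD_LD (0x1), so a
barrier would be issued; on a host whose default ordering already covered
TCG_MO_ALL, req_mo() would evaluate to zero at compile time and the branch
would be optimized away, which is why tcg_req_mo() is a macro.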