From d01035767e47f69b4e545d1843dbaf08e6a74752 Mon Sep 17 00:00:00 2001
From: Andrei Warkentin
Date: Tue, 13 Dec 2022 00:13:58 -0600
Subject: [PATCH] notdirty_write: fix store-related performance problems

Every store would always cause the tb_invalidate_phys_page_fast path to be
invoked, amounting to a 40x slowdown of stores compared to loads.

Change this code to only worry about TB invalidation for regions marked as
executable (i.e. emulated executable).

Even without uc_set_native_thunks, this change fixes most of the performance
issues seen with thunking to native calls.

Signed-off-by: Andrei Warkentin
---
 qemu/aarch64.h                 |  1 +
 qemu/accel/tcg/cputlb.c        | 93 ++++++++++++++++++++++++++++------
 qemu/accel/tcg/translate-all.c |  5 +++
 qemu/arm.h                     |  1 +
 qemu/include/exec/exec-all.h   |  1 +
 qemu/include/exec/ram_addr.h   |  6 ---
 qemu/m68k.h                    |  1 +
 qemu/mips.h                    |  1 +
 qemu/mips64.h                  |  1 +
 qemu/mips64el.h                |  1 +
 qemu/mipsel.h                  |  1 +
 qemu/ppc.h                     |  1 +
 qemu/ppc64.h                   |  1 +
 qemu/riscv32.h                 |  1 +
 qemu/riscv64.h                 |  1 +
 qemu/s390x.h                   |  1 +
 qemu/sparc.h                   |  1 +
 qemu/sparc64.h                 |  1 +
 qemu/tricore.h                 |  1 +
 qemu/x86_64.h                  |  1 +
 20 files changed, 98 insertions(+), 23 deletions(-)

diff --git a/qemu/aarch64.h b/qemu/aarch64.h
index 106ffd2f..acbff490 100644
--- a/qemu/aarch64.h
+++ b/qemu/aarch64.h
@@ -787,6 +787,7 @@
 #define tlb_protect_code tlb_protect_code_aarch64
 #define tlb_unprotect_code tlb_unprotect_code_aarch64
 #define tlb_reset_dirty tlb_reset_dirty_aarch64
+#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_aarch64
 #define tlb_set_dirty tlb_set_dirty_aarch64
 #define tlb_set_page_with_attrs tlb_set_page_with_attrs_aarch64
 #define tlb_set_page tlb_set_page_aarch64
diff --git a/qemu/accel/tcg/cputlb.c b/qemu/accel/tcg/cputlb.c
index 41ae6b07..81061bbc 100644
--- a/qemu/accel/tcg/cputlb.c
+++ b/qemu/accel/tcg/cputlb.c
@@ -661,6 +661,32 @@ static void tlb_reset_dirty_range_locked(struct uc_struct *uc, CPUTLBEntry *tlb_
     }
 }
 
+/*
+ * Flag @tlb_entry as TLB_NOTDIRTY when its write address, masked to its
+ * page, lies in [start, start + length). Entries already carrying any of
+ * TLB_INVALID_MASK, TLB_MMIO, TLB_DISCARD_WRITE or TLB_NOTDIRTY are left
+ * untouched. @uc is not used here.
+ */
+static void tlb_reset_dirty_range_by_vaddr_locked(struct uc_struct *uc, CPUTLBEntry *tlb_entry,
+                                                  target_ulong start, target_ulong length)
+{
+    /* Guest address; uintptr_t could truncate it (see TCG_OVERSIZED_GUEST). */
+    target_ulong addr = tlb_entry->addr_write;
+
+    if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
+                 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
+        addr &= TARGET_PAGE_MASK;
+        if ((addr - start) < length) {
+#if TCG_OVERSIZED_GUEST
+            tlb_entry->addr_write |= TLB_NOTDIRTY;
+#else
+            tlb_entry->addr_write = tlb_entry->addr_write | TLB_NOTDIRTY;
+#endif
+        }
+    }
+}
+
+
 /*
  * Called with tlb_c.lock held.
  * Called only from the vCPU context, i.e. the TLB's owner thread.
@@ -699,6 +725,35 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
     }
 }
 
+/*
+ * Guest-virtual-address counterpart of tlb_reset_dirty(): for each MMU mode
+ * of @cpu, pass every main-table and vtable entry to
+ * tlb_reset_dirty_range_by_vaddr_locked() for [start1, start1 + length).
+ */
+void tlb_reset_dirty_by_vaddr(CPUState *cpu, target_ulong start1, target_ulong length)
+{
+    struct uc_struct *uc = cpu->uc;
+    CPUArchState *env;
+
+    int mmu_idx;
+
+    env = cpu->env_ptr;
+    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
+        unsigned int i;
+        unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
+
+        for (i = 0; i < n; i++) {
+            tlb_reset_dirty_range_by_vaddr_locked(uc, &env_tlb(env)->f[mmu_idx].table[i],
+                                                  start1, length);
+        }
+
+        for (i = 0; i < CPU_VTLB_SIZE; i++) {
+            tlb_reset_dirty_range_by_vaddr_locked(uc, &env_tlb(env)->d[mmu_idx].vtable[i],
+                                                  start1, length);
+        }
+    }
+}
+
 /* Called with tlb_c.lock held */
 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
                                          target_ulong vaddr)
@@ -1144,30 +1199,34 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
 }
 
+/*
+ * Called for a write to a page whose TLB entry is flagged TLB_NOTDIRTY.
+ * @mr is the region being written, or NULL to look it up from @mem_vaddr.
+ * Translated blocks are invalidated only for executable regions; the TLB
+ * entry is marked dirty again in every case.
+ */
 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
-                           CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
+                           CPUIOTLBEntry *iotlbentry, uintptr_t retaddr,
+                           MemoryRegion *mr)
 {
     ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
 
-    // trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
+    if (mr == NULL) {
+        mr = memory_mapping(cpu->uc, mem_vaddr);
+    }
 
-    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
+    /*
+     * NOTE(review): memory_mapping() is assumed able to return NULL; if so,
+     * invalidate anyway instead of dereferencing it.
+     */
+    if (mr == NULL || (mr->perms & UC_PROT_EXEC) != 0) {
         struct page_collection *pages
             = page_collection_lock(cpu->uc, ram_addr, ram_addr + size);
         tb_invalidate_phys_page_fast(cpu->uc, pages, ram_addr, size, retaddr);
         page_collection_unlock(pages);
     }
 
-    /*
-     * Set both VGA and migration bits for simplicity and to remove
-     * the notdirty callback faster.
-     */
-    cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
-
-    /* We remove the notdirty callback only if the code has been flushed. */
-    if (!cpu_physical_memory_is_clean(ram_addr)) {
-        // trace_memory_notdirty_set_dirty(mem_vaddr);
-        tlb_set_dirty(cpu, mem_vaddr);
-    }
+    /* For exec pages, this is cleared in tb_gen_code. */
+    tlb_set_dirty(cpu, mem_vaddr);
 }
 
 /*
@@ -1244,7 +1303,7 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
 
         /* Handle clean RAM pages. */
         if (tlb_addr & TLB_NOTDIRTY) {
-            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
+            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr, NULL);
         }
     }
 
@@ -1370,7 +1429,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
 
     if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
         notdirty_write(env_cpu(env), addr, 1 << s_bits,
-                       &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
+                       &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr, NULL);
     }
 
     return hostaddr;
@@ -2216,7 +2275,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
 
         /* Handle clean RAM pages. */
         if (tlb_addr & TLB_NOTDIRTY) {
-            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
+            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr, mr);
         }
 
         haddr = (void *)((uintptr_t)addr + entry->addend);
diff --git a/qemu/accel/tcg/translate-all.c b/qemu/accel/tcg/translate-all.c
index b4977054..2486287b 100644
--- a/qemu/accel/tcg/translate-all.c
+++ b/qemu/accel/tcg/translate-all.c
@@ -1843,6 +1843,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     if ((pc & TARGET_PAGE_MASK) != virt_page2) {
         phys_page2 = get_page_addr_code(env, virt_page2);
     }
+
+    /* Undoes tlb_set_dirty in notdirty_write. */
+    tlb_reset_dirty_by_vaddr(cpu, pc & TARGET_PAGE_MASK,
+                             (pc & ~TARGET_PAGE_MASK) + tb->size);
+
     /*
      * No explicit memory barrier is required -- tb_link_page() makes the
      * TB visible in a consistent state.
diff --git a/qemu/arm.h b/qemu/arm.h
index 42ad9c51..9b3357d0 100644
--- a/qemu/arm.h
+++ b/qemu/arm.h
@@ -787,6 +787,7 @@
 #define tlb_protect_code tlb_protect_code_arm
 #define tlb_unprotect_code tlb_unprotect_code_arm
 #define tlb_reset_dirty tlb_reset_dirty_arm
+#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_arm
 #define tlb_set_dirty tlb_set_dirty_arm
 #define tlb_set_page_with_attrs tlb_set_page_with_attrs_arm
 #define tlb_set_page tlb_set_page_arm
diff --git a/qemu/include/exec/exec-all.h b/qemu/include/exec/exec-all.h
index f6154b84..b999c716 100644
--- a/qemu/include/exec/exec-all.h
+++ b/qemu/include/exec/exec-all.h
@@ -464,6 +464,7 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
                                         void **hostp);
 
 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length);
+void tlb_reset_dirty_by_vaddr(CPUState *cpu, target_ulong start1, target_ulong length);
 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr);
 
 /* exec.c */
diff --git a/qemu/include/exec/ram_addr.h b/qemu/include/exec/ram_addr.h
index a56832e0..d6b96bc7 100644
--- a/qemu/include/exec/ram_addr.h
+++ b/qemu/include/exec/ram_addr.h
@@ -67,12 +67,6 @@ static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
     return false;
 }
 
-static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
-                                                      unsigned client)
-{
-    return cpu_physical_memory_get_dirty(addr, 1, client);
-}
-
 static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
 {
     return true;
diff --git a/qemu/m68k.h b/qemu/m68k.h
index a743b019..228d8b6d 100644
--- a/qemu/m68k.h
+++ b/qemu/m68k.h
@@ -787,6 +787,7 @@
 #define tlb_protect_code tlb_protect_code_m68k
 #define tlb_unprotect_code tlb_unprotect_code_m68k
 #define tlb_reset_dirty tlb_reset_dirty_m68k
+#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_m68k
 #define tlb_set_dirty tlb_set_dirty_m68k
 #define tlb_set_page_with_attrs tlb_set_page_with_attrs_m68k
 #define tlb_set_page tlb_set_page_m68k
diff --git a/qemu/mips.h b/qemu/mips.h
index af08a938..086e51fa 100644
--- a/qemu/mips.h
+++ b/qemu/mips.h
@@ -787,6 +787,7 @@
 #define tlb_protect_code tlb_protect_code_mips
 #define tlb_unprotect_code tlb_unprotect_code_mips
 #define tlb_reset_dirty tlb_reset_dirty_mips
+#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_mips
 #define tlb_set_dirty tlb_set_dirty_mips
 #define tlb_set_page_with_attrs tlb_set_page_with_attrs_mips
 #define tlb_set_page tlb_set_page_mips
diff --git a/qemu/mips64.h b/qemu/mips64.h
index 0b46bae1..f70c8b14 100644
--- a/qemu/mips64.h
+++ b/qemu/mips64.h
@@ -787,6 +787,7 @@
 #define tlb_protect_code tlb_protect_code_mips64
 #define tlb_unprotect_code tlb_unprotect_code_mips64
 #define tlb_reset_dirty tlb_reset_dirty_mips64
+#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_mips64
 #define tlb_set_dirty tlb_set_dirty_mips64
 #define tlb_set_page_with_attrs tlb_set_page_with_attrs_mips64
 #define tlb_set_page tlb_set_page_mips64
diff --git a/qemu/mips64el.h b/qemu/mips64el.h
index dd28ff03..e49d6ccc 100644
--- a/qemu/mips64el.h
+++ b/qemu/mips64el.h
@@ -787,6 +787,7 @@
 #define tlb_protect_code tlb_protect_code_mips64el
 #define tlb_unprotect_code tlb_unprotect_code_mips64el
 #define tlb_reset_dirty tlb_reset_dirty_mips64el
+#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_mips64el
 #define tlb_set_dirty tlb_set_dirty_mips64el
 #define tlb_set_page_with_attrs tlb_set_page_with_attrs_mips64el
 #define tlb_set_page tlb_set_page_mips64el
diff --git a/qemu/mipsel.h b/qemu/mipsel.h
index 864857c8..87da686e 100644
--- a/qemu/mipsel.h
+++ b/qemu/mipsel.h
@@ -787,6 +787,7 @@
 #define tlb_protect_code tlb_protect_code_mipsel
 #define tlb_unprotect_code tlb_unprotect_code_mipsel
 #define tlb_reset_dirty tlb_reset_dirty_mipsel
+#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_mipsel
 #define tlb_set_dirty tlb_set_dirty_mipsel
 #define tlb_set_page_with_attrs tlb_set_page_with_attrs_mipsel
 #define tlb_set_page tlb_set_page_mipsel
diff --git a/qemu/ppc.h b/qemu/ppc.h
index 710231b8..6e90dcb7 100644
--- a/qemu/ppc.h
+++ b/qemu/ppc.h
@@ -787,6 +787,7 @@
 #define tlb_protect_code tlb_protect_code_ppc
 #define tlb_unprotect_code tlb_unprotect_code_ppc
 #define tlb_reset_dirty tlb_reset_dirty_ppc
+#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_ppc
 #define tlb_set_dirty tlb_set_dirty_ppc
 #define tlb_set_page_with_attrs tlb_set_page_with_attrs_ppc
 #define tlb_set_page tlb_set_page_ppc
diff --git a/qemu/ppc64.h b/qemu/ppc64.h
index 41a4a3e3..c03305ed 100644
--- a/qemu/ppc64.h
+++ b/qemu/ppc64.h
@@ -787,6 +787,7 @@
 #define tlb_protect_code tlb_protect_code_ppc64
 #define tlb_unprotect_code tlb_unprotect_code_ppc64
 #define tlb_reset_dirty tlb_reset_dirty_ppc64
+#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_ppc64
 #define tlb_set_dirty tlb_set_dirty_ppc64
 #define tlb_set_page_with_attrs tlb_set_page_with_attrs_ppc64
 #define tlb_set_page tlb_set_page_ppc64
diff --git a/qemu/riscv32.h b/qemu/riscv32.h
index e4e24f52..43c0b481 100644
--- a/qemu/riscv32.h
+++ b/qemu/riscv32.h
@@ -787,6 +787,7 @@
 #define tlb_protect_code tlb_protect_code_riscv32
 #define tlb_unprotect_code tlb_unprotect_code_riscv32
 #define tlb_reset_dirty tlb_reset_dirty_riscv32
+#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_riscv32
 #define tlb_set_dirty tlb_set_dirty_riscv32
 #define tlb_set_page_with_attrs tlb_set_page_with_attrs_riscv32
 #define tlb_set_page tlb_set_page_riscv32
diff --git a/qemu/riscv64.h b/qemu/riscv64.h
index 95a4b6ed..2c0b5cc0 100644
--- a/qemu/riscv64.h
+++ b/qemu/riscv64.h
@@ -787,6 +787,7 @@
 #define tlb_protect_code tlb_protect_code_riscv64
 #define tlb_unprotect_code tlb_unprotect_code_riscv64
 #define tlb_reset_dirty tlb_reset_dirty_riscv64
+#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_riscv64
 #define tlb_set_dirty tlb_set_dirty_riscv64
 #define tlb_set_page_with_attrs tlb_set_page_with_attrs_riscv64
 #define tlb_set_page tlb_set_page_riscv64
diff --git a/qemu/s390x.h b/qemu/s390x.h
index 97e8ecd7..70c8fbae 100644
--- a/qemu/s390x.h
+++ b/qemu/s390x.h
@@ -787,6 +787,7 @@
 #define tlb_protect_code tlb_protect_code_s390x
 #define tlb_unprotect_code tlb_unprotect_code_s390x
 #define tlb_reset_dirty tlb_reset_dirty_s390x
+#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_s390x
 #define tlb_set_dirty tlb_set_dirty_s390x
 #define tlb_set_page_with_attrs tlb_set_page_with_attrs_s390x
 #define tlb_set_page tlb_set_page_s390x
diff --git a/qemu/sparc.h b/qemu/sparc.h
index 51d623da..2c81fd45 100644
--- a/qemu/sparc.h
+++ b/qemu/sparc.h
@@ -787,6 +787,7 @@
 #define tlb_protect_code tlb_protect_code_sparc
 #define tlb_unprotect_code tlb_unprotect_code_sparc
 #define tlb_reset_dirty tlb_reset_dirty_sparc
+#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_sparc
 #define tlb_set_dirty tlb_set_dirty_sparc
 #define tlb_set_page_with_attrs tlb_set_page_with_attrs_sparc
 #define tlb_set_page tlb_set_page_sparc
diff --git a/qemu/sparc64.h b/qemu/sparc64.h
index 5779f315..8c31e2fa 100644
--- a/qemu/sparc64.h
+++ b/qemu/sparc64.h
@@ -787,6 +787,7 @@
 #define tlb_protect_code tlb_protect_code_sparc64
 #define tlb_unprotect_code tlb_unprotect_code_sparc64
 #define tlb_reset_dirty tlb_reset_dirty_sparc64
+#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_sparc64
 #define tlb_set_dirty tlb_set_dirty_sparc64
 #define tlb_set_page_with_attrs tlb_set_page_with_attrs_sparc64
 #define tlb_set_page tlb_set_page_sparc64
diff --git a/qemu/tricore.h b/qemu/tricore.h
index 5fc51625..ef8aa39f 100644
--- a/qemu/tricore.h
+++ b/qemu/tricore.h
@@ -787,6 +787,7 @@
 #define tlb_protect_code tlb_protect_code_tricore
 #define tlb_unprotect_code tlb_unprotect_code_tricore
 #define tlb_reset_dirty tlb_reset_dirty_tricore
+#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_tricore
 #define tlb_set_dirty tlb_set_dirty_tricore
 #define tlb_set_page_with_attrs tlb_set_page_with_attrs_tricore
 #define tlb_set_page tlb_set_page_tricore
diff --git a/qemu/x86_64.h b/qemu/x86_64.h
index be528692..7ca33208 100644
--- a/qemu/x86_64.h
+++ b/qemu/x86_64.h
@@ -787,6 +787,7 @@
 #define tlb_protect_code tlb_protect_code_x86_64
 #define tlb_unprotect_code tlb_unprotect_code_x86_64
 #define tlb_reset_dirty tlb_reset_dirty_x86_64
+#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_x86_64
 #define tlb_set_dirty tlb_set_dirty_x86_64
 #define tlb_set_page_with_attrs tlb_set_page_with_attrs_x86_64
 #define tlb_set_page tlb_set_page_x86_64