diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index a8d891689..8445d2d0f 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -1,4 +1,5 @@ set(SRCS + arm/cache/cache.cpp arm/disassembler/arm_disasm.cpp arm/disassembler/load_symbol_map.cpp arm/dyncom/arm_dyncom.cpp @@ -127,6 +128,7 @@ set(SRCS set(HEADERS arm/arm_interface.h + arm/cache/cache.h arm/disassembler/arm_disasm.h arm/disassembler/load_symbol_map.h arm/dyncom/arm_dyncom.h diff --git a/src/core/arm/cache/cache.cpp b/src/core/arm/cache/cache.cpp new file mode 100644 index 000000000..df3a3e349 --- /dev/null +++ b/src/core/arm/cache/cache.cpp @@ -0,0 +1,155 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "core/arm/cache/cache.h" + +namespace Cache { + +CacheBase::CacheBase(bool index_mode, OnClearCb clearcb) : index_mode(index_mode) { + page_pointers.fill(nullptr); + Clear(); + SetClearCallback(clearcb); + g_cachemanager.RegisterCache(this); +} + +CacheBase::~CacheBase() { + g_cachemanager.UnregisterCache(this); +} + +void CacheBase::Clear() { + if (OnClearCallback != nullptr) OnClearCallback(); + + for (auto& cache : ptr_caches) cache.data.assign(cache.data.size(), nullptr); + + if (index_mode) { + blocks_pc.assign(MAX_BLOCKS, INVALID_BLOCK); + next_block = num_blocks = 0; + } +} + +bool CacheBase::RemoveBlock(u32 pc) { // Forgets the entry cached for pc; returns true only if one existed + u8** ptr = page_pointers[pc >> Memory::PAGE_BITS]; + if (ptr != nullptr) { + ptr = &ptr[pc & Memory::PAGE_MASK]; + if (*ptr == nullptr) return false; + + if (index_mode) { + const u32 id = pointer_to_id(*ptr); + ASSERT(blocks_pc[id] == pc); + + blocks_pc[id] = INVALID_BLOCK; + if (id < next_block) next_block = id; + while (num_blocks > 0 && blocks_pc[num_blocks - 1] == INVALID_BLOCK) --num_blocks; + } + *ptr = nullptr; + return true; + } + return false; +} + +bool CacheBase::RemoveRange(u32 start, u32 end) { // Forgets every entry cached in [start, end); returns true if any was removed + bool result = false; +
for (auto& cache : ptr_caches) { + for (u32 i = std::max(start, cache.addr); i < std::min(end, cache.addr_end); ++i) { // u32, not int: guest addresses at or above 0x80000000 do not fit in int + u8** ptr = &cache.data[i - cache.addr]; + if (*ptr == nullptr) continue; + + if (index_mode) { + const u32 id = pointer_to_id(*ptr); + ASSERT(blocks_pc[id] == i); + + blocks_pc[id] = INVALID_BLOCK; + if (id < next_block) next_block = id; + while (num_blocks > 0 && blocks_pc[num_blocks - 1] == INVALID_BLOCK) --num_blocks; + } + *ptr = nullptr; + result = true; + } + } + return result; +} + +void CacheBase::OnCodeLoad(u32 address, u32 size) { // Registers [address, address + size) as a code region with one pointer slot per byte address + const u32 end = address + size; + + // The new region must not overlap a region that is already registered + for (auto const& cache : ptr_caches) ASSERT((address >= cache.addr_end) || (end <= cache.addr)); + + ASSERT((address & Memory::PAGE_MASK) == 0 && (size & Memory::PAGE_MASK) == 0); + + BlockPtrCache cache{ address, address + size }; + cache.data.assign(size, nullptr); + + for (u32 i = address; i < end; i += Memory::PAGE_SIZE) { page_pointers[i >> Memory::PAGE_BITS] = &cache.data[i - address]; } + ptr_caches.emplace_back(std::move(cache)); +} + +void CacheBase::OnCodeUnload(u32 address, u32 size) { // Drops every registered region that overlaps [address, address + size), along with its cached entries + const u32 end = address + size; + + ptr_caches.erase(std::remove_if(ptr_caches.begin(), ptr_caches.end(), + [&](auto const& cache) { + if ((address < cache.addr_end) && (end > cache.addr)) { + RemoveRange(cache.addr, cache.addr_end); + for (u32 i = cache.addr; i < cache.addr_end; i += Memory::PAGE_SIZE) { page_pointers[i >> Memory::PAGE_BITS] = nullptr; } + return true; + } + return false; + }), + ptr_caches.cend()); +} + +u8*& CacheBase::GetNewPtr(u32 pc) { // Returns the slot for pc, registering pc's page first if needed; in index mode a free block is assigned to the slot + DEBUG_ASSERT(!index_mode || next_block == MAX_BLOCKS || ((next_block < MAX_BLOCKS) && blocks_pc[next_block] == INVALID_BLOCK)); + DEBUG_ASSERT(GetPtr(pc) == nullptr); + + u8** page_ptr = page_pointers[pc >> Memory::PAGE_BITS]; + if (page_ptr == nullptr) { + // pc isn't within mapped code: register its page on demand + OnCodeLoad(pc & ~Memory::PAGE_MASK, Memory::PAGE_SIZE); + page_ptr = page_pointers[pc >> 
Memory::PAGE_BITS]; + } + + u8** block_ptr = &page_ptr[pc & Memory::PAGE_MASK]; + + DEBUG_ASSERT(*block_ptr == nullptr); + + if (index_mode) { + if (next_block == MAX_BLOCKS) Clear(); + + blocks_pc[next_block] = pc; + *block_ptr = id_to_pointer(next_block); + + do ++next_block; while (next_block < MAX_BLOCKS && next_block <= num_blocks && blocks_pc[next_block] != INVALID_BLOCK); // stop at MAX_BLOCKS: blocks_pc has no element at that index + if (next_block > num_blocks) num_blocks++; + } + + return *block_ptr; +} + + +void CacheManager::RegisterCode(u32 address, u32 size) const { + for (auto const& cache : caches) cache->OnCodeLoad(address, size); +} + +void CacheManager::UnregisterCode(u32 address, u32 size) const { + for (auto const& cache : caches) cache->OnCodeUnload(address, size); +} + +void CacheManager::ClearCache() const { + for (auto const& cache : caches) cache->Clear(); +} + +void CacheManager::RegisterCache(CacheBase* cache) { + caches.push_back(cache); +} + +void CacheManager::UnregisterCache(CacheBase* cache) { + caches.erase(std::remove(caches.begin(), caches.end(), cache), caches.end()); +} + +CacheManager g_cachemanager; + +} diff --git a/src/core/arm/cache/cache.h b/src/core/arm/cache/cache.h new file mode 100644 index 000000000..924396a70 --- /dev/null +++ b/src/core/arm/cache/cache.h @@ -0,0 +1,137 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include +#include + +#include "common/assert.h" +#include "common/common_types.h" + +#include "core/memory.h" + +namespace Cache { + +using OnClearCb = std::function; + +const u32 MAX_BLOCKS = 0x40000; +const u32 INVALID_BLOCK = 0xFFFFFFFF; + +struct BlockPtrCache { + u32 addr; + u32 addr_end; + std::vector data; +}; + +class CacheBase { +protected: + explicit CacheBase(bool index_mode, OnClearCb clearcb); + ~CacheBase(); + +public: + /// Called when the cache needs to reset or Clear() is called + void SetClearCallback(OnClearCb cb) { OnClearCallback = cb; } + + /// Clear and call clear callback + void Clear(); + // returns true if block was found, false otherwise + bool RemoveBlock(u32 pc); + bool RemoveRange(u32 start, u32 end); + + void OnCodeLoad(u32 address, u32 size); + void OnCodeUnload(u32 address, u32 size); + +protected: + u8* GetPtr(u32 pc) const { // Entry cached for pc; nullptr if pc's page is not registered or nothing is cached there + u8** ptr = page_pointers[pc >> Memory::PAGE_BITS]; + if (ptr != nullptr) { + DEBUG_ASSERT(!index_mode || ptr[pc & Memory::PAGE_MASK] == nullptr || blocks_pc[pointer_to_id(ptr[pc & Memory::PAGE_MASK])] == pc); // an empty slot has no block id to check + return ptr[pc & Memory::PAGE_MASK]; + } + return nullptr; + } + u8*& GetNewPtr(u32 pc); + + std::function id_to_pointer; + std::function pointer_to_id; + +private: + bool index_mode; + OnClearCb OnClearCallback = nullptr; + + std::vector ptr_caches; + std::array page_pointers; + + std::vector blocks_pc; + u32 next_block = 0; + u32 num_blocks = 0; +}; + +/// Use this if you only need to store a pointer +template +class PtrCache final : public CacheBase { +public: + explicit PtrCache(OnClearCb clearcb = nullptr) : CacheBase(false, clearcb) { + static_assert(std::is_pointer::value, "T must be a pointer"); + } + ~PtrCache() {} + + /// Get cached pointer for PC + T FindPtr(u32 pc) { return reinterpret_cast(GetPtr(pc)); } + + /// Get reference of pointer for PC + T& GetNewPtr(u32 pc) { return reinterpret_cast(CacheBase::GetNewPtr(pc)); } +}; + +/// Index based cache +template +class Cache final : 
public CacheBase { +public: + explicit Cache(OnClearCb clearcb = nullptr) : CacheBase(true, clearcb) { + id_to_pointer = [this](u32 id) -> u8* { + return reinterpret_cast(&blocks[id]); + }; + pointer_to_id = [this](u8* ptr) -> u32 { + return static_cast(reinterpret_cast(ptr) - &blocks[0]); + }; + } + ~Cache() {} + + /// Get block cached for PC + T* FindBlock(u32 pc) { return reinterpret_cast(GetPtr(pc)); } + + /// Allocate block for PC + T& GetNewBlock(u32 pc) { return *reinterpret_cast(GetNewPtr(pc)); } + +private: + std::array blocks; +}; + +class CacheManager { +public: + CacheManager() {} + ~CacheManager() {} + + /// Loaders call these when mapping/unmapping code + void RegisterCode(u32 address, u32 size) const; + void UnregisterCode(u32 address, u32 size = 1) const; + + /// Clear every cache + void ClearCache() const; + +private: + std::list caches; + +public: + void RegisterCache(CacheBase* cache); + void UnregisterCache(CacheBase* cache); +}; + +extern CacheManager g_cachemanager; + +} diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp index 647784208..99cf5dfee 100644 --- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp +++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp @@ -14,6 +14,7 @@ #include "core/memory.h" #include "core/hle/svc.h" +#include "core/arm/cache/cache.h" #include "core/arm/disassembler/arm_disasm.h" #include "core/arm/dyncom/arm_dyncom_dec.h" #include "core/arm/dyncom/arm_dyncom_interpreter.h" @@ -1144,6 +1145,8 @@ static inline void *AllocBuffer(unsigned int size) { return (void *)&inst_buf[start]; } +Cache::PtrCache instr_cache([]() {top = 0;}); // NOTE(review): the constructor registers with g_cachemanager during static initialization; confirm g_cachemanager is constructed first + static shtop_fp_t get_shtop(unsigned int inst) { if (BIT(inst, 25)) { return DPO(Immediate); @@ -3495,7 +3498,7 @@ static unsigned int InterpreterTranslateInstruction(const ARMul_State* cpu, cons return inst_size; } -static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr) { +static int 
InterpreterTranslateBlock(ARMul_State* cpu, u32 addr) { Common::Profiling::ScopeTimer timer_decode(profile_decode); MICROPROFILE_SCOPE(DynCom_Decode); @@ -3506,7 +3509,6 @@ static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr) ARM_INST_PTR inst_base = nullptr; int ret = NON_BRANCH; int size = 0; // instruction size of basic block - bb_start = top; u32 phys_addr = addr; u32 pc_start = cpu->Reg[15]; @@ -3524,17 +3526,14 @@ static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr) ret = inst_base->br; }; - cpu->instruction_cache[pc_start] = bb_start; - return KEEP_GOING; } -static int InterpreterTranslateSingle(ARMul_State* cpu, int& bb_start, u32 addr) { +static int InterpreterTranslateSingle(ARMul_State* cpu, u32 addr) { Common::Profiling::ScopeTimer timer_decode(profile_decode); MICROPROFILE_SCOPE(DynCom_Decode); ARM_INST_PTR inst_base = nullptr; - bb_start = top; u32 phys_addr = addr; u32 pc_start = cpu->Reg[15]; @@ -3545,8 +3544,6 @@ static int InterpreterTranslateSingle(ARMul_State* cpu, int& bb_start, u32 addr) inst_base->br = SINGLE_STEP; } - cpu->instruction_cache[pc_start] = bb_start; - return KEEP_GOING; } @@ -3589,7 +3586,7 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) { #define SHIFTER_OPERAND inst_cream->shtop_func(cpu, inst_cream->shifter_operand) #define FETCH_INST if (inst_base->br != NON_BRANCH) goto DISPATCH; \ - inst_base = (arm_inst *)&inst_buf[ptr] + inst_base = reinterpret_cast(ptr) #define INC_PC(l) ptr += sizeof(arm_inst) + l #define INC_PC_STUB ptr += sizeof(arm_inst) @@ -3879,7 +3876,7 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) { unsigned int addr; unsigned int num_instrs = 0; - int ptr; + u8* ptr; LOAD_NZCVT; DISPATCH: @@ -3895,16 +3892,21 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) { else cpu->Reg[15] &= 0xfffffffc; + // Clear the cache if we don't have more than 10 KiB of buffer remaining + if ((top + (10 * 1024)) >= CACHE_BUFFER_SIZE) instr_cache.Clear(); + // Find the cached 
instruction cream, otherwise translate it... - auto itr = cpu->instruction_cache.find(cpu->Reg[15]); - if (itr != cpu->instruction_cache.end()) { - ptr = itr->second; - } else if (cpu->NumInstrsToExecute != 1) { - if (InterpreterTranslateBlock(cpu, ptr, cpu->Reg[15]) == FETCH_EXCEPTION) - goto END; - } else { - if (InterpreterTranslateSingle(cpu, ptr, cpu->Reg[15]) == FETCH_EXCEPTION) - goto END; + ptr = instr_cache.FindPtr(cpu->Reg[15]); + if (ptr == nullptr) { + ptr = instr_cache.GetNewPtr(cpu->Reg[15]) = reinterpret_cast(&inst_buf[top]); // NOTE(review): the slot is published before translation; confirm a FETCH_EXCEPTION exit cannot leave a stale entry + if (cpu->NumInstrsToExecute != 1) { + if (InterpreterTranslateBlock(cpu, cpu->Reg[15]) == FETCH_EXCEPTION) + goto END; + } + else { + if (InterpreterTranslateSingle(cpu, cpu->Reg[15]) == FETCH_EXCEPTION) + goto END; + } } // Find breakpoint if one exists within the block @@ -3912,7 +3914,7 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) { breakpoint_data = GDBStub::GetNextBreakpointFromAddress(cpu->Reg[15], GDBStub::BreakpointType::Execute); } - inst_base = (arm_inst *)&inst_buf[ptr]; + inst_base = reinterpret_cast(ptr); GOTO_NEXT_INST; } ADC_INST: diff --git a/src/core/arm/skyeye_common/armstate.h b/src/core/arm/skyeye_common/armstate.h index d42ff2669..80ca48864 100644 --- a/src/core/arm/skyeye_common/armstate.h +++ b/src/core/arm/skyeye_common/armstate.h @@ -236,10 +236,6 @@ public: unsigned bigendSig; unsigned syscallSig; - // TODO(bunnei): Move this cache to a better place - it should be per codeset (likely per - // process for our purposes), not per ARMul_State (which tracks CPU core state). 
- std::unordered_map instruction_cache; - private: void ResetMPCoreCP15Registers(); diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 0546f6e16..1e1e459d9 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -8,6 +8,7 @@ #include "common/common_funcs.h" #include "common/logging/log.h" +#include "core/arm/cache/cache.h" #include "core/hle/kernel/memory.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/resource_limit.h" @@ -120,6 +121,10 @@ void Process::Run(s32 main_thread_priority, u32 stack_size) { MapSegment(codeset->rodata, VMAPermission::Read, MemoryState::Code); MapSegment(codeset->data, VMAPermission::ReadWrite, MemoryState::Private); + // Map cache: forget the code regions registered so far, then register this process' code segment + Cache::g_cachemanager.UnregisterCode(0, 0xFFFFFFFF); + Cache::g_cachemanager.RegisterCode(codeset->code.addr, codeset->code.size); + // Allocate and map stack vm_manager.MapMemoryBlock(Memory::HEAP_VADDR_END - stack_size, std::make_shared>(stack_size, 0), 0, stack_size, MemoryState::Locked