diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index d55b84ce0..e41c08a4e 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -44,6 +44,7 @@ set(HEADERS
             shader/shader_interpreter.h
             swrasterizer.h
             utils.h
+            vertex_cache.h
             vertex_loader.h
             video_core.h
             )
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index eb79974a8..ca1f4cc89 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -23,6 +23,7 @@
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_base.h"
 #include "video_core/shader/shader.h"
+#include "video_core/vertex_cache.h"
 #include "video_core/vertex_loader.h"
 #include "video_core/video_core.h"
 
@@ -235,16 +236,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
 
         DebugUtils::MemoryAccessTracker memory_accesses;
 
-        // Simple circular-replacement vertex cache
-        // The size has been tuned for optimal balance between hit-rate and the cost of lookup
-        const size_t VERTEX_CACHE_SIZE = 32;
-        std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
-        std::array<Shader::OutputVertex, VERTEX_CACHE_SIZE> vertex_cache;
         Shader::OutputVertex output_vertex;
 
-        unsigned int vertex_cache_pos = 0;
-        vertex_cache_ids.fill(-1);
-
         auto* shader_engine = Shader::GetEngine();
         Shader::UnitState shader_unit;
 
@@ -260,25 +253,15 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
             // the PICA supports it, and it would mess up the caching, guard against it here.
             ASSERT(vertex != -1);
 
-            bool vertex_cache_hit = false;
-
             if (is_indexed) {
                 if (g_debug_context && Pica::g_debug_context->recorder) {
                     int size = index_u16 ? 2 : 1;
                     memory_accesses.AddAccess(base_address + index_info.offset + size * index,
                                               size);
                 }
-
-                for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
-                    if (vertex == vertex_cache_ids[i]) {
-                        output_vertex = vertex_cache[i];
-                        vertex_cache_hit = true;
-                        break;
-                    }
-                }
             }
 
-            if (!vertex_cache_hit) {
+            if (!is_indexed || !VertexCache::contains(vertex)) {
                 // Initialize data for the current vertex
                 Shader::InputVertex input;
                 loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
@@ -294,12 +277,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
                 output_vertex = Shader::OutputVertex::FromRegisters(shader_unit.registers.output,
                                                                     regs, regs.vs.output_mask);
 
-                if (is_indexed) {
-                    vertex_cache[vertex_cache_pos] = output_vertex;
-                    vertex_cache_ids[vertex_cache_pos] = vertex;
-                    vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
-                }
-            }
+                if (is_indexed)
+                    VertexCache::store(vertex, output_vertex);
+            } else
+                output_vertex = VertexCache::obtain(vertex);
 
             // Send to renderer
             using Pica::Shader::OutputVertex;
@@ -316,6 +297,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
                                                       range.second, range.first);
         }
 
+        if (is_indexed)
+            VertexCache::clear();
         break;
     }
 
diff --git a/src/video_core/vertex_cache.h b/src/video_core/vertex_cache.h
new file mode 100644
index 000000000..13f194ffb
--- /dev/null
+++ b/src/video_core/vertex_cache.h
@@ -0,0 +1,112 @@
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include "common/common_types.h"
+#include "video_core/shader/shader.h"
+
+// This little module emulates PICA's vertex shader's post transform vertex cache.
+//
+// Each vertex position maps to the slot `position % cache_size`; storing a vertex replaces
+// whatever position previously occupied that slot.
+//
+// All state lives in an anonymous namespace, so every translation unit that includes this
+// header gets its own private copy of the cache.
+namespace VertexCache {
+
+namespace {
+// Number of vertex positions that can be tracked (65536).
+constexpr size_t cache_max_size = 256 * 256;
+// Testing has proven that this size produces few misses in most games.
+// In total the cache size is 64Kb.
+constexpr size_t cache_size = 512;
+
+// index_table[position] is true while the vertex for `position` is held in the cache.
+std::array<bool, cache_max_size> index_table = {};
+
+#ifdef _DEBUG
+// seen_table[position] is true once `position` has been stored since the last clear().
+std::array<bool, cache_max_size> seen_table = {};
+u32 cache_misses = 0;
+
+// The number of misses we can tolerate before clear() logs them.
+const u32 tolerated_misses = 16;
+#endif
+
+// The cache itself, indexed by slot.
+std::array<Pica::Shader::OutputVertex, cache_size> cache;
+
+// remapper[slot] is the position most recently stored in that slot.
+std::array<u32, cache_size> remapper = {};
+
+// Lowest and highest slot written since the last clear(); used for clearing the cache.
+// min_index > max_index means that nothing has been stored.
+u32 min_index = cache_size;
+u32 max_index = 0;
+} // Anonymous namespace
+
+/// Returns true if the vertex for `position` is currently held in the cache.
+/// Positions that cannot be tracked (>= cache_max_size) are never cached.
+inline bool contains(u32 position) {
+    if (position >= cache_max_size)
+        return false;
+#ifdef _DEBUG
+    // Stored since the last clear() but no longer cached: it was evicted, count a miss.
+    if (seen_table[position] != index_table[position])
+        cache_misses++;
+#endif
+    return index_table[position];
+}
+
+/// Returns the vertex held in the slot `position` maps to.
+/// Only meaningful when contains(position) is true.
+inline Pica::Shader::OutputVertex& obtain(u32 position) {
+    return cache[position % cache_size];
+}
+
+/// Caches `vertex` for `position`, evicting the previous occupant of its slot.
+/// Positions that cannot be tracked (>= cache_max_size) are ignored.
+inline void store(u32 position, const Pica::Shader::OutputVertex& vertex) {
+    if (position >= cache_max_size)
+        return;
+    const u32 remap_index = static_cast<u32>(position % cache_size);
+    // A zero-initialized remapper entry does not describe a real occupant unless it maps to
+    // this slot; without this check the first store to every slot would evict position 0.
+    const u32 evicted = remapper[remap_index];
+    if (evicted % cache_size == remap_index)
+        index_table[evicted] = false;
+    remapper[remap_index] = position;
+    index_table[position] = true;
+    cache[remap_index] = vertex;
+    max_index = std::max(max_index, remap_index);
+    min_index = std::min(min_index, remap_index);
+#ifdef _DEBUG
+    seen_table[position] = true;
+#endif
+}
+
+/// Marks every position that is still cached as no longer cached.
+inline void clear() {
+    // With nothing stored, [min_index, max_index] is empty; building an iterator range from
+    // it would be invalid, so skip the scan entirely.
+    if (min_index <= max_index) {
+        const auto first_slot = remapper.begin() + min_index;
+        const auto last_slot = remapper.begin() + max_index + 1;
+        // Every position still marked as cached sits in one of these slots, so resetting
+        // the span between the smallest and largest of them covers them all.
+        const auto result = std::minmax_element(first_slot, last_slot);
+        std::fill(index_table.begin() + *result.first,
+                  index_table.begin() + *result.second + 1, false);
+    }
+    max_index = 0;
+    min_index = cache_size;
+#ifdef _DEBUG
+    seen_table.fill(false);
+    if (cache_misses > tolerated_misses)
+        LOG_TRACE(HW_GPU, "The vertex cache had %u misses", cache_misses);
+    cache_misses = 0;
+#endif
+}
+
+} // VertexCache