From 26028969f83e1f83686dab2b427df1c72f554da3 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 20 Jan 2017 18:48:43 -0500 Subject: [PATCH] Improved Vertex Caching --- src/video_core/command_processor.cpp | 38 ++++++++++--------- .../renderer_opengl/gl_rasterizer.cpp | 2 + 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index ea58e9f54..907ebd162 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -44,6 +44,12 @@ static const u32 expand_bits_to_bytes[] = { 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff, }; +constexpr u32 VERTEX_CACHE_MAX_SIZE = (256 * 256); // 16-bit indices max +constexpr u32 VERTEX_CACHE_MIN_SIZE = 256; // 8-bit indices max + +static bool vertex_cache_ids[VERTEX_CACHE_MAX_SIZE] = {false}; // 64 Kb index cache +static std::array vertex_cache; // 8MB Cache + MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240)); static void WritePicaReg(u32 id, u32 value, u32 mask) { @@ -232,16 +238,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { } DebugUtils::MemoryAccessTracker memory_accesses; - - // Simple circular-replacement vertex cache - // The size has been tuned for optimal balance between hit-rate and the cost of lookup - const size_t VERTEX_CACHE_SIZE = 32; - std::array vertex_cache_ids; - std::array vertex_cache; Shader::OutputVertex output_vertex; - unsigned int vertex_cache_pos = 0; - vertex_cache_ids.fill(-1); + if (is_indexed) { + const u32 num_indices = index_u16 ? VERTEX_CACHE_MAX_SIZE : VERTEX_CACHE_MIN_SIZE; + std::memset(vertex_cache_ids, false, sizeof(bool) * num_indices); + } Shader::UnitState shader_unit; g_state.vs.Setup(); @@ -256,6 +258,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { // the PICA supports it, and it would mess up the caching, guard against it here. 
ASSERT(vertex != -1); + // if the index was already in the cache, we set this to true bool vertex_cache_hit = false; if (is_indexed) { @@ -265,15 +268,16 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { size); } - for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { - if (vertex == vertex_cache_ids[i]) { - output_vertex = vertex_cache[i]; - vertex_cache_hit = true; - break; - } + // we check if we have already processed the vertex. + // in that case, we use the cached vertex + if (vertex_cache_ids[vertex]) { + output_vertex = vertex_cache[vertex]; + vertex_cache_hit = true; } } + // if the vertex wasn't cached, then we pass it to the shader, process + // it and cache it. if (!vertex_cache_hit) { // Initialize data for the current vertex Shader::InputVertex input; @@ -288,10 +292,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { // Retrieve vertex from register data output_vertex = shader_unit.output_registers.ToVertex(regs.vs); + // we cache the vertex so we don't have to recompute it again. if (is_indexed) { - vertex_cache[vertex_cache_pos] = output_vertex; - vertex_cache_ids[vertex_cache_pos] = vertex; - vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; + vertex_cache[vertex] = output_vertex; + vertex_cache_ids[vertex] = true; } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 5a306a5c8..f0b0f2b3d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -53,6 +53,8 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { state.draw.uniform_buffer = uniform_buffer.handle; state.Apply(); + vertex_batch.reserve(256 * 256); // 8MB for worst case scenario + // Bind the UBO to binding point 0 glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniform_buffer.handle);