Improved Vertex Caching

2017-01-20 18:48:43 -05:00
parent ebe0150635
commit 26028969f8
2 changed files with 23 additions and 17 deletions
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -44,6 +44,12 @@ static const u32 expand_bits_to_bytes[] = {
    0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff,
 };

+constexpr u32 VERTEX_CACHE_MAX_SIZE = (256 * 256);          // 16-bit indices max
+constexpr u32 VERTEX_CACHE_MIN_SIZE = 256;                  // 8-bit indices max
+
+static bool vertex_cache_ids[VERTEX_CACHE_MAX_SIZE] = {false};               // 64 KB index cache
+static std::array<Shader::OutputVertex, VERTEX_CACHE_MAX_SIZE> vertex_cache; // 8MB Cache
+
 MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240));

 static void WritePicaReg(u32 id, u32 value, u32 mask) {
@@ -232,16 +238,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
        }

        DebugUtils::MemoryAccessTracker memory_accesses;
-
-        // Simple circular-replacement vertex cache
-        // The size has been tuned for optimal balance between hit-rate and the cost of lookup
-        const size_t VERTEX_CACHE_SIZE = 32;
-        std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
-        std::array<Shader::OutputVertex, VERTEX_CACHE_SIZE> vertex_cache;
        Shader::OutputVertex output_vertex;

-        unsigned int vertex_cache_pos = 0;
-        vertex_cache_ids.fill(-1);
+        if (is_indexed) {
+            const u32 num_indices = index_u16 ? VERTEX_CACHE_MAX_SIZE : VERTEX_CACHE_MIN_SIZE;
+            std::memset(vertex_cache_ids, false, sizeof(bool) * num_indices);
+        }

        Shader::UnitState shader_unit;
        g_state.vs.Setup();
@@ -256,6 +258,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
            // the PICA supports it, and it would mess up the caching, guard against it here.
            ASSERT(vertex != -1);

+            // if the index was already in the cache, we set this to true
            bool vertex_cache_hit = false;

            if (is_indexed) {
@@ -265,15 +268,16 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
                                              size);
                }

-                for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
-                    if (vertex == vertex_cache_ids[i]) {
-                        output_vertex = vertex_cache[i];
+                // we check if we have already processed the vertex.
+                // in that case, we use the cached vertex
+                if (vertex_cache_ids[vertex]) {
+                    output_vertex = vertex_cache[vertex];
                    vertex_cache_hit = true;
-                        break;
-                    }
                }
            }

+            // if the vertex wasn't cached, then we pass it to the shader, process
+            // it and cache it.
            if (!vertex_cache_hit) {
                // Initialize data for the current vertex
                Shader::InputVertex input;
@@ -288,10 +292,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
                // Retrieve vertex from register data
                output_vertex = shader_unit.output_registers.ToVertex(regs.vs);

+                // we cache the vertex so we don't have to recompute it.
                if (is_indexed) {
-                    vertex_cache[vertex_cache_pos] = output_vertex;
-                    vertex_cache_ids[vertex_cache_pos] = vertex;
-                    vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
+                    vertex_cache[vertex] = output_vertex;
+                    vertex_cache_ids[vertex] = true;
                }
            }

--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -53,6 +53,8 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
    state.draw.uniform_buffer = uniform_buffer.handle;
    state.Apply();

+    vertex_batch.reserve(256 * 256); // 8MB for worst case scenario
+
    // Bind the UBO to binding point 0
    glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniform_buffer.handle);