diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 907ebd162..4ba366004 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -44,12 +44,6 @@ static const u32 expand_bits_to_bytes[] = { 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff, }; -constexpr u32 VERTEX_CACHE_MAX_SIZE = (256 * 256); // 16-bit indices max -constexpr u32 VERTEX_CACHE_MIN_SIZE = 256; // 8-bit indices max - -static bool vertex_cache_ids[VERTEX_CACHE_MAX_SIZE] = {false}; // 64 Kb index cache -static std::array vertex_cache; // 8MB Cache - MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240)); static void WritePicaReg(u32 id, u32 value, u32 mask) { @@ -221,6 +215,30 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { const u16* index_address_16 = reinterpret_cast(index_address_8); bool index_u16 = index_info.format != 0; + constexpr size_t VERTEX_CACHE_MAX_SIZE = (256 * 256); // 16-bit indices max + + static std::array vertex_cache_ids = {}; // 64 Kb index cache + static std::array vertex_cache; // 8MB Cache + if (is_indexed) { + u32 min_index; + u32 max_index; + const size_t last_index = regs.num_vertices; + // TODO: is there any easier way to get the min/max indices? + // NOTE: also regs.num_vertices is actually the number of indices + // in the element being rendered. 
+ if (index_u16) { + auto result = std::minmax_element(index_address_16, index_address_16 + last_index); + min_index = *result.first; + max_index = *result.second; + } else { + auto result = std::minmax_element(index_address_8, index_address_8 + last_index); + min_index = *result.first; + max_index = *result.second; + } + std::fill(vertex_cache_ids.begin() + min_index, + vertex_cache_ids.begin() + max_index + 1, false); + } + PrimitiveAssembler& primitive_assembler = g_state.primitive_assembler; if (g_debug_context && g_debug_context->recorder) { @@ -240,11 +258,6 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { DebugUtils::MemoryAccessTracker memory_accesses; Shader::OutputVertex output_vertex; - if (is_indexed) { - const u32 num_indices = index_u16 ? VERTEX_CACHE_MAX_SIZE : VERTEX_CACHE_MIN_SIZE; - std::memset(vertex_cache_ids, false, sizeof(bool) * num_indices); - } - Shader::UnitState shader_unit; g_state.vs.Setup(); @@ -276,7 +289,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { } } - // if the vertex wasn'tcached, then we pass it to the shader, process + // if the vertex wasn't cached, then we pass it to the shader, process // it and cache it. if (!vertex_cache_hit) { // Initialize data for the current vertex diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f0b0f2b3d..1c36423e5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -53,7 +53,10 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { state.draw.uniform_buffer = uniform_buffer.handle; state.Apply(); - vertex_batch.reserve(256 * 256); // 8MB for worst case scenario + // reserve 8MB for worst case scenario. + // this will avoid vector resizing as we send vertices. 
+ constexpr size_t VERTEX_ARRAY_MAX_SIZE = 256 * 256; + vertex_batch.reserve(VERTEX_ARRAY_MAX_SIZE); // Bind the UBO to binding point 0 glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniform_buffer.handle);