diff --git a/src/citra_qt/debugger/graphics_breakpoints.cpp b/src/citra_qt/debugger/graphics_breakpoints.cpp index fe66918a8..b92a55c0a 100644 --- a/src/citra_qt/debugger/graphics_breakpoints.cpp +++ b/src/citra_qt/debugger/graphics_breakpoints.cpp @@ -45,6 +45,7 @@ QVariant BreakPointModel::data(const QModelIndex& index, int role) const { Pica::DebugContext::Event::IncomingPrimitiveBatch, tr("Incoming primitive batch") }, { Pica::DebugContext::Event::FinishedPrimitiveBatch, tr("Finished primitive batch") }, { Pica::DebugContext::Event::VertexShaderInvocation, tr("Vertex shader invocation") }, + { Pica::DebugContext::Event::GeometryShaderInvocation, tr("Geometry shader invocation") }, { Pica::DebugContext::Event::IncomingDisplayTransfer, tr("Incoming display transfer") }, { Pica::DebugContext::Event::GSPCommandProcessed, tr("GSP command processed") }, { Pica::DebugContext::Event::BufferSwapped, tr("Buffers swapped") } diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 5b920eb28..2e3def7a9 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -139,7 +139,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { if (immediate_attribute_id >= regs.vs.num_input_attributes+1) { immediate_attribute_id = 0; - Shader::UnitState shader_unit; + auto& shader_unit = Shader::GetShaderUnit(false); g_state.vs.Setup(); // Send to vertex shader @@ -232,9 +232,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { unsigned int vertex_cache_pos = 0; vertex_cache_ids.fill(-1); - Shader::UnitState shader_unit; + auto& vs_shader_unit = Shader::GetShaderUnit(false); g_state.vs.Setup(); + auto& gs_unit_state = Shader::GetShaderUnit(true); + g_state.gs.Setup(); + for (unsigned int index = 0; index < regs.num_vertices; ++index) { // Indexed rendering doesn't use the start offset @@ -270,8 +273,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { // Send to vertex shader if 
(g_debug_context) g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input); - g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes(), regs.vs); - output_registers = shader_unit.output_registers; + g_state.vs.Run(vs_shader_unit, input, loader.GetNumTotalAttributes(), regs.vs); + output_registers = vs_shader_unit.output_registers; if (is_indexed) { vertex_cache[vertex_cache_pos] = output_registers; @@ -280,17 +283,56 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { } } - // Retreive vertex from register data - Shader::OutputVertex output_vertex = output_registers.ToVertex(regs.vs); - - // Send to renderer + // Helper to send triangle to renderer using Pica::Shader::OutputVertex; auto AddTriangle = []( const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) { VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); }; - primitive_assembler.SubmitVertex(output_vertex, AddTriangle); + if (Shader::UseGS()) { + + auto& regs = g_state.regs; + auto& gs_regs = g_state.regs.gs; + auto& gs_buf = g_state.gs_input_buffer; + + // Vertex Shader Outputs are converted into Geometry Shader inputs by filling up a buffer + // For example, if we have a geoshader that takes 6 inputs, and the vertex shader outputs 2 attributes + // It would take 3 vertices to fill up the Geometry Shader buffer + unsigned int gs_input_count = gs_regs.num_input_attributes + 1; + unsigned int vs_output_count = regs.vs_outmap_total2 + 1; + ASSERT_MSG(regs.vs_outmap_total1 == regs.vs_outmap_total2, "VS_OUTMAP_TOTAL1 and VS_OUTMAP_TOTAL2 don't match!"); + // copy into the geoshader buffer + for (unsigned int i = 0; i < vs_output_count; i++) { + if (gs_buf.index >= gs_input_count) { + // TODO(ds84182): LOG_ERROR() + ASSERT_MSG(false, "Number of GS inputs (%d) is not divisible by number of VS outputs (%d)", + gs_input_count, vs_output_count); + continue; + } + gs_buf.buffer.attr[gs_buf.index++] = output_registers.value[i]; + } + + if 
(gs_buf.index >= gs_input_count) { + + // b15 will be false when a new primitive starts and then switch to true at some point + //TODO: Test how this works exactly on hardware + g_state.gs.uniforms.b[15] |= (index > 0); + + // Process Geometry Shader + if (g_debug_context) + g_debug_context->OnEvent(DebugContext::Event::GeometryShaderInvocation, static_cast(&gs_buf.buffer)); + gs_unit_state.emit_triangle_callback = AddTriangle; + g_state.gs.Run(gs_unit_state, gs_buf.buffer, gs_input_count, regs.gs); + gs_unit_state.emit_triangle_callback = nullptr; + + gs_buf.index = 0; + } + } else { + Shader::OutputVertex output_vertex = output_registers.ToVertex(regs.vs); + primitive_assembler.SubmitVertex(output_vertex, AddTriangle); + } + } for (auto& range : memory_accesses.ranges) { diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index 92e9734ae..a79143cbb 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -41,6 +41,7 @@ public: IncomingPrimitiveBatch, FinishedPrimitiveBatch, VertexShaderInvocation, + GeometryShaderInvocation, IncomingDisplayTransfer, GSPCommandProcessed, BufferSwapped, diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index f83b91f7d..923dbf539 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h @@ -22,6 +22,8 @@ struct State { /// Pica registers Regs regs; + Shader::UnitState shader_units[4]; + Shader::ShaderSetup vs; Shader::ShaderSetup gs; diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp index 68ea3c08a..df79b1925 100644 --- a/src/video_core/primitive_assembly.cpp +++ b/src/video_core/primitive_assembly.cpp @@ -19,7 +19,6 @@ template void PrimitiveAssembler::SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler) { switch (topology) { - // TODO: Figure out what's different with TriangleTopology::Shader. 
case Regs::TriangleTopology::List: case Regs::TriangleTopology::Shader: if (buffer_index < 2) { diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 1423ba2f5..8e2651f8e 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -167,6 +167,28 @@ bool SharedGS() { return g_state.regs.vs_com_mode == Pica::Regs::VSComMode::Shared; } +bool UseGS() { + // TODO(ds84182): This would be more accurate if it looked at individual shader units for the geoshader bit + // gs_regs.input_buffer_config.use_geometry_shader == 0x08 + ASSERT((g_state.regs.using_geometry_shader == 0) || (g_state.regs.using_geometry_shader == 2)); + return g_state.regs.using_geometry_shader == 2; +} + +UnitState& GetShaderUnit(bool gs) { + + // Geometry shaders always run on shader unit 3, the last entry of g_state.shader_units + if (gs) { + return g_state.shader_units[3]; + } + + // Naive round-robin over units 0, 1 and 2. The counter is advanced before it is used, so the first call returns unit 1. + // TODO: How does PICA shader scheduling work? + static unsigned shader_unit_scheduler = 0; + shader_unit_scheduler++; + shader_unit_scheduler %= 3; // TODO: When does it also allow use of unit 3?! + return g_state.shader_units[shader_unit_scheduler]; +} + void WriteUniformBoolReg(bool gs, u32 value) { auto& setup = gs ? g_state.gs : g_state.vs; diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index cb1eda1d2..3af8d4ebf 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -408,6 +408,8 @@ struct ShaderSetup { }; bool SharedGS(); +bool UseGS(); +UnitState& GetShaderUnit(bool gs); void WriteUniformBoolReg(bool gs, u32 value); void WriteUniformIntReg(bool gs, unsigned index, const Math::Vec4& values); void WriteUniformFloatSetupReg(bool gs, u32 value);