Merge pull request #2865 from wwylele/gs++

PICA: implemented geometry shader
This commit is contained in:
bunnei 2017-09-07 23:02:59 -04:00 committed by GitHub
commit 11baa40d75
15 changed files with 594 additions and 37 deletions

View File

@ -1,6 +1,7 @@
set(SRCS set(SRCS
command_processor.cpp command_processor.cpp
debug_utils/debug_utils.cpp debug_utils/debug_utils.cpp
geometry_pipeline.cpp
pica.cpp pica.cpp
primitive_assembly.cpp primitive_assembly.cpp
regs.cpp regs.cpp
@ -29,6 +30,7 @@ set(SRCS
set(HEADERS set(HEADERS
command_processor.h command_processor.h
debug_utils/debug_utils.h debug_utils/debug_utils.h
geometry_pipeline.h
gpu_debugger.h gpu_debugger.h
pica.h pica.h
pica_state.h pica_state.h

View File

@ -161,6 +161,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index): case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index):
g_state.immediate.current_attribute = 0; g_state.immediate.current_attribute = 0;
g_state.immediate.reset_geometry_pipeline = true;
default_attr_counter = 0; default_attr_counter = 0;
break; break;
@ -234,16 +235,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
shader_engine->Run(g_state.vs, shader_unit); shader_engine->Run(g_state.vs, shader_unit);
shader_unit.WriteOutput(regs.vs, output); shader_unit.WriteOutput(regs.vs, output);
// Send to renderer // Send to geometry pipeline
using Pica::Shader::OutputVertex; if (g_state.immediate.reset_geometry_pipeline) {
auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, g_state.geometry_pipeline.Reconfigure();
const OutputVertex& v2) { g_state.immediate.reset_geometry_pipeline = false;
VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); }
}; ASSERT(!g_state.geometry_pipeline.NeedIndexInput());
g_state.geometry_pipeline.Setup(shader_engine);
g_state.primitive_assembler.SubmitVertex( g_state.geometry_pipeline.SubmitVertex(output);
Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output),
AddTriangle);
} }
} }
} }
@ -321,8 +320,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
// The size has been tuned for optimal balance between hit-rate and the cost of lookup // The size has been tuned for optimal balance between hit-rate and the cost of lookup
const size_t VERTEX_CACHE_SIZE = 32; const size_t VERTEX_CACHE_SIZE = 32;
std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
std::array<Shader::OutputVertex, VERTEX_CACHE_SIZE> vertex_cache; std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache;
Shader::OutputVertex output_vertex; Shader::AttributeBuffer vs_output;
unsigned int vertex_cache_pos = 0; unsigned int vertex_cache_pos = 0;
vertex_cache_ids.fill(-1); vertex_cache_ids.fill(-1);
@ -332,6 +331,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
g_state.geometry_pipeline.Reconfigure();
g_state.geometry_pipeline.Setup(shader_engine);
if (g_state.geometry_pipeline.NeedIndexInput())
ASSERT(is_indexed);
for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) { for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) {
// Indexed rendering doesn't use the start offset // Indexed rendering doesn't use the start offset
unsigned int vertex = unsigned int vertex =
@ -345,6 +349,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
bool vertex_cache_hit = false; bool vertex_cache_hit = false;
if (is_indexed) { if (is_indexed) {
if (g_state.geometry_pipeline.NeedIndexInput()) {
g_state.geometry_pipeline.SubmitIndex(vertex);
continue;
}
if (g_debug_context && Pica::g_debug_context->recorder) { if (g_debug_context && Pica::g_debug_context->recorder) {
int size = index_u16 ? 2 : 1; int size = index_u16 ? 2 : 1;
memory_accesses.AddAccess(base_address + index_info.offset + size * index, memory_accesses.AddAccess(base_address + index_info.offset + size * index,
@ -353,7 +362,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
if (vertex == vertex_cache_ids[i]) { if (vertex == vertex_cache_ids[i]) {
output_vertex = vertex_cache[i]; vs_output = vertex_cache[i];
vertex_cache_hit = true; vertex_cache_hit = true;
break; break;
} }
@ -362,7 +371,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
if (!vertex_cache_hit) { if (!vertex_cache_hit) {
// Initialize data for the current vertex // Initialize data for the current vertex
Shader::AttributeBuffer input, output{}; Shader::AttributeBuffer input;
loader.LoadVertex(base_address, index, vertex, input, memory_accesses); loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
// Send to vertex shader // Send to vertex shader
@ -371,26 +380,17 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
(void*)&input); (void*)&input);
shader_unit.LoadInput(regs.vs, input); shader_unit.LoadInput(regs.vs, input);
shader_engine->Run(g_state.vs, shader_unit); shader_engine->Run(g_state.vs, shader_unit);
shader_unit.WriteOutput(regs.vs, output); shader_unit.WriteOutput(regs.vs, vs_output);
// Retrieve vertex from register data
output_vertex = Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output);
if (is_indexed) { if (is_indexed) {
vertex_cache[vertex_cache_pos] = output_vertex; vertex_cache[vertex_cache_pos] = vs_output;
vertex_cache_ids[vertex_cache_pos] = vertex; vertex_cache_ids[vertex_cache_pos] = vertex;
vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
} }
} }
// Send to renderer // Send to geometry pipeline
using Pica::Shader::OutputVertex; g_state.geometry_pipeline.SubmitVertex(vs_output);
auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1,
const OutputVertex& v2) {
VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
};
primitive_assembler.SubmitVertex(output_vertex, AddTriangle);
} }
for (auto& range : memory_accesses.ranges) { for (auto& range : memory_accesses.ranges) {

View File

@ -0,0 +1,274 @@
// Copyright 2017 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <utility>
#include "video_core/geometry_pipeline.h"
#include "video_core/pica_state.h"
#include "video_core/regs.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
namespace Pica {
/**
* An attribute buffering interface for different pipeline modes. An implementation receives the
* vertex shader outputs one vertex at a time and reports when enough data has been gathered for
* the geometry shader to be invoked.
*/
class GeometryPipelineBackend {
public:
virtual ~GeometryPipelineBackend() = default;
/// Checks if there is no incomplete data transfer, i.e. nothing is partially buffered
virtual bool IsEmpty() const = 0;
/// Checks if the pipeline needs a direct input from index buffer
virtual bool NeedIndexInput() const = 0;
/// Submits an index from index buffer. The backends in this file that never ask for an index
/// treat a call to this function as unreachable.
virtual void SubmitIndex(unsigned int val) = 0;
/**
* Submits vertex attributes
* @param input attributes of a vertex output from vertex shader
* @return true if the buffer is full and the geometry shader should be invoked
*/
virtual bool SubmitVertex(const Shader::AttributeBuffer& input) = 0;
};
// In the Point mode, vertex attributes are sent to the input registers in the geometry shader unit.
// The size of vertex shader outputs and geometry shader inputs are constants. Geometry shader is
// invoked upon inputs buffer filled up by vertex shader outputs. For example, if we have a geometry
// shader that takes 6 inputs, and the vertex shader outputs 2 attributes, it would take 3 vertices
// for one geometry shader invocation.
// TODO: what happens when the input size is not divisible by the output size?
class GeometryPipeline_Point : public GeometryPipelineBackend {
public:
GeometryPipeline_Point(const Regs& regs, Shader::GSUnitState& unit) : regs(regs), unit(unit) {
ASSERT(regs.pipeline.variable_primitive == 0);
ASSERT(regs.gs.input_to_uniform == 0);
vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1;
size_t gs_input_num = regs.gs.max_input_attribute_index + 1;
ASSERT(gs_input_num % vs_output_num == 0);
buffer_cur = attribute_buffer.attr;
buffer_end = attribute_buffer.attr + gs_input_num;
}
bool IsEmpty() const override {
return buffer_cur == attribute_buffer.attr;
}
bool NeedIndexInput() const override {
return false;
}
void SubmitIndex(unsigned int val) override {
UNREACHABLE();
}
bool SubmitVertex(const Shader::AttributeBuffer& input) override {
buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur);
if (buffer_cur == buffer_end) {
buffer_cur = attribute_buffer.attr;
unit.LoadInput(regs.gs, attribute_buffer);
return true;
}
return false;
}
private:
const Regs& regs;
Shader::GSUnitState& unit;
Shader::AttributeBuffer attribute_buffer;
Math::Vec4<float24>* buffer_cur;
Math::Vec4<float24>* buffer_end;
unsigned int vs_output_num;
};
// In VariablePrimitive mode, vertex attributes are buffered into the float uniform registers of
// the geometry shader. The number of vertices per invocation is variable: it is taken from an
// index value submitted through SubmitIndex before the vertices themselves. This mode is usually
// used for subdivision.
class GeometryPipeline_VariablePrimitive : public GeometryPipelineBackend {
public:
    GeometryPipeline_VariablePrimitive(const Regs& regs, Shader::ShaderSetup& setup)
        : regs(regs), setup(setup) {
        ASSERT(regs.pipeline.variable_primitive == 1);
        ASSERT(regs.gs.input_to_uniform == 1);

        vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1;
    }

    /// The backend is idle exactly when it is waiting for the next vertex count
    bool IsEmpty() const override {
        return need_index;
    }

    bool NeedIndexInput() const override {
        return need_index;
    }

    /**
     * Starts a new primitive
     * @param val the number of vertices that make up the primitive
     */
    void SubmitIndex(unsigned int val) override {
        DEBUG_ASSERT(need_index);

        // The number of vertex input is put to the uniform register
        float24 vertex_num = float24::FromFloat32(static_cast<float>(val));
        setup.uniforms.f[0] = Math::MakeVec(vertex_num, vertex_num, vertex_num, vertex_num);

        // The second uniform register and so on are used for receiving input vertices
        buffer_cur = setup.uniforms.f + 1;

        main_vertex_num = regs.pipeline.variable_vertex_main_num_minus_1 + 1;
        // NOTE(review): a count of 0 is not handled; the unsigned countdown in SubmitVertex would
        // wrap around instead of completing the primitive. Hardware behaviour should be confirmed.
        total_vertex_num = val;
        need_index = false;
    }

    /**
     * Buffers one vertex of the current primitive
     * @param input attributes of a vertex output from vertex shader
     * @return true once all vertices announced by SubmitIndex have been received
     */
    bool SubmitVertex(const Shader::AttributeBuffer& input) override {
        DEBUG_ASSERT(!need_index);
        if (main_vertex_num != 0) {
            // For main vertices, receive all attributes
            buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur);
            --main_vertex_num;
        } else {
            // For other vertices, only receive the first attribute (usually the position)
            *(buffer_cur++) = input.attr[0];
        }
        --total_vertex_num;

        if (total_vertex_num == 0) {
            need_index = true;
            return true;
        }

        return false;
    }

private:
    bool need_index = true;
    const Regs& regs;
    Shader::ShaderSetup& setup;
    // The per-primitive state below is only meaningful between SubmitIndex and the completion of
    // the primitive. It is given defined values so that a misuse (SubmitVertex before SubmitIndex,
    // only caught by DEBUG_ASSERT) fails deterministically instead of using indeterminate values.
    unsigned int main_vertex_num = 0;
    unsigned int total_vertex_num = 0;
    Math::Vec4<float24>* buffer_cur = nullptr;
    unsigned int vs_output_num = 0;
};
// FixedPrimitive mode: vertex attributes are buffered into the float uniform registers of the
// geometry shader, starting at a configurable register. Every shader invocation consumes the same
// number of vertices. This is usually used for particle systems.
class GeometryPipeline_FixedPrimitive : public GeometryPipelineBackend {
public:
    GeometryPipeline_FixedPrimitive(const Regs& regs, Shader::ShaderSetup& setup)
        : regs(regs), setup(setup) {
        ASSERT(regs.pipeline.variable_primitive == 0);
        ASSERT(regs.gs.input_to_uniform == 1);

        const auto& gs_config = regs.pipeline.gs_config;

        vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1;
        // Only a stride equal to the number of vertex shader outputs is supported
        ASSERT(vs_output_num == gs_config.stride_minus_1 + 1);

        const size_t vertices_per_invocation = gs_config.fixed_vertex_num_minus_1 + 1;

        // The destination window inside the uniform registers: [buffer_begin, buffer_end)
        buffer_begin = setup.uniforms.f + gs_config.start_index;
        buffer_cur = buffer_begin;
        buffer_end = buffer_begin + vs_output_num * vertices_per_invocation;
    }

    /// True while no vertex of the current primitive has been written yet
    bool IsEmpty() const override {
        const bool at_start = (buffer_cur == buffer_begin);
        return at_start;
    }

    /// FixedPrimitive mode never consumes values from the index buffer
    bool NeedIndexInput() const override {
        return false;
    }

    void SubmitIndex(unsigned int val) override {
        UNREACHABLE();
    }

    /**
     * Writes the vertex shader outputs of one vertex into the uniform window
     * @param input attributes of a vertex output from vertex shader
     * @return true once the window is full; the cursor is then rewound to its start
     */
    bool SubmitVertex(const Shader::AttributeBuffer& input) override {
        const Math::Vec4<float24>* const src_begin = input.attr;
        buffer_cur = std::copy(src_begin, src_begin + vs_output_num, buffer_cur);

        if (buffer_cur != buffer_end) {
            return false;
        }

        buffer_cur = buffer_begin;
        return true;
    }

private:
    const Regs& regs;
    Shader::ShaderSetup& setup;
    Math::Vec4<float24>* buffer_begin;
    Math::Vec4<float24>* buffer_cur;
    Math::Vec4<float24>* buffer_end;
    unsigned int vs_output_num;
};
// Binds the pipeline to the Pica state whose registers and geometry shader state it operates on.
// No backend exists until Reconfigure() creates one.
GeometryPipeline::GeometryPipeline(State& state) : state(state) {}
// Defaulted here rather than in the header: the header only forward-declares
// GeometryPipelineBackend, while this file defines it before this point, so the
// std::unique_ptr<GeometryPipelineBackend> member is destroyed with a complete type.
GeometryPipeline::~GeometryPipeline() = default;
// Installs the callback that receives vertex shader outputs directly when no geometry shader
// backend is active (see SubmitVertex).
void GeometryPipeline::SetVertexHandler(Shader::VertexHandler vertex_handler) {
    // The handler is taken by value, so hand it over to the member instead of copying the
    // std::function a second time.
    this->vertex_handler = std::move(vertex_handler);
}
// Remembers the shader engine and prepares it for the geometry shader program. Does nothing when
// no backend is configured, i.e. when the geometry shader is not in use.
void GeometryPipeline::Setup(Shader::ShaderEngine* shader_engine) {
    if (backend) {
        this->shader_engine = shader_engine;
        shader_engine->SetupBatch(state.gs, state.regs.gs.main_offset);
    }
}
// Rebuilds the backend from the current register values. A previous backend must not hold a
// partially transferred primitive when this is called.
void GeometryPipeline::Reconfigure() {
    ASSERT(!backend || backend->IsEmpty());

    auto& regs = state.regs;
    auto& pipeline = regs.pipeline;

    // With the geometry shader disabled there is no backend at all
    if (pipeline.use_gs == PipelineRegs::UseGS::No) {
        backend.reset();
        return;
    }

    ASSERT(pipeline.use_gs == PipelineRegs::UseGS::Yes);

    // The following assumes that when geometry shader is in use, the shader unit 3 is configured as
    // a geometry shader unit.
    // TODO: what happens if this is not true?
    ASSERT(pipeline.gs_unit_exclusive_configuration == 1);
    ASSERT(regs.gs.shader_mode == ShaderRegs::ShaderMode::GS);

    state.gs_unit.ConfigOutput(regs.gs);

    ASSERT(pipeline.vs_outmap_total_minus_1_a == pipeline.vs_outmap_total_minus_1_b);

    // Pick the buffering strategy that matches the configured geometry shader mode
    switch (pipeline.gs_config.mode) {
    case PipelineRegs::GSMode::Point:
        backend = std::make_unique<GeometryPipeline_Point>(regs, state.gs_unit);
        break;
    case PipelineRegs::GSMode::VariablePrimitive:
        backend = std::make_unique<GeometryPipeline_VariablePrimitive>(regs, state.gs);
        break;
    case PipelineRegs::GSMode::FixedPrimitive:
        backend = std::make_unique<GeometryPipeline_FixedPrimitive>(regs, state.gs);
        break;
    default:
        UNREACHABLE();
    }
}
// An index is only ever wanted when a backend exists and that backend asks for one.
bool GeometryPipeline::NeedIndexInput() const {
    return backend && backend->NeedIndexInput();
}
void GeometryPipeline::SubmitIndex(unsigned int val) {
backend->SubmitIndex(val);
}
// Routes one vertex shader output either straight to the vertex handler or into the geometry
// shader backend, running the geometry shader whenever the backend reports a complete input.
void GeometryPipeline::SubmitVertex(const Shader::AttributeBuffer& input) {
    if (!backend) {
        // No backend means the geometry shader is disabled, so we send the vertex shader output
        // directly to the primitive assembler.
        vertex_handler(input);
        return;
    }

    const bool input_complete = backend->SubmitVertex(input);
    if (!input_complete) {
        return;
    }

    shader_engine->Run(state.gs, state.gs_unit);

    // The uniform b15 is set to true after every geometry shader invocation. This is useful
    // for the shader to know if this is the first invocation in a batch, if the program set
    // b15 to false first.
    state.gs.uniforms.b[15] = true;
}
} // namespace Pica

View File

@ -0,0 +1,49 @@
// Copyright 2017 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include "video_core/shader/shader.h"
namespace Pica {
struct State;
class GeometryPipelineBackend;
/// A pipeline receiving from vertex shader and sending to geometry shader and primitive assembler
class GeometryPipeline {
public:
    explicit GeometryPipeline(State& state);
    ~GeometryPipeline();

    /// Sets the handler for receiving vertex outputs from vertex shader
    void SetVertexHandler(Shader::VertexHandler vertex_handler);

    /**
     * Sets up the geometry shader unit if it is in use
     * @param shader_engine the shader engine for the geometry shader to run
     */
    void Setup(Shader::ShaderEngine* shader_engine);

    /// Reconfigures the pipeline according to current register settings
    void Reconfigure();

    /// Checks if the pipeline needs a direct input from index buffer
    bool NeedIndexInput() const;

    /// Submits an index from index buffer. Call this only when NeedIndexInput returns true
    void SubmitIndex(unsigned int val);

    /// Submits vertex attributes output from vertex shader
    void SubmitVertex(const Shader::AttributeBuffer& input);

private:
    Shader::VertexHandler vertex_handler;
    // Engine used to run the geometry shader. It starts out null so that it never holds an
    // indeterminate value before Setup() has stored a real engine.
    Shader::ShaderEngine* shader_engine = nullptr;
    // Buffering strategy for the configured geometry shader mode; created by Reconfigure()
    std::unique_ptr<GeometryPipelineBackend> backend;
    State& state;
};
} // namespace Pica

View File

@ -3,9 +3,11 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <cstring> #include <cstring>
#include "video_core/geometry_pipeline.h"
#include "video_core/pica.h" #include "video_core/pica.h"
#include "video_core/pica_state.h" #include "video_core/pica_state.h"
#include "video_core/regs_pipeline.h" #include "video_core/renderer_base.h"
#include "video_core/video_core.h"
namespace Pica { namespace Pica {
@ -24,6 +26,23 @@ void Zero(T& o) {
memset(&o, 0, sizeof(o)); memset(&o, 0, sizeof(o));
} }
// Wires the end of the vertex pipeline together: vertices leaving either the vertex shader (via
// the geometry pipeline) or the geometry shader (via the GS unit's emitter) are converted to
// OutputVertex and fed to the primitive assembler, which hands finished triangles to the
// rasterizer.
State::State() : geometry_pipeline(*this) {
    auto SubmitVertex = [this](const Shader::AttributeBuffer& vertex) {
        using Pica::Shader::OutputVertex;
        // Nothing of this object is needed to reach the renderer, so the lambda captures nothing
        auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1,
                              const OutputVertex& v2) {
            VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
        };
        primitive_assembler.SubmitVertex(
            Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, vertex), AddTriangle);
    };

    auto SetWinding = [this]() { primitive_assembler.SetWinding(); };

    // Register the handlers on the members of the object being constructed. The lambdas capture
    // `this`, so installing them on the global g_state would only be right when this object *is*
    // g_state; for any other instance it would redirect the global's handlers to this object and
    // leave this object's own handlers unset.
    gs_unit.SetVertexHandler(SubmitVertex, SetWinding);
    geometry_pipeline.SetVertexHandler(SubmitVertex);
}
void State::Reset() { void State::Reset() {
Zero(regs); Zero(regs);
Zero(vs); Zero(vs);

View File

@ -8,6 +8,7 @@
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/vector_math.h" #include "common/vector_math.h"
#include "video_core/geometry_pipeline.h"
#include "video_core/primitive_assembly.h" #include "video_core/primitive_assembly.h"
#include "video_core/regs.h" #include "video_core/regs.h"
#include "video_core/shader/shader.h" #include "video_core/shader/shader.h"
@ -16,6 +17,7 @@ namespace Pica {
/// Struct used to describe current Pica state /// Struct used to describe current Pica state
struct State { struct State {
State();
void Reset(); void Reset();
/// Pica registers /// Pica registers
@ -137,8 +139,17 @@ struct State {
Shader::AttributeBuffer input_vertex; Shader::AttributeBuffer input_vertex;
// Index of the next attribute to be loaded into `input_vertex`. // Index of the next attribute to be loaded into `input_vertex`.
u32 current_attribute = 0; u32 current_attribute = 0;
// Indicates the immediate mode just started and the geometry pipeline needs to reconfigure
bool reset_geometry_pipeline = true;
} immediate; } immediate;
// The geometry shader unit needs to be kept in the global state because some shaders rely on
// register values being preserved across shader invocations.
// TODO: also bring the three vertex shader units here and implement the shader scheduler.
Shader::GSUnitState gs_unit;
GeometryPipeline geometry_pipeline;
// This is constructed with a dummy triangle topology // This is constructed with a dummy triangle topology
PrimitiveAssembler<Shader::OutputVertex> primitive_assembler; PrimitiveAssembler<Shader::OutputVertex> primitive_assembler;
}; };

View File

@ -17,16 +17,19 @@ template <typename VertexType>
void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx, void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx,
TriangleHandler triangle_handler) { TriangleHandler triangle_handler) {
switch (topology) { switch (topology) {
// TODO: Figure out what's different with TriangleTopology::Shader.
case PipelineRegs::TriangleTopology::List: case PipelineRegs::TriangleTopology::List:
case PipelineRegs::TriangleTopology::Shader: case PipelineRegs::TriangleTopology::Shader:
if (buffer_index < 2) { if (buffer_index < 2) {
buffer[buffer_index++] = vtx; buffer[buffer_index++] = vtx;
} else { } else {
buffer_index = 0; buffer_index = 0;
if (topology == PipelineRegs::TriangleTopology::Shader && winding) {
triangle_handler(buffer[1], buffer[0], vtx);
winding = false;
} else {
triangle_handler(buffer[0], buffer[1], vtx); triangle_handler(buffer[0], buffer[1], vtx);
} }
}
break; break;
case PipelineRegs::TriangleTopology::Strip: case PipelineRegs::TriangleTopology::Strip:
@ -50,10 +53,16 @@ void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx,
} }
} }
// Requests that the vertex order of the next assembled triangle be inverted. SubmitVertex only
// honours the flag for TriangleTopology::Shader and clears it once that triangle has been emitted;
// Reset() clears it as well.
template <typename VertexType>
void PrimitiveAssembler<VertexType>::SetWinding() {
winding = true;
}
template <typename VertexType> template <typename VertexType>
void PrimitiveAssembler<VertexType>::Reset() { void PrimitiveAssembler<VertexType>::Reset() {
buffer_index = 0; buffer_index = 0;
strip_ready = false; strip_ready = false;
winding = false;
} }
template <typename VertexType> template <typename VertexType>

View File

@ -29,6 +29,12 @@ struct PrimitiveAssembler {
*/ */
void SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler); void SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler);
/**
* Invert the vertex order of the next triangle. Called by geometry shader emitter.
* This only takes effect for TriangleTopology::Shader.
*/
void SetWinding();
/** /**
* Resets the internal state of the PrimitiveAssembler. * Resets the internal state of the PrimitiveAssembler.
*/ */
@ -45,6 +51,7 @@ private:
int buffer_index; int buffer_index;
VertexType buffer[2]; VertexType buffer[2];
bool strip_ready = false; bool strip_ready = false;
bool winding = false;
}; };
} // namespace } // namespace

View File

@ -147,7 +147,15 @@ struct PipelineRegs {
// Number of vertices to render // Number of vertices to render
u32 num_vertices; u32 num_vertices;
INSERT_PADDING_WORDS(0x1); enum class UseGS : u32 {
No = 0,
Yes = 2,
};
union {
BitField<0, 2, UseGS> use_gs;
BitField<31, 1, u32> variable_primitive;
};
// The index of the first vertex to render // The index of the first vertex to render
u32 vertex_offset; u32 vertex_offset;
@ -218,7 +226,29 @@ struct PipelineRegs {
GPUMode gpu_mode; GPUMode gpu_mode;
INSERT_PADDING_WORDS(0x18); INSERT_PADDING_WORDS(0x4);
BitField<0, 4, u32> vs_outmap_total_minus_1_a;
INSERT_PADDING_WORDS(0x6);
BitField<0, 4, u32> vs_outmap_total_minus_1_b;
enum class GSMode : u32 {
Point = 0,
VariablePrimitive = 1,
FixedPrimitive = 2,
};
union {
BitField<0, 8, GSMode> mode;
BitField<8, 4, u32> fixed_vertex_num_minus_1;
BitField<12, 4, u32> stride_minus_1;
BitField<16, 4, u32> start_index;
} gs_config;
INSERT_PADDING_WORDS(0x1);
u32 variable_vertex_main_num_minus_1;
INSERT_PADDING_WORDS(0x9);
enum class TriangleTopology : u32 { enum class TriangleTopology : u32 {
List = 0, List = 0,

View File

@ -24,9 +24,16 @@ struct ShaderRegs {
INSERT_PADDING_WORDS(0x4); INSERT_PADDING_WORDS(0x4);
enum ShaderMode {
GS = 0x08,
VS = 0xA0,
};
union { union {
// Number of input attributes to shader unit - 1 // Number of input attributes to shader unit - 1
BitField<0, 4, u32> max_input_attribute_index; BitField<0, 4, u32> max_input_attribute_index;
BitField<8, 8, u32> input_to_uniform;
BitField<24, 8, ShaderMode> shader_mode;
}; };
// Offset to shader program entry point (in words) // Offset to shader program entry point (in words)

View File

@ -21,7 +21,8 @@ namespace Pica {
namespace Shader { namespace Shader {
OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& input) { OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs,
const AttributeBuffer& input) {
// Setup output data // Setup output data
union { union {
OutputVertex ret{}; OutputVertex ret{};
@ -82,6 +83,44 @@ void UnitState::WriteOutput(const ShaderRegs& config, AttributeBuffer& output) {
} }
} }
// Stores the primitive emitter this unit uses for EMIT/SETEMIT. The pointer is only stored here,
// not dereferenced; the default argument in the declaration leaves it null.
UnitState::UnitState(GSEmitter* emitter) : emitter_ptr(emitter) {}
// Allocates the handler pair on the heap. The function objects are kept behind a raw pointer so
// that GSEmitter itself remains a standard layout type (see the comment in the struct).
GSEmitter::GSEmitter() {
handlers = new Handlers;
}
// Releases the handler pair allocated by the constructor.
GSEmitter::~GSEmitter() {
delete handlers;
}
// Stores the current output registers as the vertex selected by vertex_id and, if primitive
// emission is enabled, forwards all three buffered vertices to the vertex handler.
void GSEmitter::Emit(Math::Vec4<float24> (&vertex)[16]) {
    ASSERT(vertex_id < 3);
    auto& slot = buffer[vertex_id];
    std::copy(std::begin(vertex), std::end(vertex), slot.begin());

    if (!prim_emit) {
        return;
    }

    // Tell the receiver to flip the vertex order of the triangle that is about to be sent
    if (winding) {
        handlers->winding_setter();
    }

    for (const auto& buffered_vertex : buffer) {
        AttributeBuffer output;
        // Pack the registers enabled in output_mask densely, in ascending register order
        unsigned int next_attr = 0;
        for (unsigned int reg : Common::BitSet<u32>(output_mask)) {
            output.attr[next_attr] = buffered_vertex[reg];
            ++next_attr;
        }
        handlers->vertex_handler(output);
    }
}
// Points the base unit state at this unit's own emitter. The emitter member has not been
// constructed yet when the base class constructor runs, which is fine because that constructor
// only stores the address.
GSUnitState::GSUnitState() : UnitState(&emitter) {}
// Installs the callbacks the emitter invokes when the geometry shader emits a primitive: one that
// receives each emitted vertex and one that signals an inverted winding for the next triangle.
void GSUnitState::SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter) {
    auto& callbacks = *emitter.handlers;
    callbacks.vertex_handler = std::move(vertex_handler);
    callbacks.winding_setter = std::move(winding_setter);
}
// Copies the output register mask from the shader registers into the emitter, where Emit() uses
// it to choose which output registers are forwarded for each vertex.
void GSUnitState::ConfigOutput(const ShaderRegs& config) {
emitter.output_mask = config.output_mask;
}
MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
#ifdef ARCHITECTURE_x86_64 #ifdef ARCHITECTURE_x86_64

View File

@ -6,6 +6,7 @@
#include <array> #include <array>
#include <cstddef> #include <cstddef>
#include <functional>
#include <type_traits> #include <type_traits>
#include <nihstro/shader_bytecode.h> #include <nihstro/shader_bytecode.h>
#include "common/assert.h" #include "common/assert.h"
@ -31,6 +32,12 @@ struct AttributeBuffer {
alignas(16) Math::Vec4<float24> attr[16]; alignas(16) Math::Vec4<float24> attr[16];
}; };
/// Handler type for receiving vertex outputs from vertex shader or geometry shader
using VertexHandler = std::function<void(const AttributeBuffer&)>;
/// Handler type for signaling to invert the vertex order of the next triangle
using WindingSetter = std::function<void()>;
struct OutputVertex { struct OutputVertex {
Math::Vec4<float24> pos; Math::Vec4<float24> pos;
Math::Vec4<float24> quat; Math::Vec4<float24> quat;
@ -43,7 +50,8 @@ struct OutputVertex {
INSERT_PADDING_WORDS(1); INSERT_PADDING_WORDS(1);
Math::Vec2<float24> tc2; Math::Vec2<float24> tc2;
static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& output); static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs,
const AttributeBuffer& output);
}; };
#define ASSERT_POS(var, pos) \ #define ASSERT_POS(var, pos) \
static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \ static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \
@ -60,6 +68,29 @@ ASSERT_POS(tc2, RasterizerRegs::VSOutputAttributes::TEXCOORD2_U);
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size"); static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size");
/**
 * This structure contains state information for primitive emitting in geometry shader.
 * It buffers up to three vertices written by the shader and forwards them to the registered
 * handlers when a primitive is emitted.
 */
struct GSEmitter {
    /// The three vertex slots, each holding a snapshot of the 16 output registers
    std::array<std::array<Math::Vec4<float24>, 16>, 3> buffer;
    /// Slot written by the next Emit(); must be less than 3
    u8 vertex_id = 0;
    /// Whether the next Emit() also sends the buffered vertices to the vertex handler
    bool prim_emit = false;
    /// Whether the winding setter is invoked before the vertices are sent
    bool winding = false;
    /// Bit mask of the output registers that are forwarded for each vertex
    u32 output_mask = 0;

    // Function objects are hidden behind a raw pointer to make the structure standard layout type,
    // for JIT to use offsetof to access other members.
    struct Handlers {
        VertexHandler vertex_handler;
        WindingSetter winding_setter;
    } * handlers;

    GSEmitter();
    ~GSEmitter();

    // `handlers` is owned through a raw pointer and freed in the destructor, so an implicit copy
    // would make two emitters delete the same allocation. Copying is therefore disabled.
    GSEmitter(const GSEmitter&) = delete;
    GSEmitter& operator=(const GSEmitter&) = delete;

    /**
     * Stores the given output registers into the slot selected by vertex_id and, if prim_emit is
     * set, sends all buffered vertices to the vertex handler.
     * @param vertex the 16 output registers of the shader unit
     */
    void Emit(Math::Vec4<float24> (&vertex)[16]);
};
static_assert(std::is_standard_layout<GSEmitter>::value, "GSEmitter is not standard layout type");
/** /**
* This structure contains the state information that needs to be unique for a shader unit. The 3DS * This structure contains the state information that needs to be unique for a shader unit. The 3DS
* has four shader units that process shaders in parallel. At the present, Citra only implements a * has four shader units that process shaders in parallel. At the present, Citra only implements a
@ -67,6 +98,7 @@ static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has inva
* here will make it easier for us to parallelize the shader processing later. * here will make it easier for us to parallelize the shader processing later.
*/ */
struct UnitState { struct UnitState {
explicit UnitState(GSEmitter* emitter = nullptr);
struct Registers { struct Registers {
// The registers are accessed by the shader JIT using SSE instructions, and are therefore // The registers are accessed by the shader JIT using SSE instructions, and are therefore
// required to be 16-byte aligned. // required to be 16-byte aligned.
@ -82,6 +114,8 @@ struct UnitState {
// TODO: How many bits do these actually have? // TODO: How many bits do these actually have?
s32 address_registers[3]; s32 address_registers[3];
GSEmitter* emitter_ptr;
static size_t InputOffset(const SourceRegister& reg) { static size_t InputOffset(const SourceRegister& reg) {
switch (reg.GetRegisterType()) { switch (reg.GetRegisterType()) {
case RegisterType::Input: case RegisterType::Input:
@ -125,6 +159,19 @@ struct UnitState {
void WriteOutput(const ShaderRegs& config, AttributeBuffer& output); void WriteOutput(const ShaderRegs& config, AttributeBuffer& output);
}; };
/**
* This is an extended shader unit state that represents the special unit that can run both vertex
* shader and geometry shader. It contains an additional primitive emitter and utilities for
* geometry shader.
*/
struct GSUnitState : public UnitState {
/// Constructs the unit with the base state pointing at this unit's own emitter
GSUnitState();
/// Sets the callbacks the emitter uses to deliver emitted vertices and winding changes
void SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter);
/// Copies the output register mask from the given shader registers into the emitter
void ConfigOutput(const ShaderRegs& config);
/// Primitive emitter state driven by the EMIT and SETEMIT instructions
GSEmitter emitter;
};
struct ShaderSetup { struct ShaderSetup {
struct { struct {
// The float uniforms are accessed by the shader JIT using SSE instructions, and are // The float uniforms are accessed by the shader JIT using SSE instructions, and are

View File

@ -636,6 +636,22 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
break; break;
} }
case OpCode::Id::EMIT: {
GSEmitter* emitter = state.emitter_ptr;
ASSERT_MSG(emitter, "Execute EMIT on VS");
emitter->Emit(state.registers.output);
break;
}
case OpCode::Id::SETEMIT: {
GSEmitter* emitter = state.emitter_ptr;
ASSERT_MSG(emitter, "Execute SETEMIT on VS");
emitter->vertex_id = instr.setemit.vertex_id;
emitter->prim_emit = instr.setemit.prim_emit != 0;
emitter->winding = instr.setemit.winding != 0;
break;
}
default: default:
LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
(int)instr.opcode.Value().EffectiveOpCode(), (int)instr.opcode.Value().EffectiveOpCode(),

View File

@ -75,8 +75,8 @@ const JitFunction instr_table[64] = {
&JitShader::Compile_IF, // ifu &JitShader::Compile_IF, // ifu
&JitShader::Compile_IF, // ifc &JitShader::Compile_IF, // ifc
&JitShader::Compile_LOOP, // loop &JitShader::Compile_LOOP, // loop
nullptr, // emit &JitShader::Compile_EMIT, // emit
nullptr, // sete &JitShader::Compile_SETE, // sete
&JitShader::Compile_JMP, // jmpc &JitShader::Compile_JMP, // jmpc
&JitShader::Compile_JMP, // jmpu &JitShader::Compile_JMP, // jmpu
&JitShader::Compile_CMP, // cmp &JitShader::Compile_CMP, // cmp
@ -772,6 +772,51 @@ void JitShader::Compile_JMP(Instruction instr) {
} }
} }
// Free-function trampoline that lets JIT-generated code call GSEmitter::Emit through
// CallFarFunction. The output registers are passed as a pointer to the 16-element array and
// forwarded as a reference.
static void Emit(GSEmitter* emitter, Math::Vec4<float24> (*output)[16]) {
emitter->Emit(*output);
}
// Generates code for the EMIT instruction: if the unit state has an emitter, call the Emit
// trampoline with the emitter and the unit's output registers; otherwise log a critical message.
void JitShader::Compile_EMIT(Instruction instr) {
Label have_emitter, end;
// rax = state->emitter_ptr; a null pointer means this unit has no emitter
mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]);
test(rax, rax);
jnz(have_emitter);
// No emitter: report the error and skip the instruction
ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
mov(ABI_PARAM1, reinterpret_cast<size_t>("Execute EMIT on VS"));
CallFarFunction(*this, LogCritical);
ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
jmp(end);
L(have_emitter);
// Emitter present: call Emit(emitter, &state->registers.output)
ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
mov(ABI_PARAM1, rax);
mov(ABI_PARAM2, STATE);
add(ABI_PARAM2, static_cast<Xbyak::uint32>(offsetof(UnitState, registers.output)));
CallFarFunction(*this, Emit);
ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
L(end);
}
// Generates code for the SETEMIT instruction: if the unit state has an emitter, store the
// instruction's vertex_id, prim_emit and winding fields into it; otherwise log a critical message.
void JitShader::Compile_SETE(Instruction instr) {
Label have_emitter, end;
// rax = state->emitter_ptr; a null pointer means this unit has no emitter
mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]);
test(rax, rax);
jnz(have_emitter);
// No emitter: report the error and skip the instruction
ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
mov(ABI_PARAM1, reinterpret_cast<size_t>("Execute SETEMIT on VS"));
CallFarFunction(*this, LogCritical);
ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
jmp(end);
L(have_emitter);
// The field values are known at compile time, so they are written as byte immediates at the
// member offsets of GSEmitter
mov(byte[rax + offsetof(GSEmitter, vertex_id)], instr.setemit.vertex_id);
mov(byte[rax + offsetof(GSEmitter, prim_emit)], instr.setemit.prim_emit);
mov(byte[rax + offsetof(GSEmitter, winding)], instr.setemit.winding);
L(end);
}
void JitShader::Compile_Block(unsigned end) { void JitShader::Compile_Block(unsigned end) {
while (program_counter < end) { while (program_counter < end) {
Compile_NextInstr(); Compile_NextInstr();

View File

@ -66,6 +66,8 @@ public:
void Compile_JMP(Instruction instr); void Compile_JMP(Instruction instr);
void Compile_CMP(Instruction instr); void Compile_CMP(Instruction instr);
void Compile_MAD(Instruction instr); void Compile_MAD(Instruction instr);
void Compile_EMIT(Instruction instr);
void Compile_SETE(Instruction instr);
private: private:
void Compile_Block(unsigned end); void Compile_Block(unsigned end);