mirror of
https://github.com/citra-emu/citra.git
synced 2024-11-25 08:30:41 +00:00
New GS refactor (#14)
* Turn ShaderSetup into a class
* Cleanup ShaderSetup
* Replace logic in shader.cpp with loop
* Rename 'VertexLoaded' breakpoint to 'Vertex shader invocation'
* Prepare Pica regs for GS
* Make shader code less VS-specific
* Only check for enabled JIT in Setup() to avoid race conditions
* Write shader registers in functions
* Write GS registers
* Implement EMIT and SETEMIT
* Implement 4 shader units and geometry shaders
This commit is contained in:
parent
c7032066b8
commit
d6c9d5b4f1
@ -44,7 +44,8 @@ QVariant BreakPointModel::data(const QModelIndex& index, int role) const
|
|||||||
{ Pica::DebugContext::Event::PicaCommandProcessed, tr("Pica command processed") },
|
{ Pica::DebugContext::Event::PicaCommandProcessed, tr("Pica command processed") },
|
||||||
{ Pica::DebugContext::Event::IncomingPrimitiveBatch, tr("Incoming primitive batch") },
|
{ Pica::DebugContext::Event::IncomingPrimitiveBatch, tr("Incoming primitive batch") },
|
||||||
{ Pica::DebugContext::Event::FinishedPrimitiveBatch, tr("Finished primitive batch") },
|
{ Pica::DebugContext::Event::FinishedPrimitiveBatch, tr("Finished primitive batch") },
|
||||||
{ Pica::DebugContext::Event::VertexLoaded, tr("Vertex loaded") },
|
{ Pica::DebugContext::Event::RunVS, tr("Vertex shader invocation") },
|
||||||
|
{ Pica::DebugContext::Event::RunGS, tr("Geometry shader invocation") },
|
||||||
{ Pica::DebugContext::Event::IncomingDisplayTransfer, tr("Incoming display transfer") },
|
{ Pica::DebugContext::Event::IncomingDisplayTransfer, tr("Incoming display transfer") },
|
||||||
{ Pica::DebugContext::Event::GSPCommandProcessed, tr("GSP command processed") },
|
{ Pica::DebugContext::Event::GSPCommandProcessed, tr("GSP command processed") },
|
||||||
{ Pica::DebugContext::Event::BufferSwapped, tr("Buffers swapped") }
|
{ Pica::DebugContext::Event::BufferSwapped, tr("Buffers swapped") }
|
||||||
|
@ -70,7 +70,7 @@ void GraphicsTracingWidget::StartRecording() {
|
|||||||
std::array<u32, 4 * 16> default_attributes;
|
std::array<u32, 4 * 16> default_attributes;
|
||||||
for (unsigned i = 0; i < 16; ++i) {
|
for (unsigned i = 0; i < 16; ++i) {
|
||||||
for (unsigned comp = 0; comp < 3; ++comp) {
|
for (unsigned comp = 0; comp < 3; ++comp) {
|
||||||
default_attributes[4 * i + comp] = nihstro::to_float24(Pica::g_state.vs.default_attributes[i][comp].ToFloat32());
|
default_attributes[4 * i + comp] = nihstro::to_float24(Pica::g_state.vs_default_attributes[i][comp].ToFloat32());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -365,7 +365,7 @@ GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::De
|
|||||||
input_data[i]->setValidator(new QDoubleValidator(input_data[i]));
|
input_data[i]->setValidator(new QDoubleValidator(input_data[i]));
|
||||||
}
|
}
|
||||||
|
|
||||||
breakpoint_warning = new QLabel(tr("(data only available at VertexLoaded breakpoints)"));
|
breakpoint_warning = new QLabel(tr("(data only available at vertex shader invocation breakpoints)"));
|
||||||
|
|
||||||
// TODO: Add some button for jumping to the shader entry point
|
// TODO: Add some button for jumping to the shader entry point
|
||||||
|
|
||||||
@ -454,7 +454,7 @@ GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::De
|
|||||||
|
|
||||||
void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) {
|
void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) {
|
||||||
auto input = static_cast<Pica::Shader::InputVertex*>(data);
|
auto input = static_cast<Pica::Shader::InputVertex*>(data);
|
||||||
if (event == Pica::DebugContext::Event::VertexLoaded) {
|
if (event == Pica::DebugContext::Event::RunVS) {
|
||||||
Reload(true, data);
|
Reload(true, data);
|
||||||
} else {
|
} else {
|
||||||
// No vertex data is retrievable => invalidate currently stored vertex data
|
// No vertex data is retrievable => invalidate currently stored vertex data
|
||||||
@ -501,7 +501,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d
|
|||||||
info.labels.insert({ entry_point, "main" });
|
info.labels.insert({ entry_point, "main" });
|
||||||
|
|
||||||
// Generate debug information
|
// Generate debug information
|
||||||
debug_data = Pica::Shader::ProduceDebugInfo(input_vertex, num_attributes, shader_config, shader_setup);
|
debug_data = Pica::g_state.vs.ProduceDebugInfo(input_vertex, num_attributes, shader_config);
|
||||||
|
|
||||||
// Reload widget state
|
// Reload widget state
|
||||||
for (int attr = 0; attr < num_attributes; ++attr) {
|
for (int attr = 0; attr < num_attributes; ++attr) {
|
||||||
|
@ -27,10 +27,6 @@ namespace Pica {
|
|||||||
|
|
||||||
namespace CommandProcessor {
|
namespace CommandProcessor {
|
||||||
|
|
||||||
static int float_regs_counter = 0;
|
|
||||||
|
|
||||||
static u32 uniform_write_buffer[4];
|
|
||||||
|
|
||||||
static int default_attr_counter = 0;
|
static int default_attr_counter = 0;
|
||||||
|
|
||||||
static u32 default_attr_write_buffer[3];
|
static u32 default_attr_write_buffer[3];
|
||||||
@ -124,7 +120,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||||||
|
|
||||||
// TODO: Verify that this actually modifies the register!
|
// TODO: Verify that this actually modifies the register!
|
||||||
if (setup.index < 15) {
|
if (setup.index < 15) {
|
||||||
g_state.vs.default_attributes[setup.index] = attribute;
|
g_state.vs_default_attributes[setup.index] = attribute;
|
||||||
setup.index++;
|
setup.index++;
|
||||||
} else {
|
} else {
|
||||||
// Put each attribute into an immediate input buffer.
|
// Put each attribute into an immediate input buffer.
|
||||||
@ -139,14 +135,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||||||
if (immediate_attribute_id >= regs.vs.num_input_attributes+1) {
|
if (immediate_attribute_id >= regs.vs.num_input_attributes+1) {
|
||||||
immediate_attribute_id = 0;
|
immediate_attribute_id = 0;
|
||||||
|
|
||||||
Shader::UnitState<false> shader_unit;
|
auto& shader_unit = Shader::GetShaderUnit(false);
|
||||||
Shader::Setup();
|
g_state.vs.Setup();
|
||||||
|
|
||||||
if (g_debug_context)
|
|
||||||
g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, static_cast<void*>(&immediate_input));
|
|
||||||
|
|
||||||
// Send to vertex shader
|
// Send to vertex shader
|
||||||
Shader::OutputVertex output = Shader::Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1);
|
if (g_debug_context)
|
||||||
|
g_debug_context->OnEvent(DebugContext::Event::RunVS, static_cast<void*>(&immediate_input));
|
||||||
|
g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1, regs.vs);
|
||||||
|
Shader::OutputVertex output_vertex = shader_unit.output_registers.ToVertex(regs.vs);
|
||||||
|
|
||||||
// Send to renderer
|
// Send to renderer
|
||||||
using Pica::Shader::OutputVertex;
|
using Pica::Shader::OutputVertex;
|
||||||
@ -154,7 +150,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||||||
VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
|
VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
|
||||||
};
|
};
|
||||||
|
|
||||||
g_state.primitive_assembler.SubmitVertex(output, AddTriangle);
|
g_state.primitive_assembler.SubmitVertex(output_vertex, AddTriangle);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -294,13 +290,16 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||||||
// The size has been tuned for optimal balance between hit-rate and the cost of lookup
|
// The size has been tuned for optimal balance between hit-rate and the cost of lookup
|
||||||
const size_t VERTEX_CACHE_SIZE = 32;
|
const size_t VERTEX_CACHE_SIZE = 32;
|
||||||
std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
|
std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
|
||||||
std::array<Shader::OutputVertex, VERTEX_CACHE_SIZE> vertex_cache;
|
std::array<Shader::OutputRegisters, VERTEX_CACHE_SIZE> vertex_cache;
|
||||||
|
|
||||||
unsigned int vertex_cache_pos = 0;
|
unsigned int vertex_cache_pos = 0;
|
||||||
vertex_cache_ids.fill(-1);
|
vertex_cache_ids.fill(-1);
|
||||||
|
|
||||||
Shader::UnitState<false> shader_unit;
|
auto& vs_shader_unit = Shader::GetShaderUnit(false);
|
||||||
Shader::Setup();
|
g_state.vs.Setup();
|
||||||
|
|
||||||
|
auto& gs_unit_state = Shader::GetShaderUnit(true);
|
||||||
|
g_state.gs.Setup();
|
||||||
|
|
||||||
for (unsigned int index = 0; index < regs.num_vertices; ++index)
|
for (unsigned int index = 0; index < regs.num_vertices; ++index)
|
||||||
{
|
{
|
||||||
@ -312,7 +311,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||||||
ASSERT(vertex != -1);
|
ASSERT(vertex != -1);
|
||||||
|
|
||||||
bool vertex_cache_hit = false;
|
bool vertex_cache_hit = false;
|
||||||
Shader::OutputVertex output;
|
Shader::OutputRegisters output_registers;
|
||||||
|
|
||||||
if (is_indexed) {
|
if (is_indexed) {
|
||||||
if (g_debug_context && Pica::g_debug_context->recorder) {
|
if (g_debug_context && Pica::g_debug_context->recorder) {
|
||||||
@ -322,7 +321,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||||||
|
|
||||||
for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
|
for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
|
||||||
if (vertex == vertex_cache_ids[i]) {
|
if (vertex == vertex_cache_ids[i]) {
|
||||||
output = vertex_cache[i];
|
output_registers = vertex_cache[i];
|
||||||
vertex_cache_hit = true;
|
vertex_cache_hit = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -369,7 +368,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||||||
}
|
}
|
||||||
} else if (attribute_config.IsDefaultAttribute(i)) {
|
} else if (attribute_config.IsDefaultAttribute(i)) {
|
||||||
// Load the default attribute if we're configured to do so
|
// Load the default attribute if we're configured to do so
|
||||||
input.attr[i] = g_state.vs.default_attributes[i];
|
input.attr[i] = g_state.vs_default_attributes[i];
|
||||||
LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
|
LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
|
||||||
i, vertex, index,
|
i, vertex, index,
|
||||||
input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
|
input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
|
||||||
@ -381,27 +380,69 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g_debug_context)
|
|
||||||
g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
|
|
||||||
|
|
||||||
// Send to vertex shader
|
// Send to vertex shader
|
||||||
output = Shader::Run(shader_unit, input, attribute_config.GetNumTotalAttributes());
|
if (g_debug_context)
|
||||||
|
g_debug_context->OnEvent(DebugContext::Event::RunVS, static_cast<void*>(&input));
|
||||||
|
g_state.vs.Run(vs_shader_unit, input, attribute_config.GetNumTotalAttributes(), g_state.regs.vs);
|
||||||
|
output_registers = vs_shader_unit.output_registers;
|
||||||
|
|
||||||
if (is_indexed) {
|
if (is_indexed) {
|
||||||
vertex_cache[vertex_cache_pos] = output;
|
vertex_cache[vertex_cache_pos] = output_registers;
|
||||||
vertex_cache_ids[vertex_cache_pos] = vertex;
|
vertex_cache_ids[vertex_cache_pos] = vertex;
|
||||||
vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
|
vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Send to renderer
|
// Helper to send triangle to renderer
|
||||||
using Pica::Shader::OutputVertex;
|
using Pica::Shader::OutputVertex;
|
||||||
auto AddTriangle = [](
|
auto AddTriangle = [](
|
||||||
const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) {
|
const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) {
|
||||||
VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
|
VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
|
||||||
};
|
};
|
||||||
|
|
||||||
primitive_assembler.SubmitVertex(output, AddTriangle);
|
if (Shader::UseGS()) {
|
||||||
|
|
||||||
|
auto& regs = g_state.regs;
|
||||||
|
auto& gs_regs = g_state.regs.gs;
|
||||||
|
auto& gs_buf = g_state.gs_input_buffer;
|
||||||
|
|
||||||
|
// Vertex Shader Outputs are converted into Geometry Shader inputs by filling up a buffer
|
||||||
|
// For example, if we have a geoshader that takes 6 inputs, and the vertex shader outputs 2 attributes
|
||||||
|
// It would take 3 vertices to fill up the Geometry Shader buffer
|
||||||
|
unsigned int gs_input_count = gs_regs.num_input_attributes + 1;
|
||||||
|
unsigned int vs_output_count = regs.vs_outmap_total2 + 1;
|
||||||
|
ASSERT_MSG(regs.vs_outmap_total1 == regs.vs_outmap_total2, "VS_OUTMAP_TOTAL1 and VS_OUTMAP_TOTAL2 don't match!");
|
||||||
|
// copy into the geoshader buffer
|
||||||
|
for (unsigned int i = 0; i < vs_output_count; i++) {
|
||||||
|
if (gs_buf.index >= gs_input_count) {
|
||||||
|
// TODO(ds84182): LOG_ERROR()
|
||||||
|
ASSERT_MSG(false, "Number of GS inputs (%d) is not divisible by number of VS outputs (%d)",
|
||||||
|
gs_input_count, vs_output_count);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
gs_buf.buffer.attr[gs_buf.index++] = output_registers.value[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gs_buf.index >= gs_input_count) {
|
||||||
|
|
||||||
|
// b15 will be false when a new primitive starts and then switch to true at some point
|
||||||
|
//TODO: Test how this works exactly on hardware
|
||||||
|
g_state.gs.uniforms.b[15] |= (index > 0);
|
||||||
|
|
||||||
|
// Process Geometry Shader
|
||||||
|
if (g_debug_context)
|
||||||
|
g_debug_context->OnEvent(DebugContext::Event::RunGS, static_cast<void*>(&gs_buf.buffer));
|
||||||
|
gs_unit_state.emit_triangle_callback = AddTriangle;
|
||||||
|
g_state.gs.Run(gs_unit_state, gs_buf.buffer, gs_input_count, regs.gs);
|
||||||
|
gs_unit_state.emit_triangle_callback = nullptr;
|
||||||
|
|
||||||
|
gs_buf.index = 0;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Shader::OutputVertex output_vertex = output_registers.ToVertex(regs.vs);
|
||||||
|
primitive_assembler.SubmitVertex(output_vertex, AddTriangle);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto& range : memory_accesses.ranges) {
|
for (auto& range : memory_accesses.ranges) {
|
||||||
@ -412,10 +453,76 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case PICA_REG_INDEX(vs.bool_uniforms):
|
case PICA_REG_INDEX(gs.bool_uniforms):
|
||||||
for (unsigned i = 0; i < 16; ++i)
|
Shader::WriteUniformBoolReg(true, value);
|
||||||
g_state.vs.uniforms.b[i] = (regs.vs.bool_uniforms.Value() & (1 << i)) != 0;
|
break;
|
||||||
|
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[0], 0x281):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[1], 0x282):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[2], 0x283):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[3], 0x284):
|
||||||
|
{
|
||||||
|
unsigned index = (id - PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[0], 0x281));
|
||||||
|
auto values = regs.gs.int_uniforms[index];
|
||||||
|
Shader::WriteUniformIntReg(true, index, Math::Vec4<u8>(values.x, values.y, values.z, values.w));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.setup, 0x290):
|
||||||
|
Shader::WriteUniformFloatSetupReg(true, value);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[0], 0x291):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[1], 0x292):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[2], 0x293):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[3], 0x294):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[4], 0x295):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[5], 0x296):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[6], 0x297):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[7], 0x298):
|
||||||
|
{
|
||||||
|
Shader::WriteUniformFloatReg(true, value);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load shader program code
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.program.offset, 0x29b):
|
||||||
|
Shader::WriteProgramCodeOffset(true, value);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[0], 0x29c):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[1], 0x29d):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[2], 0x29e):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[3], 0x29f):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[4], 0x2a0):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[5], 0x2a1):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[6], 0x2a2):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[7], 0x2a3):
|
||||||
|
{
|
||||||
|
Shader::WriteProgramCode(true, value);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load swizzle pattern data
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.offset, 0x2a5):
|
||||||
|
Shader::WriteSwizzlePatternsOffset(true, value);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[0], 0x2a6):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[1], 0x2a7):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[2], 0x2a8):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[3], 0x2a9):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[4], 0x2aa):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[5], 0x2ab):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[6], 0x2ac):
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[7], 0x2ad):
|
||||||
|
{
|
||||||
|
Shader::WriteSwizzlePatterns(true, value);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case PICA_REG_INDEX(vs.bool_uniforms):
|
||||||
|
Shader::WriteUniformBoolReg(false, value);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1):
|
case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1):
|
||||||
@ -423,14 +530,16 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||||||
case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3):
|
case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3):
|
||||||
case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4):
|
case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4):
|
||||||
{
|
{
|
||||||
int index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1));
|
unsigned index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1));
|
||||||
auto values = regs.vs.int_uniforms[index];
|
auto values = regs.vs.int_uniforms[index];
|
||||||
g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w);
|
Shader::WriteUniformIntReg(false, index, Math::Vec4<u8>(values.x, values.y, values.z, values.w));
|
||||||
LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x",
|
|
||||||
index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value());
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.setup, 0x2c0):
|
||||||
|
Shader::WriteUniformFloatSetupReg(false, value);
|
||||||
|
break;
|
||||||
|
|
||||||
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[0], 0x2c1):
|
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[0], 0x2c1):
|
||||||
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[1], 0x2c2):
|
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[1], 0x2c2):
|
||||||
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[2], 0x2c3):
|
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[2], 0x2c3):
|
||||||
@ -440,49 +549,15 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||||||
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7):
|
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7):
|
||||||
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8):
|
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8):
|
||||||
{
|
{
|
||||||
auto& uniform_setup = regs.vs.uniform_setup;
|
Shader::WriteUniformFloatReg(false, value);
|
||||||
|
|
||||||
// TODO: Does actual hardware indeed keep an intermediate buffer or does
|
|
||||||
// it directly write the values?
|
|
||||||
uniform_write_buffer[float_regs_counter++] = value;
|
|
||||||
|
|
||||||
// Uniforms are written in a packed format such that four float24 values are encoded in
|
|
||||||
// three 32-bit numbers. We write to internal memory once a full such vector is
|
|
||||||
// written.
|
|
||||||
if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) ||
|
|
||||||
(float_regs_counter >= 3 && !uniform_setup.IsFloat32())) {
|
|
||||||
float_regs_counter = 0;
|
|
||||||
|
|
||||||
auto& uniform = g_state.vs.uniforms.f[uniform_setup.index];
|
|
||||||
|
|
||||||
if (uniform_setup.index > 95) {
|
|
||||||
LOG_ERROR(HW_GPU, "Invalid VS uniform index %d", (int)uniform_setup.index);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// NOTE: The destination component order indeed is "backwards"
|
|
||||||
if (uniform_setup.IsFloat32()) {
|
|
||||||
for (auto i : {0,1,2,3})
|
|
||||||
uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i]));
|
|
||||||
} else {
|
|
||||||
// TODO: Untested
|
|
||||||
uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8);
|
|
||||||
uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF));
|
|
||||||
uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | ((uniform_write_buffer[2] >> 24) & 0xFF));
|
|
||||||
uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF);
|
|
||||||
}
|
|
||||||
|
|
||||||
LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index,
|
|
||||||
uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(),
|
|
||||||
uniform.w.ToFloat32());
|
|
||||||
|
|
||||||
// TODO: Verify that this actually modifies the register!
|
|
||||||
uniform_setup.index.Assign(uniform_setup.index + 1);
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load shader program code
|
// Load shader program code
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(vs.program.offset, 0x2cb):
|
||||||
|
Shader::WriteProgramCodeOffset(false, value);
|
||||||
|
break;
|
||||||
|
|
||||||
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc):
|
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc):
|
||||||
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd):
|
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd):
|
||||||
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce):
|
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce):
|
||||||
@ -492,12 +567,15 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||||||
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2):
|
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2):
|
||||||
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3):
|
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3):
|
||||||
{
|
{
|
||||||
g_state.vs.program_code[regs.vs.program.offset] = value;
|
Shader::WriteProgramCode(false, value);
|
||||||
regs.vs.program.offset++;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load swizzle pattern data
|
// Load swizzle pattern data
|
||||||
|
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.offset, 0x2d5):
|
||||||
|
Shader::WriteSwizzlePatternsOffset(false, value);
|
||||||
|
break;
|
||||||
|
|
||||||
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6):
|
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6):
|
||||||
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7):
|
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7):
|
||||||
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8):
|
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8):
|
||||||
@ -507,8 +585,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||||||
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc):
|
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc):
|
||||||
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd):
|
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd):
|
||||||
{
|
{
|
||||||
g_state.vs.swizzle_data[regs.vs.swizzle_patterns.offset] = value;
|
Shader::WriteSwizzlePatterns(false, value);
|
||||||
regs.vs.swizzle_patterns.offset++;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -30,7 +30,8 @@ public:
|
|||||||
PicaCommandProcessed,
|
PicaCommandProcessed,
|
||||||
IncomingPrimitiveBatch,
|
IncomingPrimitiveBatch,
|
||||||
FinishedPrimitiveBatch,
|
FinishedPrimitiveBatch,
|
||||||
VertexLoaded,
|
RunVS,
|
||||||
|
RunGS,
|
||||||
IncomingDisplayTransfer,
|
IncomingDisplayTransfer,
|
||||||
GSPCommandProcessed,
|
GSPCommandProcessed,
|
||||||
BufferSwapped,
|
BufferSwapped,
|
||||||
|
@ -497,7 +497,7 @@ void Init() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Shutdown() {
|
void Shutdown() {
|
||||||
Shader::Shutdown();
|
Shader::ShaderSetup::Shutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
@ -1064,7 +1064,7 @@ struct Regs {
|
|||||||
// Number of vertices to render
|
// Number of vertices to render
|
||||||
u32 num_vertices;
|
u32 num_vertices;
|
||||||
|
|
||||||
INSERT_PADDING_WORDS(0x1);
|
BitField<0, 2, u32> using_geometry_shader;
|
||||||
|
|
||||||
// The index of the first vertex to render
|
// The index of the first vertex to render
|
||||||
u32 vertex_offset;
|
u32 vertex_offset;
|
||||||
@ -1112,7 +1112,14 @@ struct Regs {
|
|||||||
}
|
}
|
||||||
} command_buffer;
|
} command_buffer;
|
||||||
|
|
||||||
INSERT_PADDING_WORDS(0x07);
|
INSERT_PADDING_WORDS(0x06);
|
||||||
|
|
||||||
|
enum class VSComMode : u32 {
|
||||||
|
Shared = 0,
|
||||||
|
Exclusive = 1
|
||||||
|
};
|
||||||
|
|
||||||
|
VSComMode vs_com_mode;
|
||||||
|
|
||||||
enum class GPUMode : u32 {
|
enum class GPUMode : u32 {
|
||||||
Drawing = 0,
|
Drawing = 0,
|
||||||
@ -1121,7 +1128,17 @@ struct Regs {
|
|||||||
|
|
||||||
GPUMode gpu_mode;
|
GPUMode gpu_mode;
|
||||||
|
|
||||||
INSERT_PADDING_WORDS(0x18);
|
INSERT_PADDING_WORDS(0x4);
|
||||||
|
|
||||||
|
BitField<0, 4, u32> vs_outmap_total1;
|
||||||
|
|
||||||
|
INSERT_PADDING_WORDS(0x6);
|
||||||
|
|
||||||
|
BitField<0, 4, u32> vs_outmap_total2;
|
||||||
|
|
||||||
|
BitField<0, 4, u32> gsh_misc0;
|
||||||
|
|
||||||
|
INSERT_PADDING_WORDS(0xB);
|
||||||
|
|
||||||
enum class TriangleTopology : u32 {
|
enum class TriangleTopology : u32 {
|
||||||
List = 0,
|
List = 0,
|
||||||
@ -1130,7 +1147,10 @@ struct Regs {
|
|||||||
Shader = 3, // Programmable setup unit implemented in a geometry shader
|
Shader = 3, // Programmable setup unit implemented in a geometry shader
|
||||||
};
|
};
|
||||||
|
|
||||||
BitField<8, 2, TriangleTopology> triangle_topology;
|
union {
|
||||||
|
BitField<0, 4, u32> vs_outmap_count;
|
||||||
|
BitField<8, 2, TriangleTopology> triangle_topology;
|
||||||
|
};
|
||||||
|
|
||||||
u32 restart_primitive;
|
u32 restart_primitive;
|
||||||
|
|
||||||
@ -1149,8 +1169,9 @@ struct Regs {
|
|||||||
INSERT_PADDING_WORDS(0x4);
|
INSERT_PADDING_WORDS(0x4);
|
||||||
|
|
||||||
union {
|
union {
|
||||||
// Number of input attributes to shader unit - 1
|
BitField<0, 4, u32> num_input_attributes; // Number of input attributes to shader unit - 1
|
||||||
BitField<0, 4, u32> num_input_attributes;
|
BitField<8, 4, u32> use_subdivision;
|
||||||
|
BitField<24, 8, u32> use_geometry_shader;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Offset to shader program entry point (in words)
|
// Offset to shader program entry point (in words)
|
||||||
@ -1202,6 +1223,8 @@ struct Regs {
|
|||||||
}
|
}
|
||||||
|
|
||||||
union {
|
union {
|
||||||
|
u32 setup;
|
||||||
|
|
||||||
// Index of the next uniform to write to
|
// Index of the next uniform to write to
|
||||||
// TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices
|
// TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices
|
||||||
// TODO: Maybe the uppermost index is for the geometry shader? Investigate!
|
// TODO: Maybe the uppermost index is for the geometry shader? Investigate!
|
||||||
@ -1318,7 +1341,11 @@ ASSERT_REG_POSITION(trigger_draw, 0x22e);
|
|||||||
ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);
|
ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);
|
||||||
ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232);
|
ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232);
|
||||||
ASSERT_REG_POSITION(command_buffer, 0x238);
|
ASSERT_REG_POSITION(command_buffer, 0x238);
|
||||||
|
ASSERT_REG_POSITION(vs_com_mode, 0x244);
|
||||||
ASSERT_REG_POSITION(gpu_mode, 0x245);
|
ASSERT_REG_POSITION(gpu_mode, 0x245);
|
||||||
|
ASSERT_REG_POSITION(vs_outmap_total1, 0x24A);
|
||||||
|
ASSERT_REG_POSITION(vs_outmap_total2, 0x251);
|
||||||
|
ASSERT_REG_POSITION(gsh_misc0, 0x252);
|
||||||
ASSERT_REG_POSITION(triangle_topology, 0x25e);
|
ASSERT_REG_POSITION(triangle_topology, 0x25e);
|
||||||
ASSERT_REG_POSITION(restart_primitive, 0x25f);
|
ASSERT_REG_POSITION(restart_primitive, 0x25f);
|
||||||
ASSERT_REG_POSITION(gs, 0x280);
|
ASSERT_REG_POSITION(gs, 0x280);
|
||||||
|
@ -17,9 +17,13 @@ struct State {
|
|||||||
/// Pica registers
|
/// Pica registers
|
||||||
Regs regs;
|
Regs regs;
|
||||||
|
|
||||||
|
Shader::UnitState<false> shader_units[4];
|
||||||
|
|
||||||
Shader::ShaderSetup vs;
|
Shader::ShaderSetup vs;
|
||||||
Shader::ShaderSetup gs;
|
Shader::ShaderSetup gs;
|
||||||
|
|
||||||
|
Math::Vec4<float24> vs_default_attributes[16];
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
union LutEntry {
|
union LutEntry {
|
||||||
// Used for raw access
|
// Used for raw access
|
||||||
@ -56,6 +60,15 @@ struct State {
|
|||||||
|
|
||||||
// This is constructed with a dummy triangle topology
|
// This is constructed with a dummy triangle topology
|
||||||
PrimitiveAssembler<Shader::OutputVertex> primitive_assembler;
|
PrimitiveAssembler<Shader::OutputVertex> primitive_assembler;
|
||||||
|
|
||||||
|
/// Current geometry shader state
|
||||||
|
struct GeometryShaderState {
|
||||||
|
// Buffer used for geometry shader inputs
|
||||||
|
Shader::InputVertex buffer;
|
||||||
|
// The current index into the buffer
|
||||||
|
unsigned int index;
|
||||||
|
} gs_input_buffer;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
extern State g_state; ///< Current Pica state
|
extern State g_state; ///< Current Pica state
|
||||||
|
@ -20,7 +20,6 @@ template<typename VertexType>
|
|||||||
void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler)
|
void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler)
|
||||||
{
|
{
|
||||||
switch (topology) {
|
switch (topology) {
|
||||||
// TODO: Figure out what's different with TriangleTopology::Shader.
|
|
||||||
case Regs::TriangleTopology::List:
|
case Regs::TriangleTopology::List:
|
||||||
case Regs::TriangleTopology::Shader:
|
case Regs::TriangleTopology::Shader:
|
||||||
if (buffer_index < 2) {
|
if (buffer_index < 2) {
|
||||||
|
@ -27,83 +27,7 @@ namespace Pica {
|
|||||||
|
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
#ifdef ARCHITECTURE_x86_64
|
OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) {
|
||||||
static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map;
|
|
||||||
static const JitShader* jit_shader;
|
|
||||||
#endif // ARCHITECTURE_x86_64
|
|
||||||
|
|
||||||
void Setup() {
|
|
||||||
#ifdef ARCHITECTURE_x86_64
|
|
||||||
if (VideoCore::g_shader_jit_enabled) {
|
|
||||||
u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
|
|
||||||
Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)));
|
|
||||||
|
|
||||||
auto iter = shader_map.find(cache_key);
|
|
||||||
if (iter != shader_map.end()) {
|
|
||||||
jit_shader = iter->second.get();
|
|
||||||
} else {
|
|
||||||
auto shader = std::make_unique<JitShader>();
|
|
||||||
shader->Compile();
|
|
||||||
jit_shader = shader.get();
|
|
||||||
shader_map[cache_key] = std::move(shader);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif // ARCHITECTURE_x86_64
|
|
||||||
}
|
|
||||||
|
|
||||||
void Shutdown() {
|
|
||||||
#ifdef ARCHITECTURE_x86_64
|
|
||||||
shader_map.clear();
|
|
||||||
#endif // ARCHITECTURE_x86_64
|
|
||||||
}
|
|
||||||
|
|
||||||
static Common::Profiling::TimingCategory shader_category("Vertex Shader");
|
|
||||||
MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240));
|
|
||||||
|
|
||||||
OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
|
|
||||||
auto& config = g_state.regs.vs;
|
|
||||||
|
|
||||||
Common::Profiling::ScopeTimer timer(shader_category);
|
|
||||||
MICROPROFILE_SCOPE(GPU_VertexShader);
|
|
||||||
|
|
||||||
state.program_counter = config.main_offset;
|
|
||||||
state.debug.max_offset = 0;
|
|
||||||
state.debug.max_opdesc_id = 0;
|
|
||||||
|
|
||||||
// Setup input register table
|
|
||||||
const auto& attribute_register_map = config.input_register_map;
|
|
||||||
|
|
||||||
// TODO: Instead of this cumbersome logic, just load the input data directly like
|
|
||||||
// for (int attr = 0; attr < num_attributes; ++attr) { input_attr[0] = state.registers.input[attribute_register_map.attribute0_register]; }
|
|
||||||
if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0];
|
|
||||||
if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1];
|
|
||||||
if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2];
|
|
||||||
if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = input.attr[3];
|
|
||||||
if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = input.attr[4];
|
|
||||||
if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = input.attr[5];
|
|
||||||
if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = input.attr[6];
|
|
||||||
if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = input.attr[7];
|
|
||||||
if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = input.attr[8];
|
|
||||||
if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = input.attr[9];
|
|
||||||
if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = input.attr[10];
|
|
||||||
if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = input.attr[11];
|
|
||||||
if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = input.attr[12];
|
|
||||||
if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = input.attr[13];
|
|
||||||
if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = input.attr[14];
|
|
||||||
if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = input.attr[15];
|
|
||||||
|
|
||||||
state.conditional_code[0] = false;
|
|
||||||
state.conditional_code[1] = false;
|
|
||||||
|
|
||||||
#ifdef ARCHITECTURE_x86_64
|
|
||||||
if (VideoCore::g_shader_jit_enabled)
|
|
||||||
jit_shader->Run(&state.registers, g_state.regs.vs.main_offset);
|
|
||||||
else
|
|
||||||
RunInterpreter(state);
|
|
||||||
#else
|
|
||||||
RunInterpreter(state);
|
|
||||||
#endif // ARCHITECTURE_x86_64
|
|
||||||
|
|
||||||
// Setup output data
|
// Setup output data
|
||||||
OutputVertex ret;
|
OutputVertex ret;
|
||||||
// TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
|
// TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
|
||||||
@ -114,10 +38,10 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
|
|||||||
if (index >= g_state.regs.vs_output_total)
|
if (index >= g_state.regs.vs_output_total)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if ((g_state.regs.vs.output_mask & (1 << i)) == 0)
|
if ((config.output_mask & (1 << i)) == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
const auto& output_register_map = g_state.regs.vs_output_attributes[index]; // TODO: Don't hardcode VS here
|
const auto& output_register_map = g_state.regs.vs_output_attributes[index];
|
||||||
|
|
||||||
u32 semantics[4] = {
|
u32 semantics[4] = {
|
||||||
output_register_map.map_x, output_register_map.map_y,
|
output_register_map.map_x, output_register_map.map_y,
|
||||||
@ -127,7 +51,7 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
|
|||||||
for (unsigned comp = 0; comp < 4; ++comp) {
|
for (unsigned comp = 0; comp < 4; ++comp) {
|
||||||
float24* out = ((float24*)&ret) + semantics[comp];
|
float24* out = ((float24*)&ret) + semantics[comp];
|
||||||
if (semantics[comp] != Regs::VSOutputAttributes::INVALID) {
|
if (semantics[comp] != Regs::VSOutputAttributes::INVALID) {
|
||||||
*out = state.registers.output[i][comp];
|
*out = value[i][comp];
|
||||||
} else {
|
} else {
|
||||||
// Zero output so that attributes which aren't output won't have denormals in them,
|
// Zero output so that attributes which aren't output won't have denormals in them,
|
||||||
// which would slow us down later.
|
// which would slow us down later.
|
||||||
@ -155,10 +79,71 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) {
|
#ifdef ARCHITECTURE_x86_64
|
||||||
|
static std::unordered_map<u64, std::shared_ptr<JitShader>> shader_map;
|
||||||
|
#endif // ARCHITECTURE_x86_64
|
||||||
|
|
||||||
|
void ShaderSetup::Setup() {
#ifdef ARCHITECTURE_x86_64
    // Without the JIT there is nothing to prepare; drop any previously selected shader.
    if (!VideoCore::g_shader_jit_enabled) {
        jit_shader.reset();
        return;
    }

    // The cache key combines the hashes of the program code and the swizzle data.
    const u64 cache_key = (Common::ComputeHash64(&program_code, sizeof(program_code)) ^
                           Common::ComputeHash64(&swizzle_data, sizeof(swizzle_data)));

    // Reuse an already compiled shader when the key is known.
    const auto cached = shader_map.find(cache_key);
    if (cached != shader_map.end()) {
        jit_shader = cached->second;
        return;
    }

    // Otherwise compile a new shader for this setup and store it in the cache.
    auto compiled = std::make_shared<JitShader>();
    compiled->Compile(*this);
    jit_shader = compiled;
    shader_map[cache_key] = std::move(compiled);
#endif // ARCHITECTURE_x86_64
}
|
||||||
|
|
||||||
|
/// Empties the cache of compiled JIT shaders (only present on x86_64 builds).
void ShaderSetup::Shutdown() {
#ifdef ARCHITECTURE_x86_64
    shader_map.clear();
#endif // ARCHITECTURE_x86_64
}
|
||||||
|
|
||||||
|
static Common::Profiling::TimingCategory shader_category("Shader");
|
||||||
|
MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
|
||||||
|
|
||||||
|
/**
 * Runs this shader setup on the given shader unit for one input vertex.
 *
 * Resets the unit's debug counters, copies the first `num_attributes` input attributes into the
 * input registers selected by `config.input_register_map`, clears both conditional-code flags
 * and then executes the program: through the JIT shader if `jit_shader` can still be locked,
 * otherwise through the interpreter.
 *
 * @param state Shader unit state the program operates on
 * @param input Input vertex into the shader
 * @param num_attributes The number of attributes to load from `input`
 * @param config Configuration object for the shader pipeline
 */
void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config) {

    Common::Profiling::ScopeTimer timer(shader_category);
    MICROPROFILE_SCOPE(GPU_Shader);

    state.debug.max_offset = 0;
    state.debug.max_opdesc_id = 0;

    // Setup input register table
    const auto& attribute_register_map = config.input_register_map;

    // The index is signed like num_attributes: comparing an unsigned index against a negative
    // count would convert the count to a huge value and run far past the attribute array.
    for (int i = 0; i < num_attributes; ++i)
        state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];

    state.conditional_code[0] = false;
    state.conditional_code[1] = false;

#ifdef ARCHITECTURE_x86_64
    // jit_shader is a weak reference; fall back to the interpreter when it cannot be locked.
    if (auto shader = jit_shader.lock())
        shader->Run(config, *this, state);
    else
        RunInterpreter(config, *this, state);
#else
    RunInterpreter(config, *this, state);
#endif // ARCHITECTURE_x86_64

}
|
||||||
|
|
||||||
|
DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config) {
|
||||||
UnitState<true> state;
|
UnitState<true> state;
|
||||||
|
|
||||||
state.program_counter = config.main_offset;
|
|
||||||
state.debug.max_offset = 0;
|
state.debug.max_offset = 0;
|
||||||
state.debug.max_opdesc_id = 0;
|
state.debug.max_opdesc_id = 0;
|
||||||
|
|
||||||
@ -167,30 +152,218 @@ DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, c
|
|||||||
float24 dummy_register;
|
float24 dummy_register;
|
||||||
boost::fill(state.registers.input, &dummy_register);
|
boost::fill(state.registers.input, &dummy_register);
|
||||||
|
|
||||||
if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = &input.attr[0].x;
|
for (unsigned i = 0; i < num_attributes; i++)
|
||||||
if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = &input.attr[1].x;
|
state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
|
||||||
if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = &input.attr[2].x;
|
|
||||||
if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = &input.attr[3].x;
|
|
||||||
if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = &input.attr[4].x;
|
|
||||||
if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = &input.attr[5].x;
|
|
||||||
if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = &input.attr[6].x;
|
|
||||||
if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = &input.attr[7].x;
|
|
||||||
if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = &input.attr[8].x;
|
|
||||||
if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = &input.attr[9].x;
|
|
||||||
if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = &input.attr[10].x;
|
|
||||||
if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = &input.attr[11].x;
|
|
||||||
if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = &input.attr[12].x;
|
|
||||||
if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = &input.attr[13].x;
|
|
||||||
if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = &input.attr[14].x;
|
|
||||||
if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = &input.attr[15].x;
|
|
||||||
|
|
||||||
state.conditional_code[0] = false;
|
state.conditional_code[0] = false;
|
||||||
state.conditional_code[1] = false;
|
state.conditional_code[1] = false;
|
||||||
|
|
||||||
RunInterpreter(state);
|
RunInterpreter(config, *this, state);
|
||||||
return state.debug;
|
return state.debug;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool SharedGS() {
|
||||||
|
return g_state.regs.vs_com_mode == Pica::Regs::VSComMode::Shared;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reports whether the `using_geometry_shader` register holds the value 2.
bool UseGS() {
    // TODO(ds84182): This would be more accurate if it looked at individual shader units for the geoshader bit
    // gs_regs.input_buffer_config.use_geometry_shader == 0x08

    // Only the values 0 and 2 are expected in this register.
    ASSERT((g_state.regs.using_geometry_shader == 0) || (g_state.regs.using_geometry_shader == 2));

    const bool enabled = (g_state.regs.using_geometry_shader == 2);
    return enabled;
}
|
||||||
|
|
||||||
|
/**
 * Selects the shader unit to run on.
 * Geometry shaders always get unit 3; everything else rotates through units 0 to 2.
 */
UnitState<false>& GetShaderUnit(bool gs) {
    // Persistent rotation counter for the non-GS units.
    // TODO: How does PICA shader scheduling work?
    static unsigned shader_unit_scheduler = 0;

    // GS are always run on shader unit 3
    if (gs)
        return g_state.shader_units[3];

    // The worst scheduler you'll ever see: advance to the next unit, wrapping after unit 2.
    // TODO: When does it also allow use of unit 3?!
    shader_unit_scheduler = (shader_unit_scheduler + 1) % 3;
    return g_state.shader_units[shader_unit_scheduler];
}
|
||||||
|
|
||||||
|
void WriteUniformBoolReg(bool gs, u32 value) {
|
||||||
|
auto& setup = gs ? g_state.gs : g_state.vs;
|
||||||
|
|
||||||
|
ASSERT(setup.uniforms.b.size() == 16);
|
||||||
|
for (unsigned i = 0; i < 16; ++i)
|
||||||
|
setup.uniforms.b[i] = (value & (1 << i)) != 0;
|
||||||
|
|
||||||
|
// Copy for GS in shared mode
|
||||||
|
if (!gs && SharedGS()) {
|
||||||
|
WriteUniformBoolReg(true, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Stores `values` into integer uniform `index` of the selected shader setup and traces the
 * write. A VS write is repeated for the GS while in shared mode.
 */
void WriteUniformIntReg(bool gs, unsigned index, const Math::Vec4<u8>& values) {
    auto& setup = gs ? g_state.gs : g_state.vs;
    const char* unit_name = gs ? "GS" : "VS";

    ASSERT(index < setup.uniforms.i.size());
    setup.uniforms.i[index] = values;

    LOG_TRACE(HW_GPU, "Set %s integer uniform %d to %02x %02x %02x %02x",
              unit_name, index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value());

    // Copy for GS in shared mode
    if (gs || !SharedGS())
        return;
    WriteUniformIntReg(true, index, values);
}
|
||||||
|
|
||||||
|
void WriteUniformFloatSetupReg(bool gs, u32 value) {
|
||||||
|
auto& config = gs ? g_state.regs.gs : g_state.regs.vs;
|
||||||
|
|
||||||
|
config.uniform_setup.setup = value;
|
||||||
|
|
||||||
|
// Copy for GS in shared mode
|
||||||
|
if (!gs && SharedGS()) {
|
||||||
|
WriteUniformFloatSetupReg(true, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void WriteUniformFloatReg(bool gs, u32 value) {
|
||||||
|
const char* shader_type = gs ? "GS" : "VS";
|
||||||
|
auto& config = gs ? g_state.regs.gs : g_state.regs.vs;
|
||||||
|
auto& setup = gs ? g_state.gs : g_state.vs;
|
||||||
|
|
||||||
|
auto& uniform_setup = config.uniform_setup;
|
||||||
|
auto& uniform_write_buffer = setup.uniform_write_buffer;
|
||||||
|
auto& float_regs_counter = setup.float_regs_counter;
|
||||||
|
|
||||||
|
// TODO: Does actual hardware indeed keep an intermediate buffer or does
|
||||||
|
// it directly write the values?
|
||||||
|
uniform_write_buffer[float_regs_counter++] = value;
|
||||||
|
|
||||||
|
// Uniforms are written in a packed format such that four float24 values are encoded in
|
||||||
|
// three 32-bit numbers. We write to internal memory once a full such vector is
|
||||||
|
// written.
|
||||||
|
if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) ||
|
||||||
|
(float_regs_counter >= 3 && !uniform_setup.IsFloat32())) {
|
||||||
|
float_regs_counter = 0;
|
||||||
|
|
||||||
|
auto& uniform = setup.uniforms.f[uniform_setup.index];
|
||||||
|
|
||||||
|
if (uniform_setup.index >= 96) {
|
||||||
|
LOG_ERROR(HW_GPU, "Invalid %s float uniform index %d", shader_type, (int)uniform_setup.index);
|
||||||
|
} else {
|
||||||
|
|
||||||
|
// NOTE: The destination component order indeed is "backwards"
|
||||||
|
if (uniform_setup.IsFloat32()) {
|
||||||
|
for (auto i : {0,1,2,3})
|
||||||
|
uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i]));
|
||||||
|
} else {
|
||||||
|
// TODO: Untested
|
||||||
|
uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8);
|
||||||
|
uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF));
|
||||||
|
uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | ((uniform_write_buffer[2] >> 24) & 0xFF));
|
||||||
|
uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF);
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_TRACE(HW_GPU, "Set %s float uniform %x to (%f %f %f %f)", shader_type, (int)uniform_setup.index,
|
||||||
|
uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(),
|
||||||
|
uniform.w.ToFloat32());
|
||||||
|
|
||||||
|
// TODO: Verify that this actually modifies the register!
|
||||||
|
uniform_setup.index.Assign(uniform_setup.index + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy for GS in shared mode
|
||||||
|
if (!gs && SharedGS()) {
|
||||||
|
WriteUniformFloatReg(true, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void WriteProgramCodeOffset(bool gs, u32 value) {
|
||||||
|
auto& config = gs ? g_state.regs.gs : g_state.regs.vs;
|
||||||
|
config.program.offset = value;
|
||||||
|
|
||||||
|
// Copy for GS in shared mode
|
||||||
|
if (!gs && SharedGS()) {
|
||||||
|
WriteProgramCodeOffset(true, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void WriteProgramCode(bool gs, u32 value) {
|
||||||
|
const char* shader_type = gs ? "GS" : "VS";
|
||||||
|
auto& config = gs ? g_state.regs.gs : g_state.regs.vs;
|
||||||
|
auto& setup = gs ? g_state.gs : g_state.vs;
|
||||||
|
|
||||||
|
if (config.program.offset >= setup.program_code.size()) {
|
||||||
|
LOG_ERROR(HW_GPU, "Invalid %s program offset %d", shader_type, (int)config.program.offset);
|
||||||
|
} else {
|
||||||
|
setup.program_code[config.program.offset] = value;
|
||||||
|
config.program.offset++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy for GS in shared mode
|
||||||
|
if (!gs && SharedGS()) {
|
||||||
|
WriteProgramCode(true, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void WriteSwizzlePatternsOffset(bool gs, u32 value) {
|
||||||
|
auto& config = gs ? g_state.regs.gs : g_state.regs.vs;
|
||||||
|
config.swizzle_patterns.offset = value;
|
||||||
|
|
||||||
|
// Copy for GS in shared mode
|
||||||
|
if (!gs && SharedGS()) {
|
||||||
|
WriteSwizzlePatternsOffset(true, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void WriteSwizzlePatterns(bool gs, u32 value) {
|
||||||
|
const char* shader_type = gs ? "GS" : "VS";
|
||||||
|
auto& config = gs ? g_state.regs.gs : g_state.regs.vs;
|
||||||
|
auto& setup = gs ? g_state.gs : g_state.vs;
|
||||||
|
|
||||||
|
if (config.swizzle_patterns.offset >= setup.swizzle_data.size()) {
|
||||||
|
LOG_ERROR(HW_GPU, "Invalid %s swizzle pattern offset %d", shader_type, (int)config.swizzle_patterns.offset);
|
||||||
|
} else {
|
||||||
|
setup.swizzle_data[config.swizzle_patterns.offset] = value;
|
||||||
|
config.swizzle_patterns.offset++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy for GS in shared mode
|
||||||
|
if (!gs && SharedGS()) {
|
||||||
|
WriteSwizzlePatterns(true, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<bool Debug>
|
||||||
|
void HandleEMIT(UnitState<Debug>& state) {
|
||||||
|
auto &config = g_state.regs.gs;
|
||||||
|
auto &emit_params = state.emit_params;
|
||||||
|
auto &emit_buffers = state.emit_buffers;
|
||||||
|
|
||||||
|
ASSERT(emit_params.vertex_id < 3);
|
||||||
|
|
||||||
|
emit_buffers[emit_params.vertex_id] = state.output_registers;
|
||||||
|
|
||||||
|
if (emit_params.primitive_emit) {
|
||||||
|
ASSERT_MSG(state.emit_triangle_callback, "EMIT invoked but no handler set!");
|
||||||
|
OutputVertex v0 = emit_buffers[0].ToVertex(config);
|
||||||
|
OutputVertex v1 = emit_buffers[1].ToVertex(config);
|
||||||
|
OutputVertex v2 = emit_buffers[2].ToVertex(config);
|
||||||
|
if (emit_params.winding) {
|
||||||
|
state.emit_triangle_callback(v2, v1, v0);
|
||||||
|
} else {
|
||||||
|
state.emit_triangle_callback(v0, v1, v2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Explicit instantiation
|
||||||
|
template void HandleEMIT(UnitState<false>& state);
|
||||||
|
template void HandleEMIT(UnitState<true>& state);
|
||||||
|
|
||||||
} // namespace Shader
|
} // namespace Shader
|
||||||
|
|
||||||
} // namespace Pica
|
} // namespace Pica
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <boost/container/static_vector.hpp>
|
#include <boost/container/static_vector.hpp>
|
||||||
@ -15,6 +16,7 @@
|
|||||||
#include "common/vector_math.h"
|
#include "common/vector_math.h"
|
||||||
|
|
||||||
#include "video_core/pica.h"
|
#include "video_core/pica.h"
|
||||||
|
#include "video_core/primitive_assembly.h"
|
||||||
|
|
||||||
using nihstro::RegisterType;
|
using nihstro::RegisterType;
|
||||||
using nihstro::SourceRegister;
|
using nihstro::SourceRegister;
|
||||||
@ -24,6 +26,11 @@ namespace Pica {
|
|||||||
|
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
|
#ifdef ARCHITECTURE_x86_64
|
||||||
|
// Forward declare JitShader because shader_jit_x64.h requires ShaderSetup (which uses JitShader) from this file
|
||||||
|
class JitShader;
|
||||||
|
#endif // ARCHITECTURE_x86_64
|
||||||
|
|
||||||
struct InputVertex {
|
struct InputVertex {
|
||||||
Math::Vec4<float24> attr[16];
|
Math::Vec4<float24> attr[16];
|
||||||
};
|
};
|
||||||
@ -77,22 +84,14 @@ struct OutputVertex {
|
|||||||
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
|
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
|
||||||
static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");
|
static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");
|
||||||
|
|
||||||
/// Vertex shader memory
|
struct OutputRegisters {
|
||||||
struct ShaderSetup {
|
OutputRegisters() = default;
|
||||||
struct {
|
|
||||||
// The float uniforms are accessed by the shader JIT using SSE instructions, and are
|
|
||||||
// therefore required to be 16-byte aligned.
|
|
||||||
alignas(16) Math::Vec4<float24> f[96];
|
|
||||||
|
|
||||||
std::array<bool, 16> b;
|
alignas(16) Math::Vec4<float24> value[16];
|
||||||
std::array<Math::Vec4<u8>, 4> i;
|
|
||||||
} uniforms;
|
|
||||||
|
|
||||||
Math::Vec4<float24> default_attributes[16];
|
OutputVertex ToVertex(const Regs::ShaderConfig& config);
|
||||||
|
|
||||||
std::array<u32, 1024> program_code;
|
|
||||||
std::array<u32, 1024> swizzle_data;
|
|
||||||
};
|
};
|
||||||
|
static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD");
|
||||||
|
|
||||||
// Helper structure used to keep track of data useful for inspection of shader emulation
|
// Helper structure used to keep track of data useful for inspection of shader emulation
|
||||||
template<bool full_debugging>
|
template<bool full_debugging>
|
||||||
@ -192,9 +191,9 @@ inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* va
|
|||||||
record.src3.x = value[0];
|
record.src3.x = value[0];
|
||||||
record.src3.y = value[1];
|
record.src3.y = value[1];
|
||||||
record.src3.z = value[2];
|
record.src3.z = value[2];
|
||||||
|
|
||||||
record.src3.w = value[3];
|
record.src3.w = value[3];
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) {
|
inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) {
|
||||||
record.dest_in.x = value[0];
|
record.dest_in.x = value[0];
|
||||||
@ -277,43 +276,38 @@ struct UnitState {
|
|||||||
// The registers are accessed by the shader JIT using SSE instructions, and are therefore
|
// The registers are accessed by the shader JIT using SSE instructions, and are therefore
|
||||||
// required to be 16-byte aligned.
|
// required to be 16-byte aligned.
|
||||||
alignas(16) Math::Vec4<float24> input[16];
|
alignas(16) Math::Vec4<float24> input[16];
|
||||||
alignas(16) Math::Vec4<float24> output[16];
|
|
||||||
alignas(16) Math::Vec4<float24> temporary[16];
|
alignas(16) Math::Vec4<float24> temporary[16];
|
||||||
} registers;
|
} registers;
|
||||||
static_assert(std::is_pod<Registers>::value, "Structure is not POD");
|
static_assert(std::is_pod<Registers>::value, "Structure is not POD");
|
||||||
|
|
||||||
u32 program_counter;
|
OutputRegisters emit_buffers[3]; //TODO: 3dbrew suggests this only stores the first 7 output registers
|
||||||
|
|
||||||
|
union EmitParameters {
|
||||||
|
u32 raw;
|
||||||
|
BitField<22, 1, u32> winding;
|
||||||
|
BitField<23, 1, u32> primitive_emit;
|
||||||
|
BitField<24, 2, u32> vertex_id;
|
||||||
|
} emit_params;
|
||||||
|
|
||||||
|
PrimitiveAssembler<OutputVertex>::TriangleHandler emit_triangle_callback;
|
||||||
|
|
||||||
|
OutputRegisters output_registers;
|
||||||
|
|
||||||
bool conditional_code[2];
|
bool conditional_code[2];
|
||||||
|
|
||||||
// Two Address registers and one loop counter
|
// Two Address registers and one loop counter
|
||||||
// TODO: How many bits do these actually have?
|
// TODO: How many bits do these actually have?
|
||||||
s32 address_registers[3];
|
s32 address_registers[3];
|
||||||
|
|
||||||
enum {
|
|
||||||
INVALID_ADDRESS = 0xFFFFFFFF
|
|
||||||
};
|
|
||||||
|
|
||||||
struct CallStackElement {
|
|
||||||
u32 final_address; // Address upon which we jump to return_address
|
|
||||||
u32 return_address; // Where to jump when leaving scope
|
|
||||||
u8 repeat_counter; // How often to repeat until this call stack element is removed
|
|
||||||
u8 loop_increment; // Which value to add to the loop counter after an iteration
|
|
||||||
// TODO: Should this be a signed value? Does it even matter?
|
|
||||||
u32 loop_address; // The address where we'll return to after each loop iteration
|
|
||||||
};
|
|
||||||
|
|
||||||
// TODO: Is there a maximal size for this?
|
|
||||||
boost::container::static_vector<CallStackElement, 16> call_stack;
|
|
||||||
|
|
||||||
DebugData<Debug> debug;
|
DebugData<Debug> debug;
|
||||||
|
|
||||||
static size_t InputOffset(const SourceRegister& reg) {
|
static size_t InputOffset(const SourceRegister& reg) {
|
||||||
switch (reg.GetRegisterType()) {
|
switch (reg.GetRegisterType()) {
|
||||||
case RegisterType::Input:
|
case RegisterType::Input:
|
||||||
return offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
|
return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
|
||||||
|
|
||||||
case RegisterType::Temporary:
|
case RegisterType::Temporary:
|
||||||
return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
|
return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
@ -324,45 +318,105 @@ struct UnitState {
|
|||||||
static size_t OutputOffset(const DestRegister& reg) {
|
static size_t OutputOffset(const DestRegister& reg) {
|
||||||
switch (reg.GetRegisterType()) {
|
switch (reg.GetRegisterType()) {
|
||||||
case RegisterType::Output:
|
case RegisterType::Output:
|
||||||
return offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
|
return offsetof(UnitState, output_registers.value) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
|
||||||
|
|
||||||
case RegisterType::Temporary:
|
case RegisterType::Temporary:
|
||||||
return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
|
return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static size_t EmitParamsOffset() {
|
||||||
|
return offsetof(UnitState, emit_params.raw);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
class ShaderSetup {
|
||||||
* Performs any shader unit setup that only needs to happen once per shader (as opposed to once per
|
|
||||||
* vertex, which would happen within the `Run` function).
|
|
||||||
*/
|
|
||||||
void Setup();
|
|
||||||
|
|
||||||
/// Performs any cleanup when the emulator is shutdown
|
public:
|
||||||
void Shutdown();
|
|
||||||
|
|
||||||
/**
|
struct {
|
||||||
* Runs the currently setup shader
|
// The float uniforms are accessed by the shader JIT using SSE instructions, and are
|
||||||
* @param state Shader unit state, must be setup per shader and per shader unit
|
// therefore required to be 16-byte aligned.
|
||||||
* @param input Input vertex into the shader
|
alignas(16) Math::Vec4<float24> f[96];
|
||||||
* @param num_attributes The number of vertex shader attributes
|
|
||||||
* @return The output vertex, after having been processed by the vertex shader
|
|
||||||
*/
|
|
||||||
OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes);
|
|
||||||
|
|
||||||
/**
|
std::array<bool, 16> b;
|
||||||
* Produce debug information based on the given shader and input vertex
|
std::array<Math::Vec4<u8>, 4> i;
|
||||||
* @param input Input vertex into the shader
|
} uniforms;
|
||||||
* @param num_attributes The number of vertex shader attributes
|
|
||||||
* @param config Configuration object for the shader pipeline
|
static size_t UniformOffset(RegisterType type, unsigned index) {
|
||||||
* @param setup Setup object for the shader pipeline
|
switch (type) {
|
||||||
* @return Debug information for this shader with regards to the given vertex
|
case RegisterType::FloatUniform:
|
||||||
*/
|
return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>);
|
||||||
DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup);
|
|
||||||
|
case RegisterType::BoolUniform:
|
||||||
|
return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool);
|
||||||
|
|
||||||
|
case RegisterType::IntUniform:
|
||||||
|
return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>);
|
||||||
|
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int float_regs_counter = 0;
|
||||||
|
u32 uniform_write_buffer[4];
|
||||||
|
|
||||||
|
std::array<u32, 1024> program_code;
|
||||||
|
std::array<u32, 1024> swizzle_data;
|
||||||
|
|
||||||
|
#ifdef ARCHITECTURE_x86_64
|
||||||
|
std::weak_ptr<const JitShader> jit_shader;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs any shader setup that only needs to happen once per shader (as opposed to once per
|
||||||
|
* vertex, which would happen within the `Run` function).
|
||||||
|
*/
|
||||||
|
void Setup();
|
||||||
|
|
||||||
|
/// Performs any cleanup when the emulator is shutdown
|
||||||
|
static void Shutdown();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Runs the currently setup shader
|
||||||
|
* @param state Shader unit state, must be setup per shader and per shader unit
|
||||||
|
* @param input Input vertex into the shader
|
||||||
|
* @param num_attributes The number of vertex shader attributes
|
||||||
|
* @param config Configuration object for the shader pipeline
|
||||||
|
*/
|
||||||
|
void Run(UnitState<false>& state, const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Produce debug information based on the given shader and input vertex
|
||||||
|
* @param input Input vertex into the shader
|
||||||
|
* @param num_attributes The number of vertex shader attributes
|
||||||
|
* @param config Configuration object for the shader pipeline
|
||||||
|
* @return Debug information for this shader with regards to the given vertex
|
||||||
|
*/
|
||||||
|
DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config);
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
bool SharedGS();
|
||||||
|
bool UseGS();
|
||||||
|
UnitState<false>& GetShaderUnit(bool gs);
|
||||||
|
void WriteUniformBoolReg(bool gs, u32 value);
|
||||||
|
void WriteUniformIntReg(bool gs, unsigned index, const Math::Vec4<u8>& values);
|
||||||
|
void WriteUniformFloatSetupReg(bool gs, u32 value);
|
||||||
|
void WriteUniformFloatReg(bool gs, u32 value);
|
||||||
|
void WriteProgramCodeOffset(bool gs, u32 value);
|
||||||
|
void WriteProgramCode(bool gs, u32 value);
|
||||||
|
void WriteSwizzlePatternsOffset(bool gs, u32 value);
|
||||||
|
void WriteSwizzlePatterns(bool gs, u32 value);
|
||||||
|
|
||||||
|
template<bool Debug>
|
||||||
|
void HandleEMIT(UnitState<Debug>& state);
|
||||||
|
|
||||||
} // namespace Shader
|
} // namespace Shader
|
||||||
|
|
||||||
|
@ -21,11 +21,30 @@ namespace Pica {
|
|||||||
|
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
|
enum {
|
||||||
|
INVALID_ADDRESS = 0xFFFFFFFF
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CallStackElement {
|
||||||
|
u32 final_address; // Address upon which we jump to return_address
|
||||||
|
u32 return_address; // Where to jump when leaving scope
|
||||||
|
u8 repeat_counter; // How often to repeat until this call stack element is removed
|
||||||
|
u8 loop_increment; // Which value to add to the loop counter after an iteration
|
||||||
|
// TODO: Should this be a signed value? Does it even matter?
|
||||||
|
u32 loop_address; // The address where we'll return to after each loop iteration
|
||||||
|
};
|
||||||
|
|
||||||
template<bool Debug>
|
template<bool Debug>
|
||||||
void RunInterpreter(UnitState<Debug>& state) {
|
void RunInterpreter(const Pica::Regs::ShaderConfig& config, const ShaderSetup& setup, UnitState<Debug>& state) {
|
||||||
const auto& uniforms = g_state.vs.uniforms;
|
|
||||||
const auto& swizzle_data = g_state.vs.swizzle_data;
|
// TODO: Is there a maximal size for this?
|
||||||
const auto& program_code = g_state.vs.program_code;
|
boost::container::static_vector<CallStackElement, 16> call_stack;
|
||||||
|
|
||||||
|
u32 program_counter = config.main_offset;
|
||||||
|
|
||||||
|
const auto& uniforms = setup.uniforms;
|
||||||
|
const auto& swizzle_data = setup.swizzle_data;
|
||||||
|
const auto& program_code = setup.program_code;
|
||||||
|
|
||||||
// Placeholder for invalid inputs
|
// Placeholder for invalid inputs
|
||||||
static float24 dummy_vec4_float24[4];
|
static float24 dummy_vec4_float24[4];
|
||||||
@ -33,16 +52,16 @@ void RunInterpreter(UnitState<Debug>& state) {
|
|||||||
unsigned iteration = 0;
|
unsigned iteration = 0;
|
||||||
bool exit_loop = false;
|
bool exit_loop = false;
|
||||||
while (!exit_loop) {
|
while (!exit_loop) {
|
||||||
if (!state.call_stack.empty()) {
|
if (!call_stack.empty()) {
|
||||||
auto& top = state.call_stack.back();
|
auto& top = call_stack.back();
|
||||||
if (state.program_counter == top.final_address) {
|
if (program_counter == top.final_address) {
|
||||||
state.address_registers[2] += top.loop_increment;
|
state.address_registers[2] += top.loop_increment;
|
||||||
|
|
||||||
if (top.repeat_counter-- == 0) {
|
if (top.repeat_counter-- == 0) {
|
||||||
state.program_counter = top.return_address;
|
program_counter = top.return_address;
|
||||||
state.call_stack.pop_back();
|
call_stack.pop_back();
|
||||||
} else {
|
} else {
|
||||||
state.program_counter = top.loop_address;
|
program_counter = top.loop_address;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Is "trying again" accurate to hardware?
|
// TODO: Is "trying again" accurate to hardware?
|
||||||
@ -50,20 +69,20 @@ void RunInterpreter(UnitState<Debug>& state) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const Instruction instr = { program_code[state.program_counter] };
|
const Instruction instr = { program_code[program_counter] };
|
||||||
const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] };
|
const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] };
|
||||||
|
|
||||||
static auto call = [](UnitState<Debug>& state, u32 offset, u32 num_instructions,
|
static auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, u32 num_instructions,
|
||||||
u32 return_offset, u8 repeat_count, u8 loop_increment) {
|
u32 return_offset, u8 repeat_count, u8 loop_increment) {
|
||||||
state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
|
program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
|
||||||
ASSERT(state.call_stack.size() < state.call_stack.capacity());
|
ASSERT(call_stack.size() < call_stack.capacity());
|
||||||
state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset });
|
call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset });
|
||||||
};
|
};
|
||||||
Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, state.program_counter);
|
Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter);
|
||||||
if (iteration > 0)
|
if (iteration > 0)
|
||||||
Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, state.program_counter);
|
Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, program_counter);
|
||||||
|
|
||||||
state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter);
|
state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + program_counter);
|
||||||
|
|
||||||
auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* {
|
auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* {
|
||||||
switch (source_reg.GetRegisterType()) {
|
switch (source_reg.GetRegisterType()) {
|
||||||
@ -120,7 +139,7 @@ void RunInterpreter(UnitState<Debug>& state) {
|
|||||||
src2[3] = src2[3] * float24::FromFloat32(-1);
|
src2[3] = src2[3] * float24::FromFloat32(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
float24* dest = (instr.common.dest.Value() < 0x10) ? &state.registers.output[instr.common.dest.Value().GetIndex()][0]
|
float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0]
|
||||||
: (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0]
|
: (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0]
|
||||||
: dummy_vec4_float24;
|
: dummy_vec4_float24;
|
||||||
|
|
||||||
@ -459,7 +478,7 @@ void RunInterpreter(UnitState<Debug>& state) {
|
|||||||
src3[3] = src3[3] * float24::FromFloat32(-1);
|
src3[3] = src3[3] * float24::FromFloat32(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.registers.output[instr.mad.dest.Value().GetIndex()][0]
|
float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0]
|
||||||
: (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
|
: (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
|
||||||
: dummy_vec4_float24;
|
: dummy_vec4_float24;
|
||||||
|
|
||||||
@ -511,7 +530,7 @@ void RunInterpreter(UnitState<Debug>& state) {
|
|||||||
case OpCode::Id::JMPC:
|
case OpCode::Id::JMPC:
|
||||||
Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
|
Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
|
||||||
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
|
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
|
||||||
state.program_counter = instr.flow_control.dest_offset - 1;
|
program_counter = instr.flow_control.dest_offset - 1;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -519,7 +538,7 @@ void RunInterpreter(UnitState<Debug>& state) {
|
|||||||
Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
|
Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
|
||||||
|
|
||||||
if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) {
|
if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) {
|
||||||
state.program_counter = instr.flow_control.dest_offset - 1;
|
program_counter = instr.flow_control.dest_offset - 1;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -527,7 +546,7 @@ void RunInterpreter(UnitState<Debug>& state) {
|
|||||||
call(state,
|
call(state,
|
||||||
instr.flow_control.dest_offset,
|
instr.flow_control.dest_offset,
|
||||||
instr.flow_control.num_instructions,
|
instr.flow_control.num_instructions,
|
||||||
state.program_counter + 1, 0, 0);
|
program_counter + 1, 0, 0);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OpCode::Id::CALLU:
|
case OpCode::Id::CALLU:
|
||||||
@ -536,7 +555,7 @@ void RunInterpreter(UnitState<Debug>& state) {
|
|||||||
call(state,
|
call(state,
|
||||||
instr.flow_control.dest_offset,
|
instr.flow_control.dest_offset,
|
||||||
instr.flow_control.num_instructions,
|
instr.flow_control.num_instructions,
|
||||||
state.program_counter + 1, 0, 0);
|
program_counter + 1, 0, 0);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -546,7 +565,7 @@ void RunInterpreter(UnitState<Debug>& state) {
|
|||||||
call(state,
|
call(state,
|
||||||
instr.flow_control.dest_offset,
|
instr.flow_control.dest_offset,
|
||||||
instr.flow_control.num_instructions,
|
instr.flow_control.num_instructions,
|
||||||
state.program_counter + 1, 0, 0);
|
program_counter + 1, 0, 0);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -557,8 +576,8 @@ void RunInterpreter(UnitState<Debug>& state) {
|
|||||||
Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
|
Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
|
||||||
if (uniforms.b[instr.flow_control.bool_uniform_id]) {
|
if (uniforms.b[instr.flow_control.bool_uniform_id]) {
|
||||||
call(state,
|
call(state,
|
||||||
state.program_counter + 1,
|
program_counter + 1,
|
||||||
instr.flow_control.dest_offset - state.program_counter - 1,
|
instr.flow_control.dest_offset - program_counter - 1,
|
||||||
instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
|
instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
|
||||||
} else {
|
} else {
|
||||||
call(state,
|
call(state,
|
||||||
@ -576,8 +595,8 @@ void RunInterpreter(UnitState<Debug>& state) {
|
|||||||
Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
|
Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
|
||||||
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
|
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
|
||||||
call(state,
|
call(state,
|
||||||
state.program_counter + 1,
|
program_counter + 1,
|
||||||
instr.flow_control.dest_offset - state.program_counter - 1,
|
instr.flow_control.dest_offset - program_counter - 1,
|
||||||
instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
|
instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
|
||||||
} else {
|
} else {
|
||||||
call(state,
|
call(state,
|
||||||
@ -599,14 +618,24 @@ void RunInterpreter(UnitState<Debug>& state) {
|
|||||||
|
|
||||||
Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param);
|
Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param);
|
||||||
call(state,
|
call(state,
|
||||||
state.program_counter + 1,
|
program_counter + 1,
|
||||||
instr.flow_control.dest_offset - state.program_counter + 1,
|
instr.flow_control.dest_offset - program_counter + 1,
|
||||||
instr.flow_control.dest_offset + 1,
|
instr.flow_control.dest_offset + 1,
|
||||||
loop_param.x,
|
loop_param.x,
|
||||||
loop_param.z);
|
loop_param.z);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case OpCode::Id::EMIT: {
|
||||||
|
Shader::HandleEMIT(state);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case OpCode::Id::SETEMIT: {
|
||||||
|
state.emit_params.raw = program_code[program_counter];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
|
LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
|
||||||
(int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
|
(int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
|
||||||
@ -617,14 +646,14 @@ void RunInterpreter(UnitState<Debug>& state) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
++state.program_counter;
|
++program_counter;
|
||||||
++iteration;
|
++iteration;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Explicit instantiation
|
// Explicit instantiation
|
||||||
template void RunInterpreter(UnitState<false>& state);
|
template void RunInterpreter(const Pica::Regs::ShaderConfig& config, const ShaderSetup& setup, UnitState<false>& state);
|
||||||
template void RunInterpreter(UnitState<true>& state);
|
template void RunInterpreter(const Pica::Regs::ShaderConfig& config, const ShaderSetup& setup, UnitState<true>& state);
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "video_core/pica.h"
|
||||||
#include "video_core/shader/shader.h"
|
#include "video_core/shader/shader.h"
|
||||||
|
|
||||||
namespace Pica {
|
namespace Pica {
|
||||||
@ -11,7 +12,7 @@ namespace Pica {
|
|||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
template<bool Debug>
|
template<bool Debug>
|
||||||
void RunInterpreter(UnitState<Debug>& state);
|
void RunInterpreter(const Pica::Regs::ShaderConfig& config, const ShaderSetup& setup, UnitState<Debug>& state);
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
@ -65,8 +65,8 @@ const JitFunction instr_table[64] = {
|
|||||||
&JitShader::Compile_IF, // ifu
|
&JitShader::Compile_IF, // ifu
|
||||||
&JitShader::Compile_IF, // ifc
|
&JitShader::Compile_IF, // ifc
|
||||||
&JitShader::Compile_LOOP, // loop
|
&JitShader::Compile_LOOP, // loop
|
||||||
nullptr, // emit
|
&JitShader::Compile_EMIT, // emit
|
||||||
nullptr, // sete
|
&JitShader::Compile_SETEMIT, // setemit
|
||||||
&JitShader::Compile_JMP, // jmpc
|
&JitShader::Compile_JMP, // jmpc
|
||||||
&JitShader::Compile_JMP, // jmpu
|
&JitShader::Compile_JMP, // jmpu
|
||||||
&JitShader::Compile_CMP, // cmp
|
&JitShader::Compile_CMP, // cmp
|
||||||
@ -94,7 +94,7 @@ const JitFunction instr_table[64] = {
|
|||||||
// purposes, as documented below:
|
// purposes, as documented below:
|
||||||
|
|
||||||
/// Pointer to the uniform memory
|
/// Pointer to the uniform memory
|
||||||
static const X64Reg UNIFORMS = R9;
|
static const X64Reg SETUP = R9;
|
||||||
/// The two 32-bit VS address offset registers set by the MOVA instruction
|
/// The two 32-bit VS address offset registers set by the MOVA instruction
|
||||||
static const X64Reg ADDROFFS_REG_0 = R10;
|
static const X64Reg ADDROFFS_REG_0 = R10;
|
||||||
static const X64Reg ADDROFFS_REG_1 = R11;
|
static const X64Reg ADDROFFS_REG_1 = R11;
|
||||||
@ -109,7 +109,7 @@ static const X64Reg COND0 = R13;
|
|||||||
/// Result of the previous CMP instruction for the Y-component comparison
|
/// Result of the previous CMP instruction for the Y-component comparison
|
||||||
static const X64Reg COND1 = R14;
|
static const X64Reg COND1 = R14;
|
||||||
/// Pointer to the UnitState instance for the current VS unit
|
/// Pointer to the UnitState instance for the current VS unit
|
||||||
static const X64Reg REGISTERS = R15;
|
static const X64Reg STATE = R15;
|
||||||
/// SIMD scratch register
|
/// SIMD scratch register
|
||||||
static const X64Reg SCRATCH = XMM0;
|
static const X64Reg SCRATCH = XMM0;
|
||||||
/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
|
/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
|
||||||
@ -128,7 +128,7 @@ static const X64Reg NEGBIT = XMM15;
|
|||||||
// State registers that must not be modified by external functions calls
|
// State registers that must not be modified by external functions calls
|
||||||
// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
|
// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
|
||||||
static const BitSet32 persistent_regs = {
|
static const BitSet32 persistent_regs = {
|
||||||
UNIFORMS, REGISTERS, // Pointers to register blocks
|
SETUP, STATE, // Pointers to register blocks
|
||||||
ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
|
ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
|
||||||
ONE+16, NEGBIT+16, // Constants
|
ONE+16, NEGBIT+16, // Constants
|
||||||
};
|
};
|
||||||
@ -138,15 +138,6 @@ static const u8 NO_SRC_REG_SWIZZLE = 0x1b;
|
|||||||
/// Raw constant for the destination register enable mask that indicates all components are enabled
|
/// Raw constant for the destination register enable mask that indicates all components are enabled
|
||||||
static const u8 NO_DEST_REG_MASK = 0xf;
|
static const u8 NO_DEST_REG_MASK = 0xf;
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the vertex shader instruction for a given offset in the current shader program
|
|
||||||
* @param offset Offset in the current shader program of the instruction
|
|
||||||
* @return Instruction at the specified offset
|
|
||||||
*/
|
|
||||||
static Instruction GetVertexShaderInstruction(size_t offset) {
|
|
||||||
return { g_state.vs.program_code[offset] };
|
|
||||||
}
|
|
||||||
|
|
||||||
static void LogCritical(const char* msg) {
|
static void LogCritical(const char* msg) {
|
||||||
LOG_CRITICAL(HW_GPU, msg);
|
LOG_CRITICAL(HW_GPU, msg);
|
||||||
}
|
}
|
||||||
@ -169,10 +160,10 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
|
|||||||
size_t src_offset;
|
size_t src_offset;
|
||||||
|
|
||||||
if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
|
if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
|
||||||
src_ptr = UNIFORMS;
|
src_ptr = SETUP;
|
||||||
src_offset = src_reg.GetIndex() * sizeof(float24) * 4;
|
src_offset = ShaderSetup::UniformOffset(RegisterType::FloatUniform, src_reg.GetIndex());
|
||||||
} else {
|
} else {
|
||||||
src_ptr = REGISTERS;
|
src_ptr = STATE;
|
||||||
src_offset = UnitState<false>::InputOffset(src_reg);
|
src_offset = UnitState<false>::InputOffset(src_reg);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -217,7 +208,7 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
|
|||||||
MOVAPS(dest, MDisp(src_ptr, src_offset_disp));
|
MOVAPS(dest, MDisp(src_ptr, src_offset_disp));
|
||||||
}
|
}
|
||||||
|
|
||||||
SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] };
|
SwizzlePattern swiz = { setup->swizzle_data[operand_desc_id] };
|
||||||
|
|
||||||
// Generate instructions for source register swizzling as needed
|
// Generate instructions for source register swizzling as needed
|
||||||
u8 sel = swiz.GetRawSelector(src_num);
|
u8 sel = swiz.GetRawSelector(src_num);
|
||||||
@ -248,7 +239,7 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
|
|||||||
dest = instr.common.dest.Value();
|
dest = instr.common.dest.Value();
|
||||||
}
|
}
|
||||||
|
|
||||||
SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] };
|
SwizzlePattern swiz = { setup->swizzle_data[operand_desc_id] };
|
||||||
|
|
||||||
int dest_offset_disp = (int)UnitState<false>::OutputOffset(dest);
|
int dest_offset_disp = (int)UnitState<false>::OutputOffset(dest);
|
||||||
ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest), "Destinaton offset too large for int type");
|
ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest), "Destinaton offset too large for int type");
|
||||||
@ -256,11 +247,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
|
|||||||
// If all components are enabled, write the result to the destination register
|
// If all components are enabled, write the result to the destination register
|
||||||
if (swiz.dest_mask == NO_DEST_REG_MASK) {
|
if (swiz.dest_mask == NO_DEST_REG_MASK) {
|
||||||
// Store dest back to memory
|
// Store dest back to memory
|
||||||
MOVAPS(MDisp(REGISTERS, dest_offset_disp), src);
|
MOVAPS(MDisp(STATE, dest_offset_disp), src);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// Not all components are enabled, so mask the result when storing to the destination register...
|
// Not all components are enabled, so mask the result when storing to the destination register...
|
||||||
MOVAPS(SCRATCH, MDisp(REGISTERS, dest_offset_disp));
|
MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp));
|
||||||
|
|
||||||
if (Common::GetCPUCaps().sse4_1) {
|
if (Common::GetCPUCaps().sse4_1) {
|
||||||
u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
|
u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
|
||||||
@ -279,7 +270,7 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Store dest back to memory
|
// Store dest back to memory
|
||||||
MOVAPS(MDisp(REGISTERS, dest_offset_disp), SCRATCH);
|
MOVAPS(MDisp(STATE, dest_offset_disp), SCRATCH);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -328,8 +319,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void JitShader::Compile_UniformCondition(Instruction instr) {
|
void JitShader::Compile_UniformCondition(Instruction instr) {
|
||||||
int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool));
|
int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id);
|
||||||
CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
|
CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
BitSet32 JitShader::PersistentCallerSavedRegs() {
|
BitSet32 JitShader::PersistentCallerSavedRegs() {
|
||||||
@ -504,7 +495,7 @@ void JitShader::Compile_MIN(Instruction instr) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void JitShader::Compile_MOVA(Instruction instr) {
|
void JitShader::Compile_MOVA(Instruction instr) {
|
||||||
SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] };
|
SwizzlePattern swiz = { setup->swizzle_data[instr.common.operand_desc_id] };
|
||||||
|
|
||||||
if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) {
|
if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) {
|
||||||
return; // NoOp
|
return; // NoOp
|
||||||
@ -706,8 +697,8 @@ void JitShader::Compile_LOOP(Instruction instr) {
|
|||||||
|
|
||||||
looping = true;
|
looping = true;
|
||||||
|
|
||||||
int offset = offsetof(decltype(g_state.vs.uniforms), i) + (instr.flow_control.int_uniform_id * sizeof(Math::Vec4<u8>));
|
int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
|
||||||
MOV(32, R(LOOPCOUNT), MDisp(UNIFORMS, offset));
|
MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset));
|
||||||
MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
|
MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
|
||||||
SHR(32, R(LOOPCOUNT_REG), Imm8(8));
|
SHR(32, R(LOOPCOUNT_REG), Imm8(8));
|
||||||
AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
|
AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
|
||||||
@ -728,6 +719,22 @@ void JitShader::Compile_LOOP(Instruction instr) {
|
|||||||
looping = false;
|
looping = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void Handle_EMIT(void* param1) {
|
||||||
|
UnitState<false>& state = *static_cast<UnitState<false>*>(param1);
|
||||||
|
Shader::HandleEMIT(state);
|
||||||
|
};
|
||||||
|
|
||||||
|
void JitShader::Compile_EMIT(Instruction instr) {
|
||||||
|
ABI_PushRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0);
|
||||||
|
MOV(PTRBITS, R(ABI_PARAM1), R(STATE));
|
||||||
|
ABI_CallFunctionR(reinterpret_cast<const void*>(Handle_EMIT), ABI_PARAM1);
|
||||||
|
ABI_PopRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void JitShader::Compile_SETEMIT(Instruction instr) {
|
||||||
|
MOV(32, MDisp(STATE, UnitState<false>::EmitParamsOffset()), Imm32(*(u32*)&instr.setemit));
|
||||||
|
}
|
||||||
|
|
||||||
void JitShader::Compile_JMP(Instruction instr) {
|
void JitShader::Compile_JMP(Instruction instr) {
|
||||||
if (instr.opcode.Value() == OpCode::Id::JMPC)
|
if (instr.opcode.Value() == OpCode::Id::JMPC)
|
||||||
Compile_EvaluateCondition(instr);
|
Compile_EvaluateCondition(instr);
|
||||||
@ -768,7 +775,7 @@ void JitShader::Compile_NextInstr() {
|
|||||||
ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!");
|
ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!");
|
||||||
code_ptr[program_counter] = GetCodePtr();
|
code_ptr[program_counter] = GetCodePtr();
|
||||||
|
|
||||||
Instruction instr = GetVertexShaderInstruction(program_counter++);
|
Instruction instr = GetShaderInstruction(program_counter++);
|
||||||
|
|
||||||
OpCode::Id opcode = instr.opcode.Value();
|
OpCode::Id opcode = instr.opcode.Value();
|
||||||
auto instr_func = instr_table[static_cast<unsigned>(opcode)];
|
auto instr_func = instr_table[static_cast<unsigned>(opcode)];
|
||||||
@ -786,8 +793,8 @@ void JitShader::Compile_NextInstr() {
|
|||||||
void JitShader::FindReturnOffsets() {
|
void JitShader::FindReturnOffsets() {
|
||||||
return_offsets.clear();
|
return_offsets.clear();
|
||||||
|
|
||||||
for (size_t offset = 0; offset < g_state.vs.program_code.size(); ++offset) {
|
for (size_t offset = 0; offset < setup->program_code.size(); ++offset) {
|
||||||
Instruction instr = GetVertexShaderInstruction(offset);
|
Instruction instr = GetShaderInstruction(offset);
|
||||||
|
|
||||||
switch (instr.opcode.Value()) {
|
switch (instr.opcode.Value()) {
|
||||||
case OpCode::Id::CALL:
|
case OpCode::Id::CALL:
|
||||||
@ -802,7 +809,11 @@ void JitShader::FindReturnOffsets() {
|
|||||||
std::sort(return_offsets.begin(), return_offsets.end());
|
std::sort(return_offsets.begin(), return_offsets.end());
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitShader::Compile() {
|
void JitShader::Compile(const ShaderSetup& setup) {
|
||||||
|
|
||||||
|
// Get a pointer to the setup to access program_code and swizzle_data
|
||||||
|
this->setup = &setup;
|
||||||
|
|
||||||
// Reset flow control state
|
// Reset flow control state
|
||||||
program = (CompiledShader*)GetCodePtr();
|
program = (CompiledShader*)GetCodePtr();
|
||||||
program_counter = 0;
|
program_counter = 0;
|
||||||
@ -816,8 +827,8 @@ void JitShader::Compile() {
|
|||||||
// The stack pointer is 8 modulo 16 at the entry of a procedure
|
// The stack pointer is 8 modulo 16 at the entry of a procedure
|
||||||
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
|
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
|
||||||
|
|
||||||
MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1));
|
MOV(PTRBITS, R(SETUP), R(ABI_PARAM1));
|
||||||
MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms));
|
MOV(PTRBITS, R(STATE), R(ABI_PARAM3));
|
||||||
|
|
||||||
// Zero address/loop registers
|
// Zero address/loop registers
|
||||||
XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0));
|
XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0));
|
||||||
@ -838,7 +849,7 @@ void JitShader::Compile() {
|
|||||||
JMPptr(R(ABI_PARAM2));
|
JMPptr(R(ABI_PARAM2));
|
||||||
|
|
||||||
// Compile entire program
|
// Compile entire program
|
||||||
Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
|
Compile_Block(static_cast<unsigned>(this->setup->program_code.size()));
|
||||||
|
|
||||||
// Set the target for any incomplete branches now that the entire shader program has been emitted
|
// Set the target for any incomplete branches now that the entire shader program has been emitted
|
||||||
for (const auto& branch : fixup_branches) {
|
for (const auto& branch : fixup_branches) {
|
||||||
@ -855,6 +866,10 @@ void JitShader::Compile() {
|
|||||||
ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
|
ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
|
||||||
|
|
||||||
LOG_DEBUG(HW_GPU, "Compiled shader size=%d", size);
|
LOG_DEBUG(HW_GPU, "Compiled shader size=%d", size);
|
||||||
|
|
||||||
|
// We don't need the setup anymore
|
||||||
|
this->setup = nullptr;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
JitShader::JitShader() {
|
JitShader::JitShader() {
|
||||||
|
@ -33,11 +33,11 @@ class JitShader : public Gen::XCodeBlock {
|
|||||||
public:
|
public:
|
||||||
JitShader();
|
JitShader();
|
||||||
|
|
||||||
void Run(void* registers, unsigned offset) const {
|
void Run(const Pica::Regs::ShaderConfig& config, const ShaderSetup& setup, UnitState<false>& state) const {
|
||||||
program(registers, code_ptr[offset]);
|
program(&setup, code_ptr[config.main_offset], &state);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Compile();
|
void Compile(const ShaderSetup& setup);
|
||||||
|
|
||||||
void Compile_ADD(Instruction instr);
|
void Compile_ADD(Instruction instr);
|
||||||
void Compile_DP3(Instruction instr);
|
void Compile_DP3(Instruction instr);
|
||||||
@ -62,6 +62,8 @@ public:
|
|||||||
void Compile_CALLU(Instruction instr);
|
void Compile_CALLU(Instruction instr);
|
||||||
void Compile_IF(Instruction instr);
|
void Compile_IF(Instruction instr);
|
||||||
void Compile_LOOP(Instruction instr);
|
void Compile_LOOP(Instruction instr);
|
||||||
|
void Compile_EMIT(Instruction instr);
|
||||||
|
void Compile_SETEMIT(Instruction instr);
|
||||||
void Compile_JMP(Instruction instr);
|
void Compile_JMP(Instruction instr);
|
||||||
void Compile_CMP(Instruction instr);
|
void Compile_CMP(Instruction instr);
|
||||||
void Compile_MAD(Instruction instr);
|
void Compile_MAD(Instruction instr);
|
||||||
@ -96,6 +98,17 @@ private:
|
|||||||
*/
|
*/
|
||||||
void Compile_Assert(bool condition, const char* msg);
|
void Compile_Assert(bool condition, const char* msg);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the shader instruction for a given offset in the current shader program
|
||||||
|
* @param offset Offset in the current shader program of the instruction
|
||||||
|
* @return Instruction at the specified offset
|
||||||
|
*/
|
||||||
|
Instruction GetShaderInstruction(size_t offset) {
|
||||||
|
Instruction instruction;
|
||||||
|
std::memcpy(&instruction, &setup->program_code[offset], sizeof(Instruction));
|
||||||
|
return instruction;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Analyzes the entire shader program for `CALL` instructions before emitting any code,
|
* Analyzes the entire shader program for `CALL` instructions before emitting any code,
|
||||||
* identifying the locations where a return needs to be inserted.
|
* identifying the locations where a return needs to be inserted.
|
||||||
@ -114,8 +127,10 @@ private:
|
|||||||
/// Branches that need to be fixed up once the entire shader program is compiled
|
/// Branches that need to be fixed up once the entire shader program is compiled
|
||||||
std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
|
std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
|
||||||
|
|
||||||
using CompiledShader = void(void* registers, const u8* start_addr);
|
using CompiledShader = void(const void* setup, const u8* start_addr, void* state);
|
||||||
CompiledShader* program = nullptr;
|
CompiledShader* program = nullptr;
|
||||||
|
|
||||||
|
const ShaderSetup* setup = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // Shader
|
} // Shader
|
||||||
|
Loading…
Reference in New Issue
Block a user