mirror of
https://github.com/citra-emu/citra.git
synced 2025-02-21 17:50:07 +00:00
Merge remote-tracking branch 'JayFoxRox/gs-new-refactor' into orange
This commit is contained in:
commit
55375a91b1
@ -45,6 +45,7 @@ QVariant BreakPointModel::data(const QModelIndex& index, int role) const
|
||||
{ Pica::DebugContext::Event::IncomingPrimitiveBatch, tr("Incoming primitive batch") },
|
||||
{ Pica::DebugContext::Event::FinishedPrimitiveBatch, tr("Finished primitive batch") },
|
||||
{ Pica::DebugContext::Event::VertexShaderInvocation, tr("Vertex shader invocation") },
|
||||
{ Pica::DebugContext::Event::GeometryShaderInvocation, tr("Geometry shader invocation") },
|
||||
{ Pica::DebugContext::Event::IncomingDisplayTransfer, tr("Incoming display transfer") },
|
||||
{ Pica::DebugContext::Event::GSPCommandProcessed, tr("GSP command processed") },
|
||||
{ Pica::DebugContext::Event::BufferSwapped, tr("Buffers swapped") }
|
||||
|
@ -501,7 +501,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d
|
||||
info.labels.insert({ entry_point, "main" });
|
||||
|
||||
// Generate debug information
|
||||
debug_data = Pica::g_state.vs.ProduceDebugInfo(input_vertex, num_attributes, shader_config, shader_setup);
|
||||
debug_data = Pica::g_state.vs.ProduceDebugInfo(input_vertex, num_attributes, shader_config);
|
||||
|
||||
// Reload widget state
|
||||
for (int attr = 0; attr < num_attributes; ++attr) {
|
||||
|
@ -33,10 +33,6 @@ namespace Pica {
|
||||
|
||||
namespace CommandProcessor {
|
||||
|
||||
static int float_regs_counter = 0;
|
||||
|
||||
static u32 uniform_write_buffer[4];
|
||||
|
||||
static int default_attr_counter = 0;
|
||||
|
||||
static u32 default_attr_write_buffer[3];
|
||||
@ -143,13 +139,13 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||
if (immediate_attribute_id >= regs.vs.num_input_attributes+1) {
|
||||
immediate_attribute_id = 0;
|
||||
|
||||
Shader::UnitState<false> shader_unit;
|
||||
auto& shader_unit = Shader::GetShaderUnit(false);
|
||||
g_state.vs.Setup();
|
||||
|
||||
// Send to vertex shader
|
||||
if (g_debug_context)
|
||||
g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast<void*>(&immediate_input));
|
||||
g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1);
|
||||
g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1, regs.vs);
|
||||
Shader::OutputVertex output_vertex = shader_unit.output_registers.ToVertex(regs.vs);
|
||||
|
||||
// Send to renderer
|
||||
@ -236,9 +232,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||
unsigned int vertex_cache_pos = 0;
|
||||
vertex_cache_ids.fill(-1);
|
||||
|
||||
Shader::UnitState<false> shader_unit;
|
||||
auto& vs_shader_unit = Shader::GetShaderUnit(false);
|
||||
g_state.vs.Setup();
|
||||
|
||||
auto& gs_unit_state = Shader::GetShaderUnit(true);
|
||||
g_state.gs.Setup();
|
||||
|
||||
for (unsigned int index = 0; index < regs.num_vertices; ++index)
|
||||
{
|
||||
// Indexed rendering doesn't use the start offset
|
||||
@ -274,8 +273,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||
// Send to vertex shader
|
||||
if (g_debug_context)
|
||||
g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input);
|
||||
g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes());
|
||||
output_registers = shader_unit.output_registers;
|
||||
g_state.vs.Run(vs_shader_unit, input, loader.GetNumTotalAttributes(), regs.vs);
|
||||
output_registers = vs_shader_unit.output_registers;
|
||||
|
||||
if (is_indexed) {
|
||||
vertex_cache[vertex_cache_pos] = output_registers;
|
||||
@ -284,17 +283,56 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||
}
|
||||
}
|
||||
|
||||
// Retreive vertex from register data
|
||||
Shader::OutputVertex output_vertex = output_registers.ToVertex(regs.vs);
|
||||
|
||||
// Send to renderer
|
||||
// Helper to send triangle to renderer
|
||||
using Pica::Shader::OutputVertex;
|
||||
auto AddTriangle = [](
|
||||
const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) {
|
||||
VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
|
||||
};
|
||||
|
||||
primitive_assembler.SubmitVertex(output_vertex, AddTriangle);
|
||||
if (Shader::UseGS()) {
|
||||
|
||||
auto& regs = g_state.regs;
|
||||
auto& gs_regs = g_state.regs.gs;
|
||||
auto& gs_buf = g_state.gs_input_buffer;
|
||||
|
||||
// Vertex Shader Outputs are converted into Geometry Shader inputs by filling up a buffer
|
||||
// For example, if we have a geoshader that takes 6 inputs, and the vertex shader outputs 2 attributes
|
||||
// It would take 3 vertices to fill up the Geometry Shader buffer
|
||||
unsigned int gs_input_count = gs_regs.num_input_attributes + 1;
|
||||
unsigned int vs_output_count = regs.vs_outmap_total2 + 1;
|
||||
ASSERT_MSG(regs.vs_outmap_total1 == regs.vs_outmap_total2, "VS_OUTMAP_TOTAL1 and VS_OUTMAP_TOTAL2 don't match!");
|
||||
// copy into the geoshader buffer
|
||||
for (unsigned int i = 0; i < vs_output_count; i++) {
|
||||
if (gs_buf.index >= gs_input_count) {
|
||||
// TODO(ds84182): LOG_ERROR()
|
||||
ASSERT_MSG(false, "Number of GS inputs (%d) is not divisible by number of VS outputs (%d)",
|
||||
gs_input_count, vs_output_count);
|
||||
continue;
|
||||
}
|
||||
gs_buf.buffer.attr[gs_buf.index++] = output_registers.value[i];
|
||||
}
|
||||
|
||||
if (gs_buf.index >= gs_input_count) {
|
||||
|
||||
// b15 will be false when a new primitive starts and then switch to true at some point
|
||||
//TODO: Test how this works exactly on hardware
|
||||
g_state.gs.uniforms.b[15] |= (index > 0);
|
||||
|
||||
// Process Geometry Shader
|
||||
if (g_debug_context)
|
||||
g_debug_context->OnEvent(DebugContext::Event::GeometryShaderInvocation, static_cast<void*>(&gs_buf.buffer));
|
||||
gs_unit_state.emit_triangle_callback = AddTriangle;
|
||||
g_state.gs.Run(gs_unit_state, gs_buf.buffer, gs_input_count, regs.gs);
|
||||
gs_unit_state.emit_triangle_callback = nullptr;
|
||||
|
||||
gs_buf.index = 0;
|
||||
}
|
||||
} else {
|
||||
Shader::OutputVertex output_vertex = output_registers.ToVertex(regs.vs);
|
||||
primitive_assembler.SubmitVertex(output_vertex, AddTriangle);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for (auto& range : memory_accesses.ranges) {
|
||||
@ -311,10 +349,76 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||
break;
|
||||
}
|
||||
|
||||
case PICA_REG_INDEX(vs.bool_uniforms):
|
||||
for (unsigned i = 0; i < 16; ++i)
|
||||
g_state.vs.uniforms.b[i] = (regs.vs.bool_uniforms.Value() & (1 << i)) != 0;
|
||||
case PICA_REG_INDEX(gs.bool_uniforms):
|
||||
Shader::WriteUniformBoolReg(true, value);
|
||||
break;
|
||||
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[0], 0x281):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[1], 0x282):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[2], 0x283):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[3], 0x284):
|
||||
{
|
||||
unsigned index = (id - PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[0], 0x281));
|
||||
auto values = regs.gs.int_uniforms[index];
|
||||
Shader::WriteUniformIntReg(true, index, Math::Vec4<u8>(values.x, values.y, values.z, values.w));
|
||||
break;
|
||||
}
|
||||
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.setup, 0x290):
|
||||
Shader::WriteUniformFloatSetupReg(true, value);
|
||||
break;
|
||||
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[0], 0x291):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[1], 0x292):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[2], 0x293):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[3], 0x294):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[4], 0x295):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[5], 0x296):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[6], 0x297):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[7], 0x298):
|
||||
{
|
||||
Shader::WriteUniformFloatReg(true, value);
|
||||
break;
|
||||
}
|
||||
|
||||
// Load shader program code
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.program.offset, 0x29b):
|
||||
Shader::WriteProgramCodeOffset(true, value);
|
||||
break;
|
||||
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[0], 0x29c):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[1], 0x29d):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[2], 0x29e):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[3], 0x29f):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[4], 0x2a0):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[5], 0x2a1):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[6], 0x2a2):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[7], 0x2a3):
|
||||
{
|
||||
Shader::WriteProgramCode(true, value);
|
||||
break;
|
||||
}
|
||||
|
||||
// Load swizzle pattern data
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.offset, 0x2a5):
|
||||
Shader::WriteSwizzlePatternsOffset(true, value);
|
||||
break;
|
||||
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[0], 0x2a6):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[1], 0x2a7):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[2], 0x2a8):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[3], 0x2a9):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[4], 0x2aa):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[5], 0x2ab):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[6], 0x2ac):
|
||||
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[7], 0x2ad):
|
||||
{
|
||||
Shader::WriteSwizzlePatterns(true, value);
|
||||
break;
|
||||
}
|
||||
|
||||
case PICA_REG_INDEX(vs.bool_uniforms):
|
||||
Shader::WriteUniformBoolReg(false, value);
|
||||
break;
|
||||
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1):
|
||||
@ -322,14 +426,16 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3):
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4):
|
||||
{
|
||||
int index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1));
|
||||
unsigned index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1));
|
||||
auto values = regs.vs.int_uniforms[index];
|
||||
g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w);
|
||||
LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x",
|
||||
index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value());
|
||||
Shader::WriteUniformIntReg(false, index, Math::Vec4<u8>(values.x, values.y, values.z, values.w));
|
||||
break;
|
||||
}
|
||||
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.setup, 0x2c0):
|
||||
Shader::WriteUniformFloatSetupReg(false, value);
|
||||
break;
|
||||
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[0], 0x2c1):
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[1], 0x2c2):
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[2], 0x2c3):
|
||||
@ -339,49 +445,15 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7):
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8):
|
||||
{
|
||||
auto& uniform_setup = regs.vs.uniform_setup;
|
||||
|
||||
// TODO: Does actual hardware indeed keep an intermediate buffer or does
|
||||
// it directly write the values?
|
||||
uniform_write_buffer[float_regs_counter++] = value;
|
||||
|
||||
// Uniforms are written in a packed format such that four float24 values are encoded in
|
||||
// three 32-bit numbers. We write to internal memory once a full such vector is
|
||||
// written.
|
||||
if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) ||
|
||||
(float_regs_counter >= 3 && !uniform_setup.IsFloat32())) {
|
||||
float_regs_counter = 0;
|
||||
|
||||
auto& uniform = g_state.vs.uniforms.f[uniform_setup.index];
|
||||
|
||||
if (uniform_setup.index > 95) {
|
||||
LOG_ERROR(HW_GPU, "Invalid VS uniform index %d", (int)uniform_setup.index);
|
||||
break;
|
||||
}
|
||||
|
||||
// NOTE: The destination component order indeed is "backwards"
|
||||
if (uniform_setup.IsFloat32()) {
|
||||
for (auto i : {0,1,2,3})
|
||||
uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i]));
|
||||
} else {
|
||||
// TODO: Untested
|
||||
uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8);
|
||||
uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF));
|
||||
uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | ((uniform_write_buffer[2] >> 24) & 0xFF));
|
||||
uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF);
|
||||
}
|
||||
|
||||
LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index,
|
||||
uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(),
|
||||
uniform.w.ToFloat32());
|
||||
|
||||
// TODO: Verify that this actually modifies the register!
|
||||
uniform_setup.index.Assign(uniform_setup.index + 1);
|
||||
}
|
||||
Shader::WriteUniformFloatReg(false, value);
|
||||
break;
|
||||
}
|
||||
|
||||
// Load shader program code
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.program.offset, 0x2cb):
|
||||
Shader::WriteProgramCodeOffset(false, value);
|
||||
break;
|
||||
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc):
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd):
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce):
|
||||
@ -391,12 +463,15 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2):
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3):
|
||||
{
|
||||
g_state.vs.program_code[regs.vs.program.offset] = value;
|
||||
regs.vs.program.offset++;
|
||||
Shader::WriteProgramCode(false, value);
|
||||
break;
|
||||
}
|
||||
|
||||
// Load swizzle pattern data
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.offset, 0x2d5):
|
||||
Shader::WriteSwizzlePatternsOffset(false, value);
|
||||
break;
|
||||
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6):
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7):
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8):
|
||||
@ -406,8 +481,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc):
|
||||
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd):
|
||||
{
|
||||
g_state.vs.swizzle_data[regs.vs.swizzle_patterns.offset] = value;
|
||||
regs.vs.swizzle_patterns.offset++;
|
||||
Shader::WriteSwizzlePatterns(false, value);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -41,6 +41,7 @@ public:
|
||||
IncomingPrimitiveBatch,
|
||||
FinishedPrimitiveBatch,
|
||||
VertexShaderInvocation,
|
||||
GeometryShaderInvocation,
|
||||
IncomingDisplayTransfer,
|
||||
GSPCommandProcessed,
|
||||
BufferSwapped,
|
||||
|
@ -1103,7 +1103,7 @@ struct Regs {
|
||||
// Number of vertices to render
|
||||
u32 num_vertices;
|
||||
|
||||
INSERT_PADDING_WORDS(0x1);
|
||||
BitField<0, 2, u32> using_geometry_shader;
|
||||
|
||||
// The index of the first vertex to render
|
||||
u32 vertex_offset;
|
||||
@ -1151,7 +1151,14 @@ struct Regs {
|
||||
}
|
||||
} command_buffer;
|
||||
|
||||
INSERT_PADDING_WORDS(0x07);
|
||||
INSERT_PADDING_WORDS(0x06);
|
||||
|
||||
enum class VSComMode : u32 {
|
||||
Shared = 0,
|
||||
Exclusive = 1
|
||||
};
|
||||
|
||||
VSComMode vs_com_mode;
|
||||
|
||||
enum class GPUMode : u32 {
|
||||
Drawing = 0,
|
||||
@ -1160,7 +1167,17 @@ struct Regs {
|
||||
|
||||
GPUMode gpu_mode;
|
||||
|
||||
INSERT_PADDING_WORDS(0x18);
|
||||
INSERT_PADDING_WORDS(0x4);
|
||||
|
||||
BitField<0, 4, u32> vs_outmap_total1;
|
||||
|
||||
INSERT_PADDING_WORDS(0x6);
|
||||
|
||||
BitField<0, 4, u32> vs_outmap_total2;
|
||||
|
||||
BitField<0, 4, u32> gsh_misc0;
|
||||
|
||||
INSERT_PADDING_WORDS(0xB);
|
||||
|
||||
enum class TriangleTopology : u32 {
|
||||
List = 0,
|
||||
@ -1169,7 +1186,10 @@ struct Regs {
|
||||
Shader = 3, // Programmable setup unit implemented in a geometry shader
|
||||
};
|
||||
|
||||
BitField<8, 2, TriangleTopology> triangle_topology;
|
||||
union {
|
||||
BitField<0, 4, u32> vs_outmap_count;
|
||||
BitField<8, 2, TriangleTopology> triangle_topology;
|
||||
};
|
||||
|
||||
u32 restart_primitive;
|
||||
|
||||
@ -1188,8 +1208,9 @@ struct Regs {
|
||||
INSERT_PADDING_WORDS(0x4);
|
||||
|
||||
union {
|
||||
// Number of input attributes to shader unit - 1
|
||||
BitField<0, 4, u32> num_input_attributes;
|
||||
BitField<0, 4, u32> num_input_attributes; // Number of input attributes to shader unit - 1
|
||||
BitField<8, 4, u32> use_subdivision;
|
||||
BitField<24, 8, u32> use_geometry_shader;
|
||||
};
|
||||
|
||||
// Offset to shader program entry point (in words)
|
||||
@ -1241,6 +1262,8 @@ struct Regs {
|
||||
}
|
||||
|
||||
union {
|
||||
u32 setup;
|
||||
|
||||
// Index of the next uniform to write to
|
||||
// TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices
|
||||
// TODO: Maybe the uppermost index is for the geometry shader? Investigate!
|
||||
@ -1361,7 +1384,11 @@ ASSERT_REG_POSITION(trigger_draw, 0x22e);
|
||||
ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);
|
||||
ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232);
|
||||
ASSERT_REG_POSITION(command_buffer, 0x238);
|
||||
ASSERT_REG_POSITION(vs_com_mode, 0x244);
|
||||
ASSERT_REG_POSITION(gpu_mode, 0x245);
|
||||
ASSERT_REG_POSITION(vs_outmap_total1, 0x24A);
|
||||
ASSERT_REG_POSITION(vs_outmap_total2, 0x251);
|
||||
ASSERT_REG_POSITION(gsh_misc0, 0x252);
|
||||
ASSERT_REG_POSITION(triangle_topology, 0x25e);
|
||||
ASSERT_REG_POSITION(restart_primitive, 0x25f);
|
||||
ASSERT_REG_POSITION(gs, 0x280);
|
||||
|
@ -22,6 +22,8 @@ struct State {
|
||||
/// Pica registers
|
||||
Regs regs;
|
||||
|
||||
Shader::UnitState<false> shader_units[4];
|
||||
|
||||
Shader::ShaderSetup vs;
|
||||
Shader::ShaderSetup gs;
|
||||
|
||||
@ -75,6 +77,15 @@ struct State {
|
||||
|
||||
// This is constructed with a dummy triangle topology
|
||||
PrimitiveAssembler<Shader::OutputVertex> primitive_assembler;
|
||||
|
||||
/// Current geometry shader state
|
||||
struct GeometryShaderState {
|
||||
// Buffer used for geometry shader inputs
|
||||
Shader::InputVertex buffer;
|
||||
// The current index into the buffer
|
||||
unsigned int index;
|
||||
} gs_input_buffer;
|
||||
|
||||
};
|
||||
|
||||
extern State g_state; ///< Current Pica state
|
||||
|
@ -19,7 +19,6 @@ template<typename VertexType>
|
||||
void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler)
|
||||
{
|
||||
switch (topology) {
|
||||
// TODO: Figure out what's different with TriangleTopology::Shader.
|
||||
case Regs::TriangleTopology::List:
|
||||
case Regs::TriangleTopology::Shader:
|
||||
if (buffer_index < 2) {
|
||||
|
@ -83,8 +83,7 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) {
|
||||
}
|
||||
|
||||
#ifdef ARCHITECTURE_x86_64
|
||||
static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map;
|
||||
static const JitShader* jit_shader;
|
||||
static std::unordered_map<u64, std::shared_ptr<JitShader>> shader_map;
|
||||
#endif // ARCHITECTURE_x86_64
|
||||
|
||||
void ClearCache() {
|
||||
@ -96,27 +95,27 @@ void ClearCache() {
|
||||
void ShaderSetup::Setup() {
|
||||
#ifdef ARCHITECTURE_x86_64
|
||||
if (VideoCore::g_shader_jit_enabled) {
|
||||
u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
|
||||
Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)));
|
||||
u64 cache_key = (Common::ComputeHash64(&program_code, sizeof(program_code)) ^
|
||||
Common::ComputeHash64(&swizzle_data, sizeof(swizzle_data)));
|
||||
|
||||
auto iter = shader_map.find(cache_key);
|
||||
if (iter != shader_map.end()) {
|
||||
jit_shader = iter->second.get();
|
||||
jit_shader = iter->second;
|
||||
} else {
|
||||
auto shader = std::make_unique<JitShader>();
|
||||
shader->Compile();
|
||||
jit_shader = shader.get();
|
||||
auto shader = std::make_shared<JitShader>();
|
||||
shader->Compile(*this);
|
||||
jit_shader = shader;
|
||||
shader_map[cache_key] = std::move(shader);
|
||||
}
|
||||
} else {
|
||||
jit_shader.reset();
|
||||
}
|
||||
#endif // ARCHITECTURE_x86_64
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
|
||||
|
||||
void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
|
||||
auto& config = g_state.regs.vs;
|
||||
auto& setup = g_state.vs;
|
||||
void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config) {
|
||||
|
||||
MICROPROFILE_SCOPE(GPU_Shader);
|
||||
|
||||
@ -133,17 +132,17 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num
|
||||
state.conditional_code[1] = false;
|
||||
|
||||
#ifdef ARCHITECTURE_x86_64
|
||||
if (VideoCore::g_shader_jit_enabled)
|
||||
jit_shader->Run(setup, state, config.main_offset);
|
||||
if (auto shader = jit_shader.lock())
|
||||
shader.get()->Run(*this, state, config.main_offset);
|
||||
else
|
||||
RunInterpreter(setup, state, config.main_offset);
|
||||
RunInterpreter(*this, state, config.main_offset);
|
||||
#else
|
||||
RunInterpreter(setup, state, config.main_offset);
|
||||
RunInterpreter(*this, state, config.main_offset);
|
||||
#endif // ARCHITECTURE_x86_64
|
||||
|
||||
}
|
||||
|
||||
DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) {
|
||||
DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config) {
|
||||
UnitState<true> state;
|
||||
|
||||
state.debug.max_offset = 0;
|
||||
@ -160,10 +159,212 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_
|
||||
state.conditional_code[0] = false;
|
||||
state.conditional_code[1] = false;
|
||||
|
||||
RunInterpreter(setup, state, config.main_offset);
|
||||
RunInterpreter(*this, state, config.main_offset);
|
||||
return state.debug;
|
||||
}
|
||||
|
||||
bool SharedGS() {
|
||||
return g_state.regs.vs_com_mode == Pica::Regs::VSComMode::Shared;
|
||||
}
|
||||
|
||||
/// Reports whether the geometry shader stage is enabled, based on the global
/// `using_geometry_shader` register (only the values 0 and 2 are expected).
bool UseGS() {
    // TODO(ds84182): This would be more accurate if it looked at individual shader units for the geoshader bit
    // gs_regs.input_buffer_config.use_geometry_shader == 0x08
    ASSERT((g_state.regs.using_geometry_shader == 0) || (g_state.regs.using_geometry_shader == 2));

    const bool geometry_stage_enabled = (g_state.regs.using_geometry_shader == 2);
    return geometry_stage_enabled;
}
|
||||
|
||||
/// Selects the shader unit state to run on.
/// @param gs true to request the unit used for geometry shaders, false for a vertex shader unit
/// @return Reference to one of the entries of `g_state.shader_units`
UnitState<false>& GetShaderUnit(bool gs) {
    auto& units = g_state.shader_units;

    // GS are always run on shader unit 3
    if (gs)
        return units[3];

    // The worst scheduler you'll ever see: plain round-robin over units 0..2
    // TODO: How does PICA shader scheduling work?
    // TODO: When does it also allow use of unit 3?!
    static unsigned next_vs_unit = 0;
    next_vs_unit = (next_vs_unit + 1) % 3;
    return units[next_vs_unit];
}
|
||||
|
||||
void WriteUniformBoolReg(bool gs, u32 value) {
|
||||
auto& setup = gs ? g_state.gs : g_state.vs;
|
||||
|
||||
ASSERT(setup.uniforms.b.size() == 16);
|
||||
for (unsigned i = 0; i < 16; ++i)
|
||||
setup.uniforms.b[i] = (value & (1 << i)) != 0;
|
||||
|
||||
// Copy for GS in shared mode
|
||||
if (!gs && SharedGS()) {
|
||||
WriteUniformBoolReg(true, value);
|
||||
}
|
||||
}
|
||||
|
||||
/// Stores an integer uniform vector into the selected shader setup.
/// @param gs true to write the geometry shader setup, false for the vertex shader setup
/// @param index Index of the integer uniform to write (asserted to be in range)
/// @param values The four 8-bit components to store
void WriteUniformIntReg(bool gs, unsigned index, const Math::Vec4<u8>& values) {
    const char* shader_type = gs ? "GS" : "VS";
    auto& setup = gs ? g_state.gs : g_state.vs;

    ASSERT(index < setup.uniforms.i.size());
    setup.uniforms.i[index] = values;

    // The components of a Vec4<u8> are plain u8 values (not register bit fields), so they are
    // passed to the logger directly; `index` is unsigned and therefore printed with %u.
    LOG_TRACE(HW_GPU, "Set %s integer uniform %u to %02x %02x %02x %02x",
              shader_type, index, values.x, values.y, values.z, values.w);

    // Copy for GS in shared mode
    if (!gs && SharedGS()) {
        WriteUniformIntReg(true, index, values);
    }
}
|
||||
|
||||
void WriteUniformFloatSetupReg(bool gs, u32 value) {
|
||||
auto& config = gs ? g_state.regs.gs : g_state.regs.vs;
|
||||
|
||||
config.uniform_setup.setup = value;
|
||||
|
||||
// Copy for GS in shared mode
|
||||
if (!gs && SharedGS()) {
|
||||
WriteUniformFloatSetupReg(true, value);
|
||||
}
|
||||
}
|
||||
|
||||
void WriteUniformFloatReg(bool gs, u32 value) {
|
||||
const char* shader_type = gs ? "GS" : "VS";
|
||||
auto& config = gs ? g_state.regs.gs : g_state.regs.vs;
|
||||
auto& setup = gs ? g_state.gs : g_state.vs;
|
||||
|
||||
auto& uniform_setup = config.uniform_setup;
|
||||
auto& uniform_write_buffer = setup.uniform_write_buffer;
|
||||
auto& float_regs_counter = setup.float_regs_counter;
|
||||
|
||||
// TODO: Does actual hardware indeed keep an intermediate buffer or does
|
||||
// it directly write the values?
|
||||
uniform_write_buffer[float_regs_counter++] = value;
|
||||
|
||||
// Uniforms are written in a packed format such that four float24 values are encoded in
|
||||
// three 32-bit numbers. We write to internal memory once a full such vector is
|
||||
// written.
|
||||
if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) ||
|
||||
(float_regs_counter >= 3 && !uniform_setup.IsFloat32())) {
|
||||
float_regs_counter = 0;
|
||||
|
||||
auto& uniform = setup.uniforms.f[uniform_setup.index];
|
||||
|
||||
if (uniform_setup.index >= 96) {
|
||||
LOG_ERROR(HW_GPU, "Invalid %s float uniform index %d", shader_type, (int)uniform_setup.index);
|
||||
} else {
|
||||
|
||||
// NOTE: The destination component order indeed is "backwards"
|
||||
if (uniform_setup.IsFloat32()) {
|
||||
for (auto i : {0,1,2,3})
|
||||
uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i]));
|
||||
} else {
|
||||
// TODO: Untested
|
||||
uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8);
|
||||
uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF));
|
||||
uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | ((uniform_write_buffer[2] >> 24) & 0xFF));
|
||||
uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF);
|
||||
}
|
||||
|
||||
LOG_TRACE(HW_GPU, "Set %s float uniform %x to (%f %f %f %f)", shader_type, (int)uniform_setup.index,
|
||||
uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(),
|
||||
uniform.w.ToFloat32());
|
||||
|
||||
// TODO: Verify that this actually modifies the register!
|
||||
uniform_setup.index.Assign(uniform_setup.index + 1);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Copy for GS in shared mode
|
||||
if (!gs && SharedGS()) {
|
||||
WriteUniformFloatReg(true, value);
|
||||
}
|
||||
}
|
||||
|
||||
void WriteProgramCodeOffset(bool gs, u32 value) {
|
||||
auto& config = gs ? g_state.regs.gs : g_state.regs.vs;
|
||||
config.program.offset = value;
|
||||
|
||||
// Copy for GS in shared mode
|
||||
if (!gs && SharedGS()) {
|
||||
WriteProgramCodeOffset(true, value);
|
||||
}
|
||||
}
|
||||
|
||||
void WriteProgramCode(bool gs, u32 value) {
|
||||
const char* shader_type = gs ? "GS" : "VS";
|
||||
auto& config = gs ? g_state.regs.gs : g_state.regs.vs;
|
||||
auto& setup = gs ? g_state.gs : g_state.vs;
|
||||
|
||||
if (config.program.offset >= setup.program_code.size()) {
|
||||
LOG_ERROR(HW_GPU, "Invalid %s program offset %d", shader_type, (int)config.program.offset);
|
||||
} else {
|
||||
setup.program_code[config.program.offset] = value;
|
||||
config.program.offset++;
|
||||
}
|
||||
|
||||
// Copy for GS in shared mode
|
||||
if (!gs && SharedGS()) {
|
||||
WriteProgramCode(true, value);
|
||||
}
|
||||
}
|
||||
|
||||
void WriteSwizzlePatternsOffset(bool gs, u32 value) {
|
||||
auto& config = gs ? g_state.regs.gs : g_state.regs.vs;
|
||||
config.swizzle_patterns.offset = value;
|
||||
|
||||
// Copy for GS in shared mode
|
||||
if (!gs && SharedGS()) {
|
||||
WriteSwizzlePatternsOffset(true, value);
|
||||
}
|
||||
}
|
||||
|
||||
void WriteSwizzlePatterns(bool gs, u32 value) {
|
||||
const char* shader_type = gs ? "GS" : "VS";
|
||||
auto& config = gs ? g_state.regs.gs : g_state.regs.vs;
|
||||
auto& setup = gs ? g_state.gs : g_state.vs;
|
||||
|
||||
if (config.swizzle_patterns.offset >= setup.swizzle_data.size()) {
|
||||
LOG_ERROR(HW_GPU, "Invalid %s swizzle pattern offset %d", shader_type, (int)config.swizzle_patterns.offset);
|
||||
} else {
|
||||
setup.swizzle_data[config.swizzle_patterns.offset] = value;
|
||||
config.swizzle_patterns.offset++;
|
||||
}
|
||||
|
||||
// Copy for GS in shared mode
|
||||
if (!gs && SharedGS()) {
|
||||
WriteSwizzlePatterns(true, value);
|
||||
}
|
||||
}
|
||||
|
||||
template<bool Debug>
|
||||
void HandleEMIT(UnitState<Debug>& state) {
|
||||
auto &config = g_state.regs.gs;
|
||||
auto &emit_params = state.emit_params;
|
||||
auto &emit_buffers = state.emit_buffers;
|
||||
|
||||
ASSERT(emit_params.vertex_id < 3);
|
||||
|
||||
emit_buffers[emit_params.vertex_id] = state.output_registers;
|
||||
|
||||
if (emit_params.primitive_emit) {
|
||||
ASSERT_MSG(state.emit_triangle_callback, "EMIT invoked but no handler set!");
|
||||
OutputVertex v0 = emit_buffers[0].ToVertex(config);
|
||||
OutputVertex v1 = emit_buffers[1].ToVertex(config);
|
||||
OutputVertex v2 = emit_buffers[2].ToVertex(config);
|
||||
if (emit_params.winding) {
|
||||
state.emit_triangle_callback(v2, v1, v0);
|
||||
} else {
|
||||
state.emit_triangle_callback(v0, v1, v2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Explicit instantiation
|
||||
template void HandleEMIT(UnitState<false>& state);
|
||||
template void HandleEMIT(UnitState<true>& state);
|
||||
|
||||
} // namespace Shader
|
||||
|
||||
} // namespace Pica
|
||||
|
@ -21,6 +21,7 @@
|
||||
|
||||
#include "video_core/pica.h"
|
||||
#include "video_core/pica_types.h"
|
||||
#include "video_core/primitive_assembly.h"
|
||||
|
||||
using nihstro::RegisterType;
|
||||
using nihstro::SourceRegister;
|
||||
@ -30,6 +31,11 @@ namespace Pica {
|
||||
|
||||
namespace Shader {
|
||||
|
||||
#ifdef ARCHITECTURE_x86_64
|
||||
// Forward declare JitShader because shader_jit_x64.h requires ShaderSetup (which uses JitShader) from this file
|
||||
class JitShader;
|
||||
#endif // ARCHITECTURE_x86_64
|
||||
|
||||
struct InputVertex {
|
||||
alignas(16) Math::Vec4<float24> attr[16];
|
||||
};
|
||||
@ -191,9 +197,9 @@ inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* va
|
||||
record.src3.x = value[0];
|
||||
record.src3.y = value[1];
|
||||
record.src3.z = value[2];
|
||||
|
||||
record.src3.w = value[3];
|
||||
}
|
||||
|
||||
template<>
|
||||
inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) {
|
||||
record.dest_in.x = value[0];
|
||||
@ -280,6 +286,17 @@ struct UnitState {
|
||||
} registers;
|
||||
static_assert(std::is_pod<Registers>::value, "Structure is not POD");
|
||||
|
||||
OutputRegisters emit_buffers[3]; //TODO: 3dbrew suggests this only stores the first 7 output registers
|
||||
|
||||
union EmitParameters {
|
||||
u32 raw;
|
||||
BitField<22, 1, u32> winding;
|
||||
BitField<23, 1, u32> primitive_emit;
|
||||
BitField<24, 2, u32> vertex_id;
|
||||
} emit_params;
|
||||
|
||||
PrimitiveAssembler<OutputVertex>::TriangleHandler emit_triangle_callback;
|
||||
|
||||
OutputRegisters output_registers;
|
||||
|
||||
bool conditional_code[2];
|
||||
@ -317,6 +334,10 @@ struct UnitState {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static size_t EmitParamsOffset() {
|
||||
return offsetof(UnitState, emit_params.raw);
|
||||
}
|
||||
};
|
||||
|
||||
/// Clears the shader cache
|
||||
@ -350,11 +371,18 @@ struct ShaderSetup {
|
||||
}
|
||||
}
|
||||
|
||||
int float_regs_counter = 0;
|
||||
u32 uniform_write_buffer[4];
|
||||
|
||||
std::array<u32, 1024> program_code;
|
||||
std::array<u32, 1024> swizzle_data;
|
||||
|
||||
#ifdef ARCHITECTURE_x86_64
|
||||
std::weak_ptr<const JitShader> jit_shader;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Performs any shader unit setup that only needs to happen once per shader (as opposed to once per
|
||||
* Performs any shader setup that only needs to happen once per shader (as opposed to once per
|
||||
* vertex, which would happen within the `Run` function).
|
||||
*/
|
||||
void Setup();
|
||||
@ -364,21 +392,36 @@ struct ShaderSetup {
|
||||
* @param state Shader unit state, must be setup per shader and per shader unit
|
||||
* @param input Input vertex into the shader
|
||||
* @param num_attributes The number of vertex shader attributes
|
||||
* @param config Configuration object for the shader pipeline
|
||||
*/
|
||||
void Run(UnitState<false>& state, const InputVertex& input, int num_attributes);
|
||||
void Run(UnitState<false>& state, const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config);
|
||||
|
||||
/**
|
||||
* Produce debug information based on the given shader and input vertex
|
||||
* @param input Input vertex into the shader
|
||||
* @param num_attributes The number of vertex shader attributes
|
||||
* @param config Configuration object for the shader pipeline
|
||||
* @param setup Setup object for the shader pipeline
|
||||
* @return Debug information for this shader with regards to the given vertex
|
||||
*/
|
||||
DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup);
|
||||
DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config);
|
||||
|
||||
};
|
||||
|
||||
// Free helper functions of the shader subsystem; their definitions are not in this header.
// NOTE(review): the `gs` flag presumably selects the geometry shader (true) versus the vertex
// shader (false), and the Write* functions presumably forward PICA register writes to the
// selected shader's setup - confirm against the definitions.
bool SharedGS();
|
||||
bool UseGS();
|
||||
UnitState<false>& GetShaderUnit(bool gs);
|
||||
void WriteUniformBoolReg(bool gs, u32 value);
|
||||
void WriteUniformIntReg(bool gs, unsigned index, const Math::Vec4<u8>& values);
|
||||
void WriteUniformFloatSetupReg(bool gs, u32 value);
|
||||
void WriteUniformFloatReg(bool gs, u32 value);
|
||||
void WriteProgramCodeOffset(bool gs, u32 value);
|
||||
void WriteProgramCode(bool gs, u32 value);
|
||||
void WriteSwizzlePatternsOffset(bool gs, u32 value);
|
||||
void WriteSwizzlePatterns(bool gs, u32 value);
|
||||
|
||||
/**
 * Handles the EMIT shader instruction for the given shader unit.
 * Called by the interpreter's EMIT case and by the x64 JIT through its Handle_EMIT trampoline.
 * Only declared here; the definition is explicitly instantiated for Debug = false and Debug = true.
 * @param state Shader unit state the instruction operates on
 */
template<bool Debug>
|
||||
void HandleEMIT(UnitState<Debug>& state);
|
||||
|
||||
} // namespace Shader
|
||||
|
||||
} // namespace Pica
|
||||
|
@ -47,9 +47,9 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
||||
|
||||
u32 program_counter = offset;
|
||||
|
||||
const auto& uniforms = g_state.vs.uniforms;
|
||||
const auto& swizzle_data = g_state.vs.swizzle_data;
|
||||
const auto& program_code = g_state.vs.program_code;
|
||||
const auto& uniforms = setup.uniforms;
|
||||
const auto& swizzle_data = setup.swizzle_data;
|
||||
const auto& program_code = setup.program_code;
|
||||
|
||||
// Placeholder for invalid inputs
|
||||
static float24 dummy_vec4_float24[4];
|
||||
@ -631,6 +631,16 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::EMIT: {
|
||||
Shader::HandleEMIT(state);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Id::SETEMIT: {
|
||||
state.emit_params.raw = program_code[program_counter];
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
|
||||
(int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
|
||||
|
@ -73,8 +73,8 @@ const JitFunction instr_table[64] = {
|
||||
&JitShader::Compile_IF, // ifu
|
||||
&JitShader::Compile_IF, // ifc
|
||||
&JitShader::Compile_LOOP, // loop
|
||||
nullptr, // emit
|
||||
nullptr, // sete
|
||||
&JitShader::Compile_EMIT, // emit
|
||||
&JitShader::Compile_SETEMIT, // setemit
|
||||
&JitShader::Compile_JMP, // jmpc
|
||||
&JitShader::Compile_JMP, // jmpu
|
||||
&JitShader::Compile_CMP, // cmp
|
||||
@ -146,15 +146,6 @@ static const u8 NO_SRC_REG_SWIZZLE = 0x1b;
|
||||
/// Raw constant for the destination register enable mask that indicates all components are enabled
|
||||
static const u8 NO_DEST_REG_MASK = 0xf;
|
||||
|
||||
/**
|
||||
* Get the vertex shader instruction for a given offset in the current shader program
|
||||
* @param offset Offset in the current shader program of the instruction
|
||||
* @return Instruction at the specified offset
|
||||
*/
|
||||
static Instruction GetVertexShaderInstruction(size_t offset) {
|
||||
return { g_state.vs.program_code[offset] };
|
||||
}
|
||||
|
||||
/**
 * Logs the given message at critical severity under the HW_GPU log class.
 * NOTE(review): presumably exists as a plain function so JIT-generated code can call it - confirm.
 * @param msg Null-terminated message, printed verbatim through a "%s" format
 */
static void LogCritical(const char* msg) {
|
||||
LOG_CRITICAL(HW_GPU, "%s", msg);
|
||||
}
|
||||
@ -225,7 +216,7 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
|
||||
MOVAPS(dest, MDisp(src_ptr, src_offset_disp));
|
||||
}
|
||||
|
||||
SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] };
|
||||
SwizzlePattern swiz = { setup->swizzle_data[operand_desc_id] };
|
||||
|
||||
// Generate instructions for source register swizzling as needed
|
||||
u8 sel = swiz.GetRawSelector(src_num);
|
||||
@ -256,7 +247,7 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
|
||||
dest = instr.common.dest.Value();
|
||||
}
|
||||
|
||||
SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] };
|
||||
SwizzlePattern swiz = { setup->swizzle_data[operand_desc_id] };
|
||||
|
||||
int dest_offset_disp = (int)UnitState<false>::OutputOffset(dest);
|
||||
ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest), "Destinaton offset too large for int type");
|
||||
@ -512,7 +503,7 @@ void JitShader::Compile_MIN(Instruction instr) {
|
||||
}
|
||||
|
||||
void JitShader::Compile_MOVA(Instruction instr) {
|
||||
SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] };
|
||||
SwizzlePattern swiz = { setup->swizzle_data[instr.common.operand_desc_id] };
|
||||
|
||||
if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) {
|
||||
return; // NoOp
|
||||
@ -736,6 +727,22 @@ void JitShader::Compile_LOOP(Instruction instr) {
|
||||
looping = false;
|
||||
}
|
||||
|
||||
/**
 * Host-side trampoline that JIT-compiled shader code calls to execute an EMIT instruction.
 * @param param1 Type-erased pointer to the UnitState<false> of the running shader unit
 */
static void Handle_EMIT(void* param1) {
    auto* unit_state = static_cast<UnitState<false>*>(param1);
    Shader::HandleEMIT(*unit_state);
}
|
||||
|
||||
/**
 * Emits x64 code for the EMIT instruction: a call from the compiled shader into the host
 * function Handle_EMIT, with the shader unit state pointer as the only argument.
 * @param instr The EMIT instruction (unused; no operand of it is read here)
 */
void JitShader::Compile_EMIT(Instruction instr) {
|
||||
// Save the registers reported by PersistentCallerSavedRegs() around the host call
ABI_PushRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0);
|
||||
// Copy the state pointer into the first-argument register at full pointer width
MOV(PTRBITS, R(ABI_PARAM1), R(STATE));
|
||||
ABI_CallFunctionR(reinterpret_cast<const void*>(Handle_EMIT), ABI_PARAM1);
|
||||
// Restore the same register set that was pushed above
ABI_PopRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0);
|
||||
}
|
||||
|
||||
/**
 * Emits x64 code for the SETEMIT instruction: a single 32-bit store of the instruction's raw
 * encoding into UnitState::emit_params.raw of the shader unit addressed by STATE.
 * The value is known at compile time, so it is embedded as an immediate.
 * @param instr The SETEMIT instruction being compiled
 */
void JitShader::Compile_SETEMIT(Instruction instr) {
    // The displacement operand is an int; narrow explicitly, as is done for the output register offsets.
    const int emit_params_disp = static_cast<int>(UnitState<false>::EmitParamsOffset());

    // Store the whole instruction word, matching the interpreter, which copies the raw program
    // word into emit_params.raw. Reading instr.hex avoids type-punning the setemit member
    // through a casted pointer.
    MOV(32, MDisp(STATE, emit_params_disp), Imm32(instr.hex));
}
|
||||
|
||||
void JitShader::Compile_JMP(Instruction instr) {
|
||||
if (instr.opcode.Value() == OpCode::Id::JMPC)
|
||||
Compile_EvaluateCondition(instr);
|
||||
@ -776,7 +783,7 @@ void JitShader::Compile_NextInstr() {
|
||||
ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!");
|
||||
code_ptr[program_counter] = GetCodePtr();
|
||||
|
||||
Instruction instr = GetVertexShaderInstruction(program_counter++);
|
||||
Instruction instr = GetShaderInstruction(program_counter++);
|
||||
|
||||
OpCode::Id opcode = instr.opcode.Value();
|
||||
auto instr_func = instr_table[static_cast<unsigned>(opcode)];
|
||||
@ -794,8 +801,8 @@ void JitShader::Compile_NextInstr() {
|
||||
void JitShader::FindReturnOffsets() {
|
||||
return_offsets.clear();
|
||||
|
||||
for (size_t offset = 0; offset < g_state.vs.program_code.size(); ++offset) {
|
||||
Instruction instr = GetVertexShaderInstruction(offset);
|
||||
for (size_t offset = 0; offset < setup->program_code.size(); ++offset) {
|
||||
Instruction instr = GetShaderInstruction(offset);
|
||||
|
||||
switch (instr.opcode.Value()) {
|
||||
case OpCode::Id::CALL:
|
||||
@ -812,7 +819,11 @@ void JitShader::FindReturnOffsets() {
|
||||
std::sort(return_offsets.begin(), return_offsets.end());
|
||||
}
|
||||
|
||||
void JitShader::Compile() {
|
||||
void JitShader::Compile(const ShaderSetup& setup) {
|
||||
|
||||
// Get a pointer to the setup to access program_code and swizzle_data
|
||||
this->setup = &setup;
|
||||
|
||||
// Reset flow control state
|
||||
program = (CompiledShader*)GetCodePtr();
|
||||
program_counter = 0;
|
||||
@ -848,7 +859,7 @@ void JitShader::Compile() {
|
||||
JMPptr(R(ABI_PARAM3));
|
||||
|
||||
// Compile entire program
|
||||
Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
|
||||
Compile_Block(static_cast<unsigned>(this->setup->program_code.size()));
|
||||
|
||||
// Set the target for any incomplete branches now that the entire shader program has been emitted
|
||||
for (const auto& branch : fixup_branches) {
|
||||
@ -865,6 +876,9 @@ void JitShader::Compile() {
|
||||
ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
|
||||
|
||||
LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size);
|
||||
|
||||
// We don't need the setup anymore
|
||||
this->setup = nullptr;
|
||||
}
|
||||
|
||||
JitShader::JitShader() {
|
||||
|
@ -40,7 +40,7 @@ public:
|
||||
program(&setup, &state, code_ptr[offset]);
|
||||
}
|
||||
|
||||
void Compile();
|
||||
void Compile(const ShaderSetup& setup);
|
||||
|
||||
void Compile_ADD(Instruction instr);
|
||||
void Compile_DP3(Instruction instr);
|
||||
@ -65,6 +65,8 @@ public:
|
||||
void Compile_CALLU(Instruction instr);
|
||||
void Compile_IF(Instruction instr);
|
||||
void Compile_LOOP(Instruction instr);
|
||||
void Compile_EMIT(Instruction instr);
|
||||
void Compile_SETEMIT(Instruction instr);
|
||||
void Compile_JMP(Instruction instr);
|
||||
void Compile_CMP(Instruction instr);
|
||||
void Compile_MAD(Instruction instr);
|
||||
@ -99,6 +101,17 @@ private:
|
||||
*/
|
||||
void Compile_Assert(bool condition, const char* msg);
|
||||
|
||||
/**
|
||||
* Get the shader instruction for a given offset in the current shader program
|
||||
* @param offset Offset in the current shader program of the instruction
|
||||
* @return Instruction at the specified offset
|
||||
*/
|
||||
Instruction GetShaderInstruction(size_t offset) {
|
||||
Instruction instruction;
|
||||
std::memcpy(&instruction, &setup->program_code[offset], sizeof(Instruction));
|
||||
return instruction;
|
||||
}
|
||||
|
||||
/**
|
||||
* Analyzes the entire shader program for `CALL` instructions before emitting any code,
|
||||
* identifying the locations where a return needs to be inserted.
|
||||
@ -119,6 +132,8 @@ private:
|
||||
|
||||
using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
|
||||
CompiledShader* program = nullptr;
|
||||
|
||||
const ShaderSetup* setup = nullptr;
|
||||
};
|
||||
|
||||
} // Shader
|
||||
|
Loading…
Reference in New Issue
Block a user