diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 1f379b8a7..83b3eeec6 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -40,7 +40,8 @@ void Setup() { u64 cache_key = Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)) ^ - Common::ComputeHash64(&attribute_register_map, sizeof(attribute_register_map)); + Common::ComputeHash64(&attribute_register_map, sizeof(attribute_register_map)) ^ + Common::ComputeHash64(&g_state.regs.vs_output_attributes, sizeof(g_state.regs.vs_output_attributes)); auto iter = shader_map.find(cache_key); if (iter != shader_map.end()) { @@ -64,25 +65,25 @@ void Shutdown() { MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240)); OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) { - auto& config = g_state.regs.vs; - MICROPROFILE_SCOPE(GPU_VertexShader); - state.program_counter = config.main_offset; state.debug.max_offset = 0; state.debug.max_opdesc_id = 0; - // Setup input register table - - state.conditional_code[0] = false; - state.conditional_code[1] = false; + // Setup output data + OutputVertex ret; #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) { - jit_shader->Run(&state.registers, &input.attr[0], g_state.regs.vs.main_offset); + jit_shader->Run(&state.registers, &input.attr[0], g_state.regs.vs.main_offset, &ret); } else #endif // ARCHITECTURE_x86_64 { + auto& config = g_state.regs.vs; + // Setup input register table + state.program_counter = config.main_offset; + state.conditional_code[0] = false; + state.conditional_code[1] = false; const auto& attribute_register_map = config.input_register_map; // TODO: Instead of this cumbersome logic, just load the input data directly like // for (int attr = 0; attr < num_attributes; ++attr) { input_attr[0] = state.registers.input[attribute_register_map.attribute0_register]; } @@ -106,13 +107,10 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attr RunInterpreter(state); } - // Setup output data - OutputVertex ret; // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to // figure out what those circumstances are and enable the remaining outputs then. unsigned index = 0; for (unsigned i = 0; i < 7; ++i) { - if (index >= g_state.regs.vs_output_total) break; diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 346250ca0..da1081a38 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -581,7 +581,6 @@ void JitShader::Compile_NOP(Instruction instr) { } void JitShader::Compile_END(Instruction instr) { - ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); RET(); } @@ -825,7 +824,7 @@ void JitShader::Compile() { // The stack pointer is 8 modulo 16 at the entry of a procedure ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); - // Load inputs + // Prologue: Scatter inputs into registers according to map MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); MOV(PTRBITS, R(INPUT), R(ABI_PARAM2)); for (int i = 0; i < num_attributes; i++) { @@ -849,9 +848,12 @@ void JitShader::Compile() { static const __m128 neg = { -0.f, -0.f, -0.f, -0.f }; MOV(PTRBITS, R(RAX), ImmPtr(&neg)); MOVAPS(NEGBIT, MatR(RAX)); + // Call the start of the shader program. + CALLptr(R(ABI_PARAM3)); + // Alright, back from the program. Now we can do the epilogue. - // Jump to start of the shader program - JMPptr(R(ABI_PARAM3)); + ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); + RET(); // Compile entire program Compile_Block(static_cast(g_state.vs.program_code.size())); diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index b2df4aa51..4b4065cbe 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -33,8 +33,8 @@ class JitShader : public Gen::XCodeBlock { public: JitShader(); - void Run(void* registers, const void *input, unsigned offset) const { - program(registers, input, code_ptr[offset]); + void Run(void* registers, const void *input, unsigned int offset, void *output) const { + program(registers, input, code_ptr[offset], output); } void Compile(); @@ -114,7 +114,7 @@ private: /// Branches that need to be fixed up once the entire shader program is compiled std::vector> fixup_branches; - using CompiledShader = void(void* registers, const void *input, const u8* start_addr); + using CompiledShader = void(void* registers, const void *input, const u8* start_addr, void *output); CompiledShader* program = nullptr; };