diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 043e99190..1f379b8a7 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -34,8 +34,13 @@ static const JitShader* jit_shader; void Setup() { #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) { - u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ - Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data))); + auto& config = g_state.regs.vs; + const auto& attribute_register_map = config.input_register_map; + + u64 cache_key = + Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ + Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)) ^ + Common::ComputeHash64(&attribute_register_map, sizeof(attribute_register_map)); auto iter = shader_map.find(cache_key); if (iter != shader_map.end()) { @@ -68,38 +73,38 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attr state.debug.max_opdesc_id = 0; // Setup input register table - const auto& attribute_register_map = config.input_register_map; - - // TODO: Instead of this cumbersome logic, just load the input data directly like - // for (int attr = 0; attr < num_attributes; ++attr) { input_attr[0] = state.registers.input[attribute_register_map.attribute0_register]; } - if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0]; - if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1]; - if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2]; - if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = input.attr[3]; - if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = input.attr[4]; - if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = input.attr[5]; - if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = input.attr[6]; - if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = input.attr[7]; - if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = input.attr[8]; - if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = input.attr[9]; - if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = input.attr[10]; - if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = input.attr[11]; - if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = input.attr[12]; - if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = input.attr[13]; - if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = input.attr[14]; - if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = input.attr[15]; state.conditional_code[0] = false; state.conditional_code[1] = false; #ifdef ARCHITECTURE_x86_64 - if (VideoCore::g_shader_jit_enabled) - jit_shader->Run(&state.registers, g_state.regs.vs.main_offset); - else - RunInterpreter(state); -#else - RunInterpreter(state); + if (VideoCore::g_shader_jit_enabled) { + jit_shader->Run(&state.registers, &input.attr[0], g_state.regs.vs.main_offset); + } else #endif // ARCHITECTURE_x86_64 + { + const auto& attribute_register_map = config.input_register_map; + // TODO: Instead of this cumbersome logic, just load the input data directly like + // for (int attr = 0; attr < num_attributes; ++attr) { input_attr[0] = state.registers.input[attribute_register_map.attribute0_register]; } + if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0]; + if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1]; + if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2]; + if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = input.attr[3]; + if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = input.attr[4]; + if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = input.attr[5]; + if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = input.attr[6]; + if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = input.attr[7]; + if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = input.attr[8]; + if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = input.attr[9]; + if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = input.attr[10]; + if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = input.attr[11]; + if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = input.attr[12]; + if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = input.attr[13]; + if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = input.attr[14]; + if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = input.attr[15]; + + RunInterpreter(state); + } // Setup output data OutputVertex ret; diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index b7747fa42..346250ca0 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -110,6 +110,8 @@ static const X64Reg COND0 = R13; static const X64Reg COND1 = R14; /// Pointer to the UnitState instance for the current VS unit static const X64Reg REGISTERS = R15; +/// Pointer to the input data. Aliased over LOOPCOUNT as this is before any loops execute. +static const X64Reg INPUT = RSI; /// SIMD scratch register static const X64Reg SCRATCH = XMM0; /// Loaded with the first swizzled source register, otherwise can be used as a scratch register @@ -805,6 +807,11 @@ void JitShader::FindReturnOffsets() { } void JitShader::Compile() { + auto& config = g_state.regs.vs; + const auto& attribute_register_map = config.input_register_map; + const auto& attribute_config = g_state.regs.vertex_attributes; + int num_attributes = attribute_config.GetNumTotalAttributes(); + // Reset flow control state program = (CompiledShader*)GetCodePtr(); program_counter = 0; @@ -818,7 +825,14 @@ void JitShader::Compile() { // The stack pointer is 8 modulo 16 at the entry of a procedure ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); + // Load inputs MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); + MOV(PTRBITS, R(INPUT), R(ABI_PARAM2)); + for (int i = 0; i < num_attributes; i++) { + MOVAPS(SCRATCH, MDisp(INPUT, i * 16)); + MOVAPS(MDisp(REGISTERS, attribute_register_map.GetRegisterForAttribute(i) * 16), SCRATCH); + } + MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); // Zero address/loop registers @@ -837,7 +851,7 @@ void JitShader::Compile() { MOVAPS(NEGBIT, MatR(RAX)); // Jump to start of the shader program - JMPptr(R(ABI_PARAM2)); + JMPptr(R(ABI_PARAM3)); // Compile entire program Compile_Block(static_cast(g_state.vs.program_code.size())); diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index cd6280ade..b2df4aa51 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -33,8 +33,8 @@ class JitShader : public Gen::XCodeBlock { public: JitShader(); - void Run(void* registers, unsigned offset) const { - program(registers, code_ptr[offset]); + void Run(void* registers, const void *input, unsigned offset) const { + program(registers, input, code_ptr[offset]); } void Compile(); @@ -114,7 +114,7 @@ private: /// Branches that need to be fixed up once the entire shader program is compiled std::vector> fixup_branches; - using CompiledShader = void(void* registers, const u8* start_addr); + using CompiledShader = void(void* registers, const void *input, const u8* start_addr); CompiledShader* program = nullptr; };