Move the "input map" into the jitted vertex shader.

This commit is contained in:
Henrik Rydgard 2016-04-30 17:17:16 +02:00
parent 594bd182b4
commit 545db99381
3 changed files with 51 additions and 32 deletions

View File

@ -34,8 +34,13 @@ static const JitShader* jit_shader;
void Setup() { void Setup() {
#ifdef ARCHITECTURE_x86_64 #ifdef ARCHITECTURE_x86_64
if (VideoCore::g_shader_jit_enabled) { if (VideoCore::g_shader_jit_enabled) {
u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ auto& config = g_state.regs.vs;
Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data))); const auto& attribute_register_map = config.input_register_map;
u64 cache_key =
Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)) ^
Common::ComputeHash64(&attribute_register_map, sizeof(attribute_register_map));
auto iter = shader_map.find(cache_key); auto iter = shader_map.find(cache_key);
if (iter != shader_map.end()) { if (iter != shader_map.end()) {
@ -68,8 +73,17 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
state.debug.max_opdesc_id = 0; state.debug.max_opdesc_id = 0;
// Setup input register table // Setup input register table
const auto& attribute_register_map = config.input_register_map;
state.conditional_code[0] = false;
state.conditional_code[1] = false;
#ifdef ARCHITECTURE_x86_64
if (VideoCore::g_shader_jit_enabled) {
jit_shader->Run(&state.registers, &input.attr[0], g_state.regs.vs.main_offset);
} else
#endif // ARCHITECTURE_x86_64
{
const auto& attribute_register_map = config.input_register_map;
// TODO: Instead of this cumbersome logic, just load the input data directly like // TODO: Instead of this cumbersome logic, just load the input data directly like
// for (int attr = 0; attr < num_attributes; ++attr) { input_attr[0] = state.registers.input[attribute_register_map.attribute0_register]; } // for (int attr = 0; attr < num_attributes; ++attr) { input_attr[0] = state.registers.input[attribute_register_map.attribute0_register]; }
if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0]; if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0];
@ -89,17 +103,8 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = input.attr[14]; if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = input.attr[14];
if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = input.attr[15]; if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = input.attr[15];
state.conditional_code[0] = false;
state.conditional_code[1] = false;
#ifdef ARCHITECTURE_x86_64
if (VideoCore::g_shader_jit_enabled)
jit_shader->Run(&state.registers, g_state.regs.vs.main_offset);
else
RunInterpreter(state); RunInterpreter(state);
#else }
RunInterpreter(state);
#endif // ARCHITECTURE_x86_64
// Setup output data // Setup output data
OutputVertex ret; OutputVertex ret;

View File

@ -110,6 +110,8 @@ static const X64Reg COND0 = R13;
static const X64Reg COND1 = R14; static const X64Reg COND1 = R14;
/// Pointer to the UnitState instance for the current VS unit /// Pointer to the UnitState instance for the current VS unit
static const X64Reg REGISTERS = R15; static const X64Reg REGISTERS = R15;
/// Pointer to the input data. Aliased over LOOPCOUNT; this is safe because the inputs are only read while loading the input registers, before any loops execute.
static const X64Reg INPUT = RSI;
/// SIMD scratch register /// SIMD scratch register
static const X64Reg SCRATCH = XMM0; static const X64Reg SCRATCH = XMM0;
/// Loaded with the first swizzled source register, otherwise can be used as a scratch register /// Loaded with the first swizzled source register, otherwise can be used as a scratch register
@ -805,6 +807,11 @@ void JitShader::FindReturnOffsets() {
} }
void JitShader::Compile() { void JitShader::Compile() {
auto& config = g_state.regs.vs;
const auto& attribute_register_map = config.input_register_map;
const auto& attribute_config = g_state.regs.vertex_attributes;
int num_attributes = attribute_config.GetNumTotalAttributes();
// Reset flow control state // Reset flow control state
program = (CompiledShader*)GetCodePtr(); program = (CompiledShader*)GetCodePtr();
program_counter = 0; program_counter = 0;
@ -818,7 +825,14 @@ void JitShader::Compile() {
// The stack pointer is 8 modulo 16 at the entry of a procedure // The stack pointer is 8 modulo 16 at the entry of a procedure
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
// Load inputs
MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1));
MOV(PTRBITS, R(INPUT), R(ABI_PARAM2));
for (int i = 0; i < num_attributes; i++) {
MOVAPS(SCRATCH, MDisp(INPUT, i * 16));
MOVAPS(MDisp(REGISTERS, attribute_register_map.GetRegisterForAttribute(i) * 16), SCRATCH);
}
MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms));
// Zero address/loop registers // Zero address/loop registers
@ -837,7 +851,7 @@ void JitShader::Compile() {
MOVAPS(NEGBIT, MatR(RAX)); MOVAPS(NEGBIT, MatR(RAX));
// Jump to start of the shader program // Jump to start of the shader program
JMPptr(R(ABI_PARAM2)); JMPptr(R(ABI_PARAM3));
// Compile entire program // Compile entire program
Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));

View File

@ -33,8 +33,8 @@ class JitShader : public Gen::XCodeBlock {
public: public:
JitShader(); JitShader();
void Run(void* registers, unsigned offset) const { void Run(void* registers, const void *input, unsigned offset) const {
program(registers, code_ptr[offset]); program(registers, input, code_ptr[offset]);
} }
void Compile(); void Compile();
@ -114,7 +114,7 @@ private:
/// Branches that need to be fixed up once the entire shader program is compiled /// Branches that need to be fixed up once the entire shader program is compiled
std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
using CompiledShader = void(void* registers, const u8* start_addr); using CompiledShader = void(void* registers, const void *input, const u8* start_addr);
CompiledShader* program = nullptr; CompiledShader* program = nullptr;
}; };