Move the "input map" into the jitted vertex shader.

This commit is contained in:
Henrik Rydgard 2016-04-30 17:17:16 +02:00
parent 594bd182b4
commit 545db99381
3 changed files with 51 additions and 32 deletions

View File

@ -34,8 +34,13 @@ static const JitShader* jit_shader;
void Setup() {
#ifdef ARCHITECTURE_x86_64
if (VideoCore::g_shader_jit_enabled) {
u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)));
auto& config = g_state.regs.vs;
const auto& attribute_register_map = config.input_register_map;
u64 cache_key =
Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)) ^
Common::ComputeHash64(&attribute_register_map, sizeof(attribute_register_map));
auto iter = shader_map.find(cache_key);
if (iter != shader_map.end()) {
@ -68,38 +73,38 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
state.debug.max_opdesc_id = 0;
// Setup input register table
const auto& attribute_register_map = config.input_register_map;
// TODO: Instead of this cumbersome logic, just load the input data directly like
// for (int attr = 0; attr < num_attributes; ++attr) { input_attr[0] = state.registers.input[attribute_register_map.attribute0_register]; }
if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0];
if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1];
if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2];
if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = input.attr[3];
if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = input.attr[4];
if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = input.attr[5];
if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = input.attr[6];
if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = input.attr[7];
if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = input.attr[8];
if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = input.attr[9];
if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = input.attr[10];
if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = input.attr[11];
if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = input.attr[12];
if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = input.attr[13];
if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = input.attr[14];
if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = input.attr[15];
state.conditional_code[0] = false;
state.conditional_code[1] = false;
#ifdef ARCHITECTURE_x86_64
if (VideoCore::g_shader_jit_enabled)
jit_shader->Run(&state.registers, g_state.regs.vs.main_offset);
else
RunInterpreter(state);
#else
RunInterpreter(state);
if (VideoCore::g_shader_jit_enabled) {
jit_shader->Run(&state.registers, &input.attr[0], g_state.regs.vs.main_offset);
} else
#endif // ARCHITECTURE_x86_64
{
const auto& attribute_register_map = config.input_register_map;
// TODO: Instead of this cumbersome logic, just copy the input data directly in a loop, like
// for (int attr = 0; attr < num_attributes; ++attr) { state.registers.input[attribute_register_map.GetRegisterForAttribute(attr)] = input.attr[attr]; }
if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0];
if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1];
if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2];
if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = input.attr[3];
if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = input.attr[4];
if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = input.attr[5];
if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = input.attr[6];
if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = input.attr[7];
if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = input.attr[8];
if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = input.attr[9];
if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = input.attr[10];
if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = input.attr[11];
if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = input.attr[12];
if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = input.attr[13];
if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = input.attr[14];
if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = input.attr[15];
RunInterpreter(state);
}
// Setup output data
OutputVertex ret;

View File

@ -110,6 +110,8 @@ static const X64Reg COND0 = R13;
static const X64Reg COND1 = R14;
/// Pointer to the UnitState instance for the current VS unit
static const X64Reg REGISTERS = R15;
/// Pointer to the input attribute data. Shares a host register with LOOPCOUNT; this is safe
/// because the inputs are only read in the prologue, before any shader loop executes.
static const X64Reg INPUT = RSI;
/// SIMD scratch register
static const X64Reg SCRATCH = XMM0;
/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
@ -805,6 +807,11 @@ void JitShader::FindReturnOffsets() {
}
void JitShader::Compile() {
auto& config = g_state.regs.vs;
const auto& attribute_register_map = config.input_register_map;
const auto& attribute_config = g_state.regs.vertex_attributes;
int num_attributes = attribute_config.GetNumTotalAttributes();
// Reset flow control state
program = (CompiledShader*)GetCodePtr();
program_counter = 0;
@ -818,7 +825,14 @@ void JitShader::Compile() {
// The stack pointer is 8 modulo 16 at the entry of a procedure
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
// Load inputs
MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1));
MOV(PTRBITS, R(INPUT), R(ABI_PARAM2));
for (int i = 0; i < num_attributes; i++) {
MOVAPS(SCRATCH, MDisp(INPUT, i * 16));
MOVAPS(MDisp(REGISTERS, attribute_register_map.GetRegisterForAttribute(i) * 16), SCRATCH);
}
MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms));
// Zero address/loop registers
@ -837,7 +851,7 @@ void JitShader::Compile() {
MOVAPS(NEGBIT, MatR(RAX));
// Jump to start of the shader program
JMPptr(R(ABI_PARAM2));
JMPptr(R(ABI_PARAM3));
// Compile entire program
Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));

View File

@ -33,8 +33,8 @@ class JitShader : public Gen::XCodeBlock {
public:
JitShader();
void Run(void* registers, unsigned offset) const {
program(registers, code_ptr[offset]);
void Run(void* registers, const void *input, unsigned offset) const {
program(registers, input, code_ptr[offset]);
}
void Compile();
@ -114,7 +114,7 @@ private:
/// Branches that need to be fixed up once the entire shader program is compiled
std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
using CompiledShader = void(void* registers, const u8* start_addr);
using CompiledShader = void(void* registers, const void *input, const u8* start_addr);
CompiledShader* program = nullptr;
};