mirror of
https://github.com/citra-emu/citra.git
synced 2024-11-25 05:10:15 +00:00
Move the "input map" into the jitted vertex shader.
This commit is contained in:
parent
594bd182b4
commit
545db99381
@ -34,8 +34,13 @@ static const JitShader* jit_shader;
|
||||
void Setup() {
|
||||
#ifdef ARCHITECTURE_x86_64
|
||||
if (VideoCore::g_shader_jit_enabled) {
|
||||
u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
|
||||
Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)));
|
||||
auto& config = g_state.regs.vs;
|
||||
const auto& attribute_register_map = config.input_register_map;
|
||||
|
||||
u64 cache_key =
|
||||
Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
|
||||
Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)) ^
|
||||
Common::ComputeHash64(&attribute_register_map, sizeof(attribute_register_map));
|
||||
|
||||
auto iter = shader_map.find(cache_key);
|
||||
if (iter != shader_map.end()) {
|
||||
@ -68,38 +73,38 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
|
||||
state.debug.max_opdesc_id = 0;
|
||||
|
||||
// Setup input register table
|
||||
const auto& attribute_register_map = config.input_register_map;
|
||||
|
||||
// TODO: Instead of this cumbersome logic, just load the input data directly like
|
||||
// for (int attr = 0; attr < num_attributes; ++attr) { state.registers.input[attribute_register_map.GetRegisterForAttribute(attr)] = input.attr[attr]; }
|
||||
if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0];
|
||||
if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1];
|
||||
if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2];
|
||||
if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = input.attr[3];
|
||||
if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = input.attr[4];
|
||||
if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = input.attr[5];
|
||||
if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = input.attr[6];
|
||||
if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = input.attr[7];
|
||||
if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = input.attr[8];
|
||||
if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = input.attr[9];
|
||||
if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = input.attr[10];
|
||||
if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = input.attr[11];
|
||||
if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = input.attr[12];
|
||||
if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = input.attr[13];
|
||||
if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = input.attr[14];
|
||||
if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = input.attr[15];
|
||||
|
||||
state.conditional_code[0] = false;
|
||||
state.conditional_code[1] = false;
|
||||
|
||||
#ifdef ARCHITECTURE_x86_64
|
||||
if (VideoCore::g_shader_jit_enabled)
|
||||
jit_shader->Run(&state.registers, g_state.regs.vs.main_offset);
|
||||
else
|
||||
RunInterpreter(state);
|
||||
#else
|
||||
RunInterpreter(state);
|
||||
if (VideoCore::g_shader_jit_enabled) {
|
||||
jit_shader->Run(&state.registers, &input.attr[0], g_state.regs.vs.main_offset);
|
||||
} else
|
||||
#endif // ARCHITECTURE_x86_64
|
||||
{
|
||||
const auto& attribute_register_map = config.input_register_map;
|
||||
// TODO: Instead of this cumbersome logic, just load the input data directly like
|
||||
// for (int attr = 0; attr < num_attributes; ++attr) { state.registers.input[attribute_register_map.GetRegisterForAttribute(attr)] = input.attr[attr]; }
|
||||
if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0];
|
||||
if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1];
|
||||
if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2];
|
||||
if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = input.attr[3];
|
||||
if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = input.attr[4];
|
||||
if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = input.attr[5];
|
||||
if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = input.attr[6];
|
||||
if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = input.attr[7];
|
||||
if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = input.attr[8];
|
||||
if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = input.attr[9];
|
||||
if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = input.attr[10];
|
||||
if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = input.attr[11];
|
||||
if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = input.attr[12];
|
||||
if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = input.attr[13];
|
||||
if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = input.attr[14];
|
||||
if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = input.attr[15];
|
||||
|
||||
RunInterpreter(state);
|
||||
}
|
||||
|
||||
// Setup output data
|
||||
OutputVertex ret;
|
||||
|
@ -110,6 +110,8 @@ static const X64Reg COND0 = R13;
|
||||
static const X64Reg COND1 = R14;
|
||||
/// Pointer to the UnitState instance for the current VS unit
|
||||
static const X64Reg REGISTERS = R15;
|
||||
/// Pointer to the input data. Aliases the LOOPCOUNT register, which is safe because the inputs are loaded before any loops execute.
|
||||
static const X64Reg INPUT = RSI;
|
||||
/// SIMD scratch register
|
||||
static const X64Reg SCRATCH = XMM0;
|
||||
/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
|
||||
@ -805,6 +807,11 @@ void JitShader::FindReturnOffsets() {
|
||||
}
|
||||
|
||||
void JitShader::Compile() {
|
||||
auto& config = g_state.regs.vs;
|
||||
const auto& attribute_register_map = config.input_register_map;
|
||||
const auto& attribute_config = g_state.regs.vertex_attributes;
|
||||
int num_attributes = attribute_config.GetNumTotalAttributes();
|
||||
|
||||
// Reset flow control state
|
||||
program = (CompiledShader*)GetCodePtr();
|
||||
program_counter = 0;
|
||||
@ -818,7 +825,14 @@ void JitShader::Compile() {
|
||||
// The stack pointer is 8 modulo 16 at the entry of a procedure
|
||||
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
|
||||
|
||||
// Load inputs
|
||||
MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1));
|
||||
MOV(PTRBITS, R(INPUT), R(ABI_PARAM2));
|
||||
for (int i = 0; i < num_attributes; i++) {
|
||||
MOVAPS(SCRATCH, MDisp(INPUT, i * 16));
|
||||
MOVAPS(MDisp(REGISTERS, attribute_register_map.GetRegisterForAttribute(i) * 16), SCRATCH);
|
||||
}
|
||||
|
||||
MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms));
|
||||
|
||||
// Zero address/loop registers
|
||||
@ -837,7 +851,7 @@ void JitShader::Compile() {
|
||||
MOVAPS(NEGBIT, MatR(RAX));
|
||||
|
||||
// Jump to start of the shader program
|
||||
JMPptr(R(ABI_PARAM2));
|
||||
JMPptr(R(ABI_PARAM3));
|
||||
|
||||
// Compile entire program
|
||||
Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
|
||||
|
@ -33,8 +33,8 @@ class JitShader : public Gen::XCodeBlock {
|
||||
public:
|
||||
JitShader();
|
||||
|
||||
void Run(void* registers, unsigned offset) const {
|
||||
program(registers, code_ptr[offset]);
|
||||
void Run(void* registers, const void *input, unsigned offset) const {
|
||||
program(registers, input, code_ptr[offset]);
|
||||
}
|
||||
|
||||
void Compile();
|
||||
@ -114,7 +114,7 @@ private:
|
||||
/// Branches that need to be fixed up once the entire shader program is compiled
|
||||
std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
|
||||
|
||||
using CompiledShader = void(void* registers, const u8* start_addr);
|
||||
using CompiledShader = void(void* registers, const void *input, const u8* start_addr);
|
||||
CompiledShader* program = nullptr;
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user