Shader: Use a POD struct for registers.
This commit is contained in:
		| @@ -67,29 +67,29 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) | |||||||
|     // Setup input register table |     // Setup input register table | ||||||
|     const auto& attribute_register_map = config.input_register_map; |     const auto& attribute_register_map = config.input_register_map; | ||||||
|  |  | ||||||
|     if (num_attributes > 0) state.input_registers[attribute_register_map.attribute0_register] = input.attr[0]; |     if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0]; | ||||||
|     if (num_attributes > 1) state.input_registers[attribute_register_map.attribute1_register] = input.attr[1]; |     if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1]; | ||||||
|     if (num_attributes > 2) state.input_registers[attribute_register_map.attribute2_register] = input.attr[2]; |     if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2]; | ||||||
|     if (num_attributes > 3) state.input_registers[attribute_register_map.attribute3_register] = input.attr[3]; |     if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = input.attr[3]; | ||||||
|     if (num_attributes > 4) state.input_registers[attribute_register_map.attribute4_register] = input.attr[4]; |     if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = input.attr[4]; | ||||||
|     if (num_attributes > 5) state.input_registers[attribute_register_map.attribute5_register] = input.attr[5]; |     if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = input.attr[5]; | ||||||
|     if (num_attributes > 6) state.input_registers[attribute_register_map.attribute6_register] = input.attr[6]; |     if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = input.attr[6]; | ||||||
|     if (num_attributes > 7) state.input_registers[attribute_register_map.attribute7_register] = input.attr[7]; |     if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = input.attr[7]; | ||||||
|     if (num_attributes > 8) state.input_registers[attribute_register_map.attribute8_register] = input.attr[8]; |     if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = input.attr[8]; | ||||||
|     if (num_attributes > 9) state.input_registers[attribute_register_map.attribute9_register] = input.attr[9]; |     if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = input.attr[9]; | ||||||
|     if (num_attributes > 10) state.input_registers[attribute_register_map.attribute10_register] = input.attr[10]; |     if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = input.attr[10]; | ||||||
|     if (num_attributes > 11) state.input_registers[attribute_register_map.attribute11_register] = input.attr[11]; |     if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = input.attr[11]; | ||||||
|     if (num_attributes > 12) state.input_registers[attribute_register_map.attribute12_register] = input.attr[12]; |     if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = input.attr[12]; | ||||||
|     if (num_attributes > 13) state.input_registers[attribute_register_map.attribute13_register] = input.attr[13]; |     if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = input.attr[13]; | ||||||
|     if (num_attributes > 14) state.input_registers[attribute_register_map.attribute14_register] = input.attr[14]; |     if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = input.attr[14]; | ||||||
|     if (num_attributes > 15) state.input_registers[attribute_register_map.attribute15_register] = input.attr[15]; |     if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = input.attr[15]; | ||||||
|  |  | ||||||
|     state.conditional_code[0] = false; |     state.conditional_code[0] = false; | ||||||
|     state.conditional_code[1] = false; |     state.conditional_code[1] = false; | ||||||
|  |  | ||||||
| #ifdef ARCHITECTURE_x86_64 | #ifdef ARCHITECTURE_x86_64 | ||||||
|     if (VideoCore::g_shader_jit_enabled) |     if (VideoCore::g_shader_jit_enabled) | ||||||
|         jit_shader(&state); |         jit_shader(&state.registers); | ||||||
|     else |     else | ||||||
|         RunInterpreter(state); |         RunInterpreter(state); | ||||||
| #else | #else | ||||||
| @@ -117,7 +117,7 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) | |||||||
|         for (int comp = 0; comp < 4; ++comp) { |         for (int comp = 0; comp < 4; ++comp) { | ||||||
|             float24* out = ((float24*)&ret) + semantics[comp]; |             float24* out = ((float24*)&ret) + semantics[comp]; | ||||||
|             if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { |             if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { | ||||||
|                 *out = state.output_registers[i][comp]; |                 *out = state.registers.output[i][comp]; | ||||||
|             } else { |             } else { | ||||||
|                 // Zero output so that attributes which aren't output won't have denormals in them, |                 // Zero output so that attributes which aren't output won't have denormals in them, | ||||||
|                 // which would slow us down later. |                 // which would slow us down later. | ||||||
|   | |||||||
| @@ -79,11 +79,14 @@ static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has inva | |||||||
|  * here will make it easier for us to parallelize the shader processing later. |  * here will make it easier for us to parallelize the shader processing later. | ||||||
|  */ |  */ | ||||||
| struct UnitState { | struct UnitState { | ||||||
|     // The registers are accessed by the shader JIT using SSE instructions, and are therefore |     struct Registers { | ||||||
|     // required to be 16-byte aligned. |         // The registers are accessed by the shader JIT using SSE instructions, and are therefore | ||||||
|     Math::Vec4<float24> MEMORY_ALIGNED16(input_registers[16]); |         // required to be 16-byte aligned. | ||||||
|     Math::Vec4<float24> MEMORY_ALIGNED16(output_registers[16]); |         Math::Vec4<float24> MEMORY_ALIGNED16(input[16]); | ||||||
|     Math::Vec4<float24> MEMORY_ALIGNED16(temporary_registers[16]); |         Math::Vec4<float24> MEMORY_ALIGNED16(output[16]); | ||||||
|  |         Math::Vec4<float24> MEMORY_ALIGNED16(temporary[16]); | ||||||
|  |     } registers; | ||||||
|  |     static_assert(std::is_pod<Registers>::value, "Structure is not POD"); | ||||||
|  |  | ||||||
|     u32 program_counter; |     u32 program_counter; | ||||||
|     bool conditional_code[2]; |     bool conditional_code[2]; | ||||||
| @@ -116,10 +119,10 @@ struct UnitState { | |||||||
|     static int InputOffset(const SourceRegister& reg) { |     static int InputOffset(const SourceRegister& reg) { | ||||||
|         switch (reg.GetRegisterType()) { |         switch (reg.GetRegisterType()) { | ||||||
|         case RegisterType::Input: |         case RegisterType::Input: | ||||||
|             return (int)offsetof(UnitState, input_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |             return (int)offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | ||||||
|  |  | ||||||
|         case RegisterType::Temporary: |         case RegisterType::Temporary: | ||||||
|             return (int)offsetof(UnitState, temporary_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |             return (int)offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | ||||||
|  |  | ||||||
|         default: |         default: | ||||||
|             UNREACHABLE(); |             UNREACHABLE(); | ||||||
| @@ -130,10 +133,10 @@ struct UnitState { | |||||||
|     static int OutputOffset(const DestRegister& reg) { |     static int OutputOffset(const DestRegister& reg) { | ||||||
|         switch (reg.GetRegisterType()) { |         switch (reg.GetRegisterType()) { | ||||||
|         case RegisterType::Output: |         case RegisterType::Output: | ||||||
|             return (int)offsetof(UnitState, output_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |             return (int)offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | ||||||
|  |  | ||||||
|         case RegisterType::Temporary: |         case RegisterType::Temporary: | ||||||
|             return (int)offsetof(UnitState, temporary_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |             return (int)offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | ||||||
|  |  | ||||||
|         default: |         default: | ||||||
|             UNREACHABLE(); |             UNREACHABLE(); | ||||||
|   | |||||||
| @@ -62,10 +62,10 @@ void RunInterpreter(UnitState& state) { | |||||||
|         auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { |         auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { | ||||||
|             switch (source_reg.GetRegisterType()) { |             switch (source_reg.GetRegisterType()) { | ||||||
|             case RegisterType::Input: |             case RegisterType::Input: | ||||||
|                 return &state.input_registers[source_reg.GetIndex()].x; |                 return &state.registers.input[source_reg.GetIndex()].x; | ||||||
|  |  | ||||||
|             case RegisterType::Temporary: |             case RegisterType::Temporary: | ||||||
|                 return &state.temporary_registers[source_reg.GetIndex()].x; |                 return &state.registers.temporary[source_reg.GetIndex()].x; | ||||||
|  |  | ||||||
|             case RegisterType::FloatUniform: |             case RegisterType::FloatUniform: | ||||||
|                 return &uniforms.f[source_reg.GetIndex()].x; |                 return &uniforms.f[source_reg.GetIndex()].x; | ||||||
| @@ -114,8 +114,8 @@ void RunInterpreter(UnitState& state) { | |||||||
|                 src2[3] = src2[3] * float24::FromFloat32(-1); |                 src2[3] = src2[3] * float24::FromFloat32(-1); | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers[instr.common.dest.Value().GetIndex()][0] |             float24* dest = (instr.common.dest.Value() < 0x10) ? &state.registers.output[instr.common.dest.Value().GetIndex()][0] | ||||||
|                         : (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0] |                         : (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] | ||||||
|                         : dummy_vec4_float24; |                         : dummy_vec4_float24; | ||||||
|  |  | ||||||
|             state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); |             state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||||||
| @@ -355,8 +355,8 @@ void RunInterpreter(UnitState& state) { | |||||||
|                     src3[3] = src3[3] * float24::FromFloat32(-1); |                     src3[3] = src3[3] * float24::FromFloat32(-1); | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|                 float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers[instr.mad.dest.Value().GetIndex()][0] |                 float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.registers.output[instr.mad.dest.Value().GetIndex()][0] | ||||||
|                             : (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0] |                             : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] | ||||||
|                             : dummy_vec4_float24; |                             : dummy_vec4_float24; | ||||||
|  |  | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|   | |||||||
| @@ -106,7 +106,7 @@ static const X64Reg COND0 = R13; | |||||||
| /// Result of the previous CMP instruction for the Y-component comparison | /// Result of the previous CMP instruction for the Y-component comparison | ||||||
| static const X64Reg COND1 = R14; | static const X64Reg COND1 = R14; | ||||||
| /// Pointer to the UnitState instance for the current VS unit | /// Pointer to the UnitState::Registers instance for the current VS unit | ||||||
| static const X64Reg STATE = R15; | static const X64Reg REGISTERS = R15; | ||||||
| /// SIMD scratch register | /// SIMD scratch register | ||||||
| static const X64Reg SCRATCH = XMM0; | static const X64Reg SCRATCH = XMM0; | ||||||
| /// Loaded with the first swizzled source register, otherwise can be used as a scratch register | /// Loaded with the first swizzled source register, otherwise can be used as a scratch register | ||||||
| @@ -140,7 +140,7 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source | |||||||
|         src_ptr = UNIFORMS; |         src_ptr = UNIFORMS; | ||||||
|         src_offset = src_reg.GetIndex() * sizeof(float24) * 4; |         src_offset = src_reg.GetIndex() * sizeof(float24) * 4; | ||||||
|     } else { |     } else { | ||||||
|         src_ptr = STATE; |         src_ptr = REGISTERS; | ||||||
|         src_offset = UnitState::InputOffset(src_reg); |         src_offset = UnitState::InputOffset(src_reg); | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -217,11 +217,11 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | |||||||
|     // If all components are enabled, write the result to the destination register |     // If all components are enabled, write the result to the destination register | ||||||
|     if (swiz.dest_mask == NO_DEST_REG_MASK) { |     if (swiz.dest_mask == NO_DEST_REG_MASK) { | ||||||
|         // Store dest back to memory |         // Store dest back to memory | ||||||
|         MOVAPS(MDisp(STATE, UnitState::OutputOffset(dest)), src); |         MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), src); | ||||||
|  |  | ||||||
|     } else { |     } else { | ||||||
|         // Not all components are enabled, so mask the result when storing to the destination register... |         // Not all components are enabled, so mask the result when storing to the destination register... | ||||||
|         MOVAPS(SCRATCH, MDisp(STATE, UnitState::OutputOffset(dest))); |         MOVAPS(SCRATCH, MDisp(REGISTERS, UnitState::OutputOffset(dest))); | ||||||
|  |  | ||||||
|         if (Common::GetCPUCaps().sse4_1) { |         if (Common::GetCPUCaps().sse4_1) { | ||||||
|             u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); |             u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); | ||||||
| @@ -240,7 +240,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         // Store dest back to memory |         // Store dest back to memory | ||||||
|         MOVAPS(MDisp(STATE, UnitState::OutputOffset(dest)), SCRATCH); |         MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), SCRATCH); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -635,7 +635,7 @@ CompiledShader* JitCompiler::Compile() { | |||||||
|  |  | ||||||
|     ABI_PushAllCalleeSavedRegsAndAdjustStack(); |     ABI_PushAllCalleeSavedRegsAndAdjustStack(); | ||||||
|  |  | ||||||
|     MOV(PTRBITS, R(STATE), R(ABI_PARAM1)); |     MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); | ||||||
|     MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); |     MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); | ||||||
|  |  | ||||||
|     // Zero address/loop  registers |     // Zero address/loop  registers | ||||||
|   | |||||||
| @@ -20,7 +20,7 @@ namespace Pica { | |||||||
|  |  | ||||||
| namespace Shader { | namespace Shader { | ||||||
|  |  | ||||||
| using CompiledShader = void(void* state); | using CompiledShader = void(void* registers); | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64 |  * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64 | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 bunnei
					bunnei