Merge pull request #344 from bunnei/shader-decompiler-p2
Shader decompiler changes part 2
This commit is contained in:
		| @@ -192,11 +192,6 @@ private: | ||||
|     static_assert(position < 8 * sizeof(T), "Invalid position"); | ||||
|     static_assert(bits <= 8 * sizeof(T), "Invalid number of bits"); | ||||
|     static_assert(bits > 0, "Invalid number of bits"); | ||||
|     static_assert(std::is_pod<T>::value, "Invalid base type"); | ||||
|     static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable in a BitField"); | ||||
| }; | ||||
| #pragma pack() | ||||
|  | ||||
| #if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) | ||||
| static_assert(std::is_trivially_copyable<BitField<0, 1, unsigned>>::value, | ||||
|               "BitField must be trivially copyable"); | ||||
| #endif | ||||
|   | ||||
| @@ -4,6 +4,7 @@ | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include <cstring> | ||||
| #include <map> | ||||
| #include <string> | ||||
| #include "common/bit_field.h" | ||||
| @@ -12,14 +13,10 @@ namespace Tegra { | ||||
| namespace Shader { | ||||
|  | ||||
| struct Register { | ||||
|     Register() = default; | ||||
|     constexpr Register() = default; | ||||
|  | ||||
|     constexpr Register(u64 value) : value(value) {} | ||||
|  | ||||
|     constexpr u64 GetIndex() const { | ||||
|         return value; | ||||
|     } | ||||
|  | ||||
|     constexpr operator u64() const { | ||||
|         return value; | ||||
|     } | ||||
| @@ -43,13 +40,13 @@ struct Register { | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     u64 value; | ||||
|     u64 value{}; | ||||
| }; | ||||
|  | ||||
| union Attribute { | ||||
|     Attribute() = default; | ||||
|  | ||||
|     constexpr Attribute(u64 value) : value(value) {} | ||||
|     constexpr explicit Attribute(u64 value) : value(value) {} | ||||
|  | ||||
|     enum class Index : u64 { | ||||
|         Position = 7, | ||||
| @@ -68,7 +65,20 @@ union Attribute { | ||||
|     } fmt28; | ||||
|  | ||||
|     BitField<39, 8, u64> reg; | ||||
|     u64 value; | ||||
|     u64 value{}; | ||||
| }; | ||||
|  | ||||
| union Sampler { | ||||
|     Sampler() = default; | ||||
|  | ||||
|     constexpr explicit Sampler(u64 value) : value(value) {} | ||||
|  | ||||
|     enum class Index : u64 { | ||||
|         Sampler_0 = 8, | ||||
|     }; | ||||
|  | ||||
|     BitField<36, 13, Index> index; | ||||
|     u64 value{}; | ||||
| }; | ||||
|  | ||||
| union Uniform { | ||||
| @@ -238,7 +248,7 @@ union OpCode { | ||||
|     BitField<55, 9, Id> op3; | ||||
|     BitField<52, 12, Id> op4; | ||||
|     BitField<51, 13, Id> op5; | ||||
|     u64 value; | ||||
|     u64 value{}; | ||||
| }; | ||||
| static_assert(sizeof(OpCode) == 0x8, "Incorrect structure size"); | ||||
|  | ||||
| @@ -280,6 +290,7 @@ enum class SubOp : u64 { | ||||
|     Lg2 = 0x3, | ||||
|     Rcp = 0x4, | ||||
|     Rsq = 0x5, | ||||
|     Min = 0x8, | ||||
| }; | ||||
|  | ||||
| union Instruction { | ||||
| @@ -295,15 +306,25 @@ union Instruction { | ||||
|     BitField<20, 8, Register> gpr20; | ||||
|     BitField<20, 7, SubOp> sub_op; | ||||
|     BitField<28, 8, Register> gpr28; | ||||
|     BitField<36, 13, u64> imm36; | ||||
|     BitField<39, 8, Register> gpr39; | ||||
|  | ||||
|     union { | ||||
|         BitField<20, 19, u64> imm20; | ||||
|         BitField<45, 1, u64> negate_b; | ||||
|         BitField<46, 1, u64> abs_a; | ||||
|         BitField<48, 1, u64> negate_a; | ||||
|         BitField<49, 1, u64> abs_b; | ||||
|         BitField<50, 1, u64> abs_d; | ||||
|         BitField<56, 1, u64> negate_imm; | ||||
|  | ||||
|         float GetImm20() const { | ||||
|             float result{}; | ||||
|             u32 imm{static_cast<u32>(imm20)}; | ||||
|             imm <<= 12; | ||||
|             imm |= negate_imm ? 0x80000000 : 0; | ||||
|             std::memcpy(&result, &imm, sizeof(imm)); | ||||
|             return result; | ||||
|         } | ||||
|     } alu; | ||||
|  | ||||
|     union { | ||||
| @@ -311,11 +332,13 @@ union Instruction { | ||||
|         BitField<49, 1, u64> negate_c; | ||||
|     } ffma; | ||||
|  | ||||
|     BitField<61, 1, u64> is_b_imm; | ||||
|     BitField<60, 1, u64> is_b_gpr; | ||||
|     BitField<59, 1, u64> is_c_gpr; | ||||
|  | ||||
|     Attribute attribute; | ||||
|     Uniform uniform; | ||||
|     Sampler sampler; | ||||
|  | ||||
|     u64 hex; | ||||
| }; | ||||
|   | ||||
| @@ -17,6 +17,7 @@ using Tegra::Shader::Attribute; | ||||
| using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
| using Tegra::Shader::Register; | ||||
| using Tegra::Shader::Sampler; | ||||
| using Tegra::Shader::SubOp; | ||||
| using Tegra::Shader::Uniform; | ||||
|  | ||||
| @@ -155,23 +156,27 @@ private: | ||||
|  | ||||
|     /// Generates code representing an input attribute register. | ||||
|     std::string GetInputAttribute(Attribute::Index attribute) { | ||||
|         declr_input_attribute.insert(attribute); | ||||
|         switch (attribute) { | ||||
|         case Attribute::Index::Position: | ||||
|             return "position"; | ||||
|         default: | ||||
|             const u32 index{static_cast<u32>(attribute) - | ||||
|                             static_cast<u32>(Attribute::Index::Attribute_0)}; | ||||
|             if (attribute >= Attribute::Index::Attribute_0) { | ||||
|                 declr_input_attribute.insert(attribute); | ||||
|                 return "input_attribute_" + std::to_string(index); | ||||
|             } | ||||
|  | ||||
|         const u32 index{static_cast<u32>(attribute) - | ||||
|                         static_cast<u32>(Attribute::Index::Attribute_0)}; | ||||
|         if (attribute >= Attribute::Index::Attribute_0) { | ||||
|             return "input_attribute_" + std::to_string(index); | ||||
|             NGLOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index); | ||||
|             UNREACHABLE(); | ||||
|         } | ||||
|  | ||||
|         LOG_CRITICAL(HW_GPU, "Unhandled input attribute: 0x%02x", index); | ||||
|         UNREACHABLE(); | ||||
|     } | ||||
|  | ||||
|     /// Generates code representing an output attribute register. | ||||
|     std::string GetOutputAttribute(Attribute::Index attribute) { | ||||
|         switch (attribute) { | ||||
|         case Attribute::Index::Position: | ||||
|             return "gl_Position"; | ||||
|             return "position"; | ||||
|         default: | ||||
|             const u32 index{static_cast<u32>(attribute) - | ||||
|                             static_cast<u32>(Attribute::Index::Attribute_0)}; | ||||
| @@ -180,22 +185,42 @@ private: | ||||
|                 return "output_attribute_" + std::to_string(index); | ||||
|             } | ||||
|  | ||||
|             LOG_CRITICAL(HW_GPU, "Unhandled output attribute: 0x%02x", index); | ||||
|             NGLOG_CRITICAL(HW_GPU, "Unhandled output attribute: {}", index); | ||||
|             UNREACHABLE(); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Generates code representing an immediate value: the text std::to_string yields for the float returned by instr.alu.GetImm20(). NOTE(review): std::to_string prints floats in fixed "%f" style (six fractional digits) - confirm small immediates keep enough precision. | ||||
|     static std::string GetImmediate(const Instruction& instr) { | ||||
|         return std::to_string(instr.alu.GetImm20()); | ||||
|     } | ||||
|  | ||||
|     /// Generates code representing a temporary (GPR) register. | ||||
|     std::string GetRegister(const Register& reg) { | ||||
|         return *declr_register.insert("register_" + std::to_string(reg)).first; | ||||
|     std::string GetRegister(const Register& reg, unsigned elem = 0) { | ||||
|         if (stage == Maxwell3D::Regs::ShaderStage::Fragment && reg < 4) { | ||||
|             // GPRs 0-3 are output color for the fragment shader | ||||
|             return std::string{"color."} + "rgba"[(reg + elem) & 3]; | ||||
|         } | ||||
|  | ||||
|         return *declr_register.insert("register_" + std::to_string(reg + elem)).first; | ||||
|     } | ||||
|  | ||||
|     /// Generates code representing a uniform (C buffer) register: calls MarkAsUsed on declr_const_buffers[reg.index] with the index, offset and stage, then returns the text "c<index>[<offset>]". | ||||
|     std::string GetUniform(const Uniform& reg) { | ||||
|         declr_const_buffers[reg.index].MarkAsUsed(reg.index, reg.offset, stage); | ||||
|         declr_const_buffers[reg.index].MarkAsUsed(static_cast<unsigned>(reg.index), | ||||
|                                                   static_cast<unsigned>(reg.offset), stage); | ||||
|         return 'c' + std::to_string(reg.index) + '[' + std::to_string(reg.offset) + ']'; | ||||
|     } | ||||
|  | ||||
|     /// Generates code representing a texture sampler: returns "tex[N]", where N is sampler.index minus Sampler::Index::Sampler_0. | ||||
|     std::string GetSampler(const Sampler& sampler) const { | ||||
|         // TODO(Subv): Support more than just texture sampler 0; for now the assert below checks that the index is exactly Sampler_0. | ||||
|         ASSERT_MSG(sampler.index == Sampler::Index::Sampler_0, "unsupported"); | ||||
|         const unsigned index{static_cast<unsigned>(sampler.index.Value()) - | ||||
|                              static_cast<unsigned>(Sampler::Index::Sampler_0)}; | ||||
|         return "tex[" + std::to_string(index) + "]"; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Adds code that calls a subroutine. | ||||
|      * @param subroutine the subroutine to call. | ||||
| @@ -217,12 +242,13 @@ private: | ||||
|      * @param value the code representing the value to assign. | ||||
|      */ | ||||
|     void SetDest(u64 elem, const std::string& reg, const std::string& value, | ||||
|                  u64 dest_num_components, u64 value_num_components) { | ||||
|                  u64 dest_num_components, u64 value_num_components, bool is_abs = false) { | ||||
|         std::string swizzle = "."; | ||||
|         swizzle += "xyzw"[elem]; | ||||
|  | ||||
|         std::string dest = reg + (dest_num_components != 1 ? swizzle : ""); | ||||
|         std::string src = "(" + value + ")" + (value_num_components != 1 ? swizzle : ""); | ||||
|         src = is_abs ? "abs(" + src + ")" : src; | ||||
|  | ||||
|         shader.AddLine(dest + " = " + src + ";"); | ||||
|     } | ||||
| @@ -240,8 +266,6 @@ private: | ||||
|  | ||||
|         switch (OpCode::GetInfo(instr.opcode).type) { | ||||
|         case OpCode::Type::Arithmetic: { | ||||
|             ASSERT(!instr.alu.abs_d); | ||||
|  | ||||
|             std::string dest = GetRegister(instr.gpr0); | ||||
|             std::string op_a = instr.alu.negate_a ? "-" : ""; | ||||
|             op_a += GetRegister(instr.gpr8); | ||||
| @@ -250,63 +274,109 @@ private: | ||||
|             } | ||||
|  | ||||
|             std::string op_b = instr.alu.negate_b ? "-" : ""; | ||||
|             if (instr.is_b_gpr) { | ||||
|                 op_b += GetRegister(instr.gpr20); | ||||
|  | ||||
|             if (instr.is_b_imm) { | ||||
|                 op_b += GetImmediate(instr); | ||||
|             } else { | ||||
|                 op_b += GetUniform(instr.uniform); | ||||
|                 if (instr.is_b_gpr) { | ||||
|                     op_b += GetRegister(instr.gpr20); | ||||
|                 } else { | ||||
|                     op_b += GetUniform(instr.uniform); | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             if (instr.alu.abs_b) { | ||||
|                 op_b = "abs(" + op_b + ")"; | ||||
|             } | ||||
|  | ||||
|             switch (instr.opcode.EffectiveOpCode()) { | ||||
|             case OpCode::Id::FMUL_C: | ||||
|             case OpCode::Id::FMUL_R: { | ||||
|                 SetDest(0, dest, op_a + " * " + op_b, 1, 1); | ||||
|             case OpCode::Id::FMUL_R: | ||||
|             case OpCode::Id::FMUL_IMM: { | ||||
|                 SetDest(0, dest, op_a + " * " + op_b, 1, 1, instr.alu.abs_d); | ||||
|                 break; | ||||
|             } | ||||
|             case OpCode::Id::FADD_C: | ||||
|             case OpCode::Id::FADD_R: { | ||||
|                 SetDest(0, dest, op_a + " + " + op_b, 1, 1); | ||||
|             case OpCode::Id::FADD_R: | ||||
|             case OpCode::Id::FADD_IMM: { | ||||
|                 SetDest(0, dest, op_a + " + " + op_b, 1, 1, instr.alu.abs_d); | ||||
|                 break; | ||||
|             } | ||||
|             case OpCode::Id::MUFU: { | ||||
|                 switch (instr.sub_op) { | ||||
|                 case SubOp::Cos: | ||||
|                     SetDest(0, dest, "cos(" + op_a + ")", 1, 1, instr.alu.abs_d); | ||||
|                     break; | ||||
|                 case SubOp::Sin: | ||||
|                     SetDest(0, dest, "sin(" + op_a + ")", 1, 1, instr.alu.abs_d); | ||||
|                     break; | ||||
|                 case SubOp::Ex2: | ||||
|                     SetDest(0, dest, "exp2(" + op_a + ")", 1, 1, instr.alu.abs_d); | ||||
|                     break; | ||||
|                 case SubOp::Lg2: | ||||
|                     SetDest(0, dest, "log2(" + op_a + ")", 1, 1, instr.alu.abs_d); | ||||
|                     break; | ||||
|                 case SubOp::Rcp: | ||||
|                     SetDest(0, dest, "1.0 / " + op_a, 1, 1, instr.alu.abs_d); | ||||
|                     break; | ||||
|                 case SubOp::Rsq: | ||||
|                     SetDest(0, dest, "inversesqrt(" + op_a + ")", 1, 1, instr.alu.abs_d); | ||||
|                     break; | ||||
|                 case SubOp::Min: | ||||
|                     SetDest(0, dest, "min(" + op_a + "," + op_b + ")", 1, 1, instr.alu.abs_d); | ||||
|                     break; | ||||
|                 default: | ||||
|                     NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {}", | ||||
|                                    static_cast<unsigned>(instr.sub_op.Value())); | ||||
|                     UNREACHABLE(); | ||||
|                 } | ||||
|                 break; | ||||
|             } | ||||
|             default: { | ||||
|                 LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", | ||||
|                              static_cast<unsigned>(instr.opcode.EffectiveOpCode()), | ||||
|                              OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); | ||||
|                 throw DecompileFail("Unhandled instruction"); | ||||
|                 break; | ||||
|                 NGLOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {} ({}): {}", | ||||
|                                static_cast<unsigned>(instr.opcode.EffectiveOpCode()), | ||||
|                                OpCode::GetInfo(instr.opcode).name, instr.hex); | ||||
|                 UNREACHABLE(); | ||||
|             } | ||||
|             } | ||||
|             break; | ||||
|         } | ||||
|         case OpCode::Type::Ffma: { | ||||
|             ASSERT_MSG(!instr.ffma.negate_b, "untested"); | ||||
|             ASSERT_MSG(!instr.ffma.negate_c, "untested"); | ||||
|  | ||||
|             std::string dest = GetRegister(instr.gpr0); | ||||
|             std::string op_a = GetRegister(instr.gpr8); | ||||
|  | ||||
|             std::string op_b = instr.ffma.negate_b ? "-" : ""; | ||||
|             op_b += GetUniform(instr.uniform); | ||||
|  | ||||
|             std::string op_c = instr.ffma.negate_c ? "-" : ""; | ||||
|             op_c += GetRegister(instr.gpr39); | ||||
|  | ||||
|             switch (instr.opcode.EffectiveOpCode()) { | ||||
|             case OpCode::Id::FFMA_CR: { | ||||
|                 SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1); | ||||
|                 op_b += GetUniform(instr.uniform); | ||||
|                 op_c += GetRegister(instr.gpr39); | ||||
|                 break; | ||||
|             } | ||||
|             case OpCode::Id::FFMA_RR: { | ||||
|                 op_b += GetRegister(instr.gpr20); | ||||
|                 op_c += GetRegister(instr.gpr39); | ||||
|                 break; | ||||
|             } | ||||
|             case OpCode::Id::FFMA_RC: { | ||||
|                 op_b += GetRegister(instr.gpr39); | ||||
|                 op_c += GetUniform(instr.uniform); | ||||
|                 break; | ||||
|             } | ||||
|             case OpCode::Id::FFMA_IMM: { | ||||
|                 op_b += GetImmediate(instr); | ||||
|                 op_c += GetRegister(instr.gpr39); | ||||
|                 break; | ||||
|             } | ||||
|             default: { | ||||
|                 NGLOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {} ({}): {}", | ||||
|                                static_cast<unsigned>(instr.opcode.EffectiveOpCode()), | ||||
|                                OpCode::GetInfo(instr.opcode).name, instr.hex); | ||||
|                 UNREACHABLE(); | ||||
|             } | ||||
|             } | ||||
|  | ||||
|             default: { | ||||
|                 LOG_CRITICAL(HW_GPU, "Unhandled arithmetic FFMA instruction: 0x%02x (%s): 0x%08x", | ||||
|                              static_cast<unsigned>(instr.opcode.EffectiveOpCode()), | ||||
|                              OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); | ||||
|                 throw DecompileFail("Unhandled instruction"); | ||||
|                 break; | ||||
|             } | ||||
|             } | ||||
|             SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1); | ||||
|             break; | ||||
|         } | ||||
|         case OpCode::Type::Memory: { | ||||
| @@ -315,22 +385,33 @@ private: | ||||
|  | ||||
|             switch (instr.opcode.EffectiveOpCode()) { | ||||
|             case OpCode::Id::LD_A: { | ||||
|                 ASSERT(instr.attribute.fmt20.size == 0); | ||||
|                 ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested"); | ||||
|                 SetDest(instr.attribute.fmt20.element, gpr0, GetInputAttribute(attribute), 1, 4); | ||||
|                 break; | ||||
|             } | ||||
|             case OpCode::Id::ST_A: { | ||||
|                 ASSERT(instr.attribute.fmt20.size == 0); | ||||
|                 ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested"); | ||||
|                 SetDest(instr.attribute.fmt20.element, GetOutputAttribute(attribute), gpr0, 4, 1); | ||||
|                 break; | ||||
|             } | ||||
|             default: { | ||||
|                 LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: 0x%02x (%s): 0x%08x", | ||||
|                              static_cast<unsigned>(instr.opcode.EffectiveOpCode()), | ||||
|                              OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); | ||||
|                 throw DecompileFail("Unhandled instruction"); | ||||
|             case OpCode::Id::TEXS: { | ||||
|                 ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested"); | ||||
|                 const std::string op_a = GetRegister(instr.gpr8); | ||||
|                 const std::string op_b = GetRegister(instr.gpr20); | ||||
|                 const std::string sampler = GetSampler(instr.sampler); | ||||
|                 const std::string coord = "vec2(" + op_a + ", " + op_b + ")"; | ||||
|                 const std::string texture = "texture(" + sampler + ", " + coord + ")"; | ||||
|                 for (unsigned elem = 0; elem < instr.attribute.fmt20.size; ++elem) { | ||||
|                     SetDest(elem, GetRegister(instr.gpr0, elem), texture, 1, 4); | ||||
|                 } | ||||
|                 break; | ||||
|             } | ||||
|             default: { | ||||
|                 NGLOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {} ({}): {}", | ||||
|                                static_cast<unsigned>(instr.opcode.EffectiveOpCode()), | ||||
|                                OpCode::GetInfo(instr.opcode).name, instr.hex); | ||||
|                 UNREACHABLE(); | ||||
|             } | ||||
|             } | ||||
|             break; | ||||
|         } | ||||
| @@ -342,14 +423,18 @@ private: | ||||
|                 offset = PROGRAM_END - 1; | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             default: { | ||||
|                 LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", | ||||
|                              static_cast<unsigned>(instr.opcode.EffectiveOpCode()), | ||||
|                              OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); | ||||
|                 throw DecompileFail("Unhandled instruction"); | ||||
|             case OpCode::Id::IPA: { | ||||
|                 const auto& attribute = instr.attribute.fmt28; | ||||
|                 std::string dest = GetRegister(instr.gpr0); | ||||
|                 SetDest(attribute.element, dest, GetInputAttribute(attribute.index), 1, 4); | ||||
|                 break; | ||||
|             } | ||||
|             default: { | ||||
|                 NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {} ({}): {}", | ||||
|                                static_cast<unsigned>(instr.opcode.EffectiveOpCode()), | ||||
|                                OpCode::GetInfo(instr.opcode).name, instr.hex); | ||||
|                 UNREACHABLE(); | ||||
|             } | ||||
|             } | ||||
|  | ||||
|             break; | ||||
| @@ -514,7 +599,7 @@ boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, | ||||
|         GLSLGenerator generator(subroutines, program_code, main_offset, stage); | ||||
|         return ProgramResult{generator.GetShaderCode(), generator.GetEntries()}; | ||||
|     } catch (const DecompileFail& exception) { | ||||
|         LOG_ERROR(HW_GPU, "Shader decompilation failed: %s", exception.what()); | ||||
|         NGLOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what()); | ||||
|     } | ||||
|     return boost::none; | ||||
| } | ||||
|   | ||||
| @@ -27,10 +27,13 @@ out gl_PerVertex { | ||||
|     vec4 gl_Position; | ||||
| }; | ||||
|  | ||||
| out vec4 position; | ||||
|  | ||||
| void main() { | ||||
|     exec_shader(); | ||||
| } | ||||
|  | ||||
|     gl_Position = position; | ||||
| } | ||||
| )"; | ||||
|     out += program.first; | ||||
|     return {out, program.second}; | ||||
| @@ -46,6 +49,7 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSCo | ||||
|                                 .get_value_or({}); | ||||
|     out += R"( | ||||
|  | ||||
| in vec4 position; | ||||
| out vec4 color; | ||||
|  | ||||
| uniform sampler2D tex[32]; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 bunnei
					bunnei