shader_ir: Pass decoded nodes as a whole instead of per basic blocks
Some games call LDG at the top of a basic block, causing the tracking heuristic to fail. This commit lets the heuristic see the decoded nodes as a whole instead of per basic block. This may lead to some false positives, but it allows the heuristic to track cases it previously couldn't.
This commit is contained in:
		| @@ -151,7 +151,7 @@ u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) { | ||||
|     UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, | ||||
|                          "NeverExecute predicate not implemented"); | ||||
|  | ||||
|     static const std::map<OpCode::Type, u32 (ShaderIR::*)(BasicBlock&, const BasicBlock&, u32)> | ||||
|     static const std::map<OpCode::Type, u32 (ShaderIR::*)(BasicBlock&, u32)> | ||||
|         decoders = { | ||||
|             {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, | ||||
|             {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, | ||||
| @@ -181,9 +181,9 @@ u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) { | ||||
|  | ||||
|     std::vector<Node> tmp_block; | ||||
|     if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) { | ||||
|         pc = (this->*decoder->second)(tmp_block, bb, pc); | ||||
|         pc = (this->*decoder->second)(tmp_block, pc); | ||||
|     } else { | ||||
|         pc = DecodeOther(tmp_block, bb, pc); | ||||
|         pc = DecodeOther(tmp_block, pc); | ||||
|     } | ||||
|  | ||||
|     // Some instructions (like SSY) don't have a predicate field, they are always unconditionally | ||||
| @@ -192,11 +192,14 @@ u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) { | ||||
|     const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||||
|  | ||||
|     if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) { | ||||
|         bb.push_back( | ||||
|             Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block))); | ||||
|         const Node conditional = | ||||
|             Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)); | ||||
|         global_code.push_back(conditional); | ||||
|         bb.push_back(conditional); | ||||
|     } else { | ||||
|         for (auto& node : tmp_block) { | ||||
|             bb.push_back(std::move(node)); | ||||
|             global_code.push_back(node); | ||||
|             bb.push_back(node); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
| using Tegra::Shader::SubOp; | ||||
|  | ||||
| u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { | ||||
| using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
|  | ||||
| u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { | ||||
| using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
|  | ||||
| u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { | ||||
| using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
|  | ||||
| u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -15,7 +15,7 @@ using Tegra::Shader::OpCode; | ||||
| using Tegra::Shader::Pred; | ||||
| using Tegra::Shader::Register; | ||||
|  | ||||
| u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -16,7 +16,7 @@ using Tegra::Shader::Pred; | ||||
| using Tegra::Shader::PredicateResultMode; | ||||
| using Tegra::Shader::Register; | ||||
|  | ||||
| u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { | ||||
| using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
|  | ||||
| u32 ShaderIR::DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeBfe(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { | ||||
| using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
|  | ||||
| u32 ShaderIR::DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
| using Tegra::Shader::Register; | ||||
|  | ||||
| u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { | ||||
| using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
|  | ||||
| u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeFfma(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { | ||||
| using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
|  | ||||
| u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
| using Tegra::Shader::Pred; | ||||
|  | ||||
| u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -14,7 +14,7 @@ namespace VideoCommon::Shader { | ||||
| using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
|  | ||||
| u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
| using Tegra::Shader::Pred; | ||||
|  | ||||
| u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -16,7 +16,7 @@ using Tegra::Shader::HalfType; | ||||
| using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
|  | ||||
| u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeHfma2(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { | ||||
| using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
|  | ||||
| u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
| using Tegra::Shader::Pred; | ||||
|  | ||||
| u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -36,7 +36,7 @@ static std::size_t GetCoordCount(TextureType texture_type) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
| @@ -137,7 +137,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
|         }(); | ||||
|  | ||||
|         const Node addr_register = GetRegister(instr.gpr8); | ||||
|         const Node base_address = TrackCbuf(addr_register, code, static_cast<s64>(code.size())); | ||||
|         const Node base_address = | ||||
|             TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); | ||||
|         const auto cbuf = std::get_if<CbufNode>(base_address); | ||||
|         ASSERT(cbuf != nullptr); | ||||
|         const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); | ||||
|   | ||||
| @@ -14,7 +14,7 @@ using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
| using Tegra::Shader::Register; | ||||
|  | ||||
| u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
| using Tegra::Shader::Pred; | ||||
|  | ||||
| u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { | ||||
| using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
|  | ||||
| u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { | ||||
| using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
|  | ||||
| u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { | ||||
| using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
|  | ||||
| u32 ShaderIR::DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeShift(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -15,7 +15,7 @@ using Tegra::Shader::Pred; | ||||
| using Tegra::Shader::VideoType; | ||||
| using Tegra::Shader::VmadShr; | ||||
|  | ||||
| u32 ShaderIR::DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeVideo(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { | ||||
| using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
|  | ||||
| u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||||
| u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | ||||
|   | ||||
| @@ -591,31 +591,31 @@ private: | ||||
|      */ | ||||
|     u32 DecodeInstr(BasicBlock& bb, u32 pc); | ||||
|  | ||||
|     u32 DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||||
|     u32 DecodeArithmetic(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeArithmeticImmediate(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeBfe(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeBfi(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeShift(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeArithmeticInteger(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeArithmeticHalf(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeArithmeticHalfImmediate(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeFfma(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeHfma2(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeConversion(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeMemory(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeFloatSetPredicate(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeIntegerSetPredicate(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeHalfSetPredicate(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodePredicateSetRegister(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodePredicateSetPredicate(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeRegisterSetPredicate(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeFloatSet(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeIntegerSet(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeHalfSet(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeVideo(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeXmad(BasicBlock& bb, u32 pc); | ||||
|     u32 DecodeOther(BasicBlock& bb, u32 pc); | ||||
|  | ||||
|     /// Internalizes node's data and returns a managed pointer to a clone of that node | ||||
|     Node StoreNode(NodeData&& node_data); | ||||
| @@ -804,6 +804,7 @@ private: | ||||
|     std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; | ||||
|  | ||||
|     std::map<u32, BasicBlock> basic_blocks; | ||||
|     BasicBlock global_code; | ||||
|  | ||||
|     std::vector<std::unique_ptr<NodeData>> stored_nodes; | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 ReinUsesLisp
					ReinUsesLisp