VideoCore/Shader: Split interpreter and JIT into separate ShaderEngines
This commit is contained in:
		| @@ -50,9 +50,11 @@ set(HEADERS | |||||||
|  |  | ||||||
| if(ARCHITECTURE_x86_64) | if(ARCHITECTURE_x86_64) | ||||||
|     set(SRCS ${SRCS} |     set(SRCS ${SRCS} | ||||||
|  |             shader/shader_jit_x64.cpp | ||||||
|             shader/shader_jit_x64_compiler.cpp) |             shader/shader_jit_x64_compiler.cpp) | ||||||
|  |  | ||||||
|     set(HEADERS ${HEADERS} |     set(HEADERS ${HEADERS} | ||||||
|  |             shader/shader_jit_x64.h | ||||||
|             shader/shader_jit_x64_compiler.h) |             shader/shader_jit_x64_compiler.h) | ||||||
| endif() | endif() | ||||||
|  |  | ||||||
|   | |||||||
| @@ -499,7 +499,7 @@ void Init() { | |||||||
| } | } | ||||||
|  |  | ||||||
| void Shutdown() { | void Shutdown() { | ||||||
|     Shader::ClearCache(); |     Shader::Shutdown(); | ||||||
| } | } | ||||||
|  |  | ||||||
| template <typename T> | template <typename T> | ||||||
|   | |||||||
| @@ -2,14 +2,8 @@ | |||||||
| // Licensed under GPLv2 or any later version | // Licensed under GPLv2 or any later version | ||||||
| // Refer to the license.txt file included. | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
| #include <atomic> |  | ||||||
| #include <cmath> | #include <cmath> | ||||||
| #include <cstring> | #include <cstring> | ||||||
| #include <unordered_map> |  | ||||||
| #include <utility> |  | ||||||
| #include <boost/range/algorithm/fill.hpp> |  | ||||||
| #include "common/bit_field.h" |  | ||||||
| #include "common/hash.h" |  | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
| #include "common/microprofile.h" | #include "common/microprofile.h" | ||||||
| #include "video_core/pica.h" | #include "video_core/pica.h" | ||||||
| @@ -17,7 +11,7 @@ | |||||||
| #include "video_core/shader/shader.h" | #include "video_core/shader/shader.h" | ||||||
| #include "video_core/shader/shader_interpreter.h" | #include "video_core/shader/shader_interpreter.h" | ||||||
| #ifdef ARCHITECTURE_x86_64 | #ifdef ARCHITECTURE_x86_64 | ||||||
| #include "video_core/shader/shader_jit_x64_compiler.h" | #include "video_core/shader/shader_jit_x64.h" | ||||||
| #endif // ARCHITECTURE_x86_64 | #endif // ARCHITECTURE_x86_64 | ||||||
| #include "video_core/video_core.h" | #include "video_core/video_core.h" | ||||||
|  |  | ||||||
| @@ -87,91 +81,31 @@ void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) { | |||||||
|     conditional_code[1] = false; |     conditional_code[1] = false; | ||||||
| } | } | ||||||
|  |  | ||||||
| class MergedShaderEngine : public ShaderEngine { |  | ||||||
| public: |  | ||||||
|     void SetupBatch(const ShaderSetup* setup) override; |  | ||||||
|     void Run(UnitState& state, unsigned int entry_point) const override; |  | ||||||
|     DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, |  | ||||||
|         unsigned int entry_point) const override; |  | ||||||
|  |  | ||||||
| private: |  | ||||||
|     const ShaderSetup* setup = nullptr; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| #ifdef ARCHITECTURE_x86_64 |  | ||||||
| static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map; |  | ||||||
| static const JitShader* jit_shader; |  | ||||||
| #endif // ARCHITECTURE_x86_64 |  | ||||||
|  |  | ||||||
| void ClearCache() { |  | ||||||
| #ifdef ARCHITECTURE_x86_64 |  | ||||||
|     shader_map.clear(); |  | ||||||
| #endif // ARCHITECTURE_x86_64 |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void MergedShaderEngine::SetupBatch(const ShaderSetup* setup_) { |  | ||||||
|     setup = setup_; |  | ||||||
|     if (setup == nullptr) |  | ||||||
|         return; |  | ||||||
|  |  | ||||||
| #ifdef ARCHITECTURE_x86_64 |  | ||||||
|     if (VideoCore::g_shader_jit_enabled) { |  | ||||||
|         u64 code_hash = Common::ComputeHash64(&setup->program_code, sizeof(setup->program_code)); |  | ||||||
|         u64 swizzle_hash = Common::ComputeHash64(&setup->swizzle_data, sizeof(setup->swizzle_data)); |  | ||||||
|  |  | ||||||
|         u64 cache_key = code_hash ^ swizzle_hash; |  | ||||||
|         auto iter = shader_map.find(cache_key); |  | ||||||
|         if (iter != shader_map.end()) { |  | ||||||
|             jit_shader = iter->second.get(); |  | ||||||
|         } else { |  | ||||||
|             auto shader = std::make_unique<JitShader>(); |  | ||||||
|             shader->Compile(); |  | ||||||
|             jit_shader = shader.get(); |  | ||||||
|             shader_map[cache_key] = std::move(shader); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| #endif // ARCHITECTURE_x86_64 |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | ||||||
|  |  | ||||||
| void MergedShaderEngine::Run(UnitState& state, unsigned int entry_point) const { |  | ||||||
|     ASSERT(setup != nullptr); |  | ||||||
|     ASSERT(entry_point < 1024); |  | ||||||
|  |  | ||||||
|     MICROPROFILE_SCOPE(GPU_Shader); |  | ||||||
|  |  | ||||||
| #ifdef ARCHITECTURE_x86_64 | #ifdef ARCHITECTURE_x86_64 | ||||||
|     if (VideoCore::g_shader_jit_enabled) { | static std::unique_ptr<JitX64Engine> jit_engine; | ||||||
|         jit_shader->Run(*setup, state, entry_point); |  | ||||||
|     } else { |  | ||||||
|         DebugData<false> dummy_debug_data; |  | ||||||
|         RunInterpreter(*setup, state, dummy_debug_data, entry_point); |  | ||||||
|     } |  | ||||||
| #else |  | ||||||
|     DebugData<false> dummy_debug_data; |  | ||||||
|     RunInterpreter(*setup, state, dummy_debug_data, entry_point); |  | ||||||
| #endif // ARCHITECTURE_x86_64 | #endif // ARCHITECTURE_x86_64 | ||||||
| } | static InterpreterEngine interpreter_engine; | ||||||
|  |  | ||||||
| DebugData<true> MergedShaderEngine::ProduceDebugInfo(const InputVertex& input, int num_attributes, |  | ||||||
|                                                      unsigned int entry_point) const { |  | ||||||
|     ASSERT(setup != nullptr); |  | ||||||
|     ASSERT(entry_point < 1024); |  | ||||||
|  |  | ||||||
|     UnitState state; |  | ||||||
|     DebugData<true> debug_data; |  | ||||||
|  |  | ||||||
|     // Setup input register table |  | ||||||
|     boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero())); |  | ||||||
|     state.LoadInputVertex(input, num_attributes); |  | ||||||
|     RunInterpreter(*setup, state, debug_data, entry_point); |  | ||||||
|     return debug_data; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| ShaderEngine* GetEngine() { | ShaderEngine* GetEngine() { | ||||||
|     static MergedShaderEngine merged_engine; | #ifdef ARCHITECTURE_x86_64 | ||||||
|     return &merged_engine; |     // TODO(yuriks): Re-initialize on each change rather than being persistent | ||||||
|  |     if (VideoCore::g_shader_jit_enabled) { | ||||||
|  |         if (jit_engine == nullptr) { | ||||||
|  |             jit_engine = std::make_unique<JitX64Engine>(); | ||||||
|  |         } | ||||||
|  |         return jit_engine.get(); | ||||||
|  |     } | ||||||
|  | #endif // ARCHITECTURE_x86_64 | ||||||
|  |  | ||||||
|  |     return &interpreter_engine; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void Shutdown() { | ||||||
|  | #ifdef ARCHITECTURE_x86_64 | ||||||
|  |     jit_engine = nullptr; | ||||||
|  | #endif // ARCHITECTURE_x86_64 | ||||||
| } | } | ||||||
|  |  | ||||||
| } // namespace Shader | } // namespace Shader | ||||||
|   | |||||||
| @@ -6,7 +6,6 @@ | |||||||
|  |  | ||||||
| #include <array> | #include <array> | ||||||
| #include <cstddef> | #include <cstddef> | ||||||
| #include <memory> |  | ||||||
| #include <type_traits> | #include <type_traits> | ||||||
| #include <nihstro/shader_bytecode.h> | #include <nihstro/shader_bytecode.h> | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| @@ -152,9 +151,6 @@ struct UnitState { | |||||||
|     void LoadInputVertex(const InputVertex& input, int num_attributes); |     void LoadInputVertex(const InputVertex& input, int num_attributes); | ||||||
| }; | }; | ||||||
|  |  | ||||||
| /// Clears the shader cache |  | ||||||
| void ClearCache(); |  | ||||||
|  |  | ||||||
| struct ShaderSetup { | struct ShaderSetup { | ||||||
|     struct { |     struct { | ||||||
|         // The float uniforms are accessed by the shader JIT using SSE instructions, and are |         // The float uniforms are accessed by the shader JIT using SSE instructions, and are | ||||||
| @@ -210,6 +206,7 @@ public: | |||||||
|  |  | ||||||
| // TODO(yuriks): Remove and make it non-global state somewhere | // TODO(yuriks): Remove and make it non-global state somewhere | ||||||
| ShaderEngine* GetEngine(); | ShaderEngine* GetEngine(); | ||||||
|  | void Shutdown(); | ||||||
|  |  | ||||||
| } // namespace Shader | } // namespace Shader | ||||||
|  |  | ||||||
|   | |||||||
| @@ -7,10 +7,12 @@ | |||||||
| #include <cmath> | #include <cmath> | ||||||
| #include <numeric> | #include <numeric> | ||||||
| #include <boost/container/static_vector.hpp> | #include <boost/container/static_vector.hpp> | ||||||
|  | #include <boost/range/algorithm/fill.hpp> | ||||||
| #include <nihstro/shader_bytecode.h> | #include <nihstro/shader_bytecode.h> | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
|  | #include "common/microprofile.h" | ||||||
| #include "common/vector_math.h" | #include "common/vector_math.h" | ||||||
| #include "video_core/pica_state.h" | #include "video_core/pica_state.h" | ||||||
| #include "video_core/pica_types.h" | #include "video_core/pica_types.h" | ||||||
| @@ -37,8 +39,8 @@ struct CallStackElement { | |||||||
| }; | }; | ||||||
|  |  | ||||||
| template <bool Debug> | template <bool Debug> | ||||||
| void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data, | static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data, | ||||||
|                     unsigned offset) { |                            unsigned offset) { | ||||||
|     // TODO: Is there a maximal size for this? |     // TODO: Is there a maximal size for this? | ||||||
|     boost::container::static_vector<CallStackElement, 16> call_stack; |     boost::container::static_vector<CallStackElement, 16> call_stack; | ||||||
|     u32 program_counter = offset; |     u32 program_counter = offset; | ||||||
| @@ -647,9 +649,36 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug> | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| // Explicit instantiation | void InterpreterEngine::SetupBatch(const ShaderSetup* setup_) { | ||||||
| template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData<false>&, unsigned offset); |     setup = setup_; | ||||||
| template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData<true>&, unsigned offset); | } | ||||||
|  |  | ||||||
|  | MICROPROFILE_DECLARE(GPU_Shader); | ||||||
|  |  | ||||||
|  | void InterpreterEngine::Run(UnitState& state, unsigned int entry_point) const { | ||||||
|  |     ASSERT(setup != nullptr); | ||||||
|  |     ASSERT(entry_point < 1024); | ||||||
|  |  | ||||||
|  |     MICROPROFILE_SCOPE(GPU_Shader); | ||||||
|  |  | ||||||
|  |     DebugData<false> dummy_debug_data; | ||||||
|  |     RunInterpreter(*setup, state, dummy_debug_data, entry_point); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | DebugData<true> InterpreterEngine::ProduceDebugInfo(const InputVertex& input, int num_attributes, | ||||||
|  |                                                     unsigned int entry_point) const { | ||||||
|  |     ASSERT(setup != nullptr); | ||||||
|  |     ASSERT(entry_point < 1024); | ||||||
|  |  | ||||||
|  |     UnitState state; | ||||||
|  |     DebugData<true> debug_data; | ||||||
|  |  | ||||||
|  |     // Setup input register table | ||||||
|  |     boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero())); | ||||||
|  |     state.LoadInputVertex(input, num_attributes); | ||||||
|  |     RunInterpreter(*setup, state, debug_data, entry_point); | ||||||
|  |     return debug_data; | ||||||
|  | } | ||||||
|  |  | ||||||
| } // namespace | } // namespace | ||||||
|  |  | ||||||
|   | |||||||
| @@ -4,19 +4,22 @@ | |||||||
|  |  | ||||||
| #pragma once | #pragma once | ||||||
|  |  | ||||||
|  | #include "video_core/shader/shader.h" | ||||||
|  |  | ||||||
| namespace Pica { | namespace Pica { | ||||||
|  |  | ||||||
| namespace Shader { | namespace Shader { | ||||||
|  |  | ||||||
| struct ShaderSetup; | class InterpreterEngine final : public ShaderEngine { | ||||||
| struct UnitState; | public: | ||||||
|  |     void SetupBatch(const ShaderSetup* setup) override; | ||||||
|  |     void Run(UnitState& state, unsigned int entry_point) const override; | ||||||
|  |     DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, | ||||||
|  |                                      unsigned int entry_point) const override; | ||||||
|  |  | ||||||
| template <bool Debug> | private: | ||||||
| struct DebugData; |     const ShaderSetup* setup = nullptr; | ||||||
|  | }; | ||||||
| template <bool Debug> |  | ||||||
| void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data, |  | ||||||
|                     unsigned offset); |  | ||||||
|  |  | ||||||
| } // namespace | } // namespace | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										56
									
								
								src/video_core/shader/shader_jit_x64.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								src/video_core/shader/shader_jit_x64.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,56 @@ | |||||||
|  | // Copyright 2016 Citra Emulator Project | ||||||
|  | // Licensed under GPLv2 or any later version | ||||||
|  | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
|  | #include "common/hash.h" | ||||||
|  | #include "common/microprofile.h" | ||||||
|  | #include "video_core/shader/shader.h" | ||||||
|  | #include "video_core/shader/shader_jit_x64.h" | ||||||
|  | #include "video_core/shader/shader_jit_x64_compiler.h" | ||||||
|  |  | ||||||
|  | namespace Pica { | ||||||
|  | namespace Shader { | ||||||
|  |  | ||||||
|  | // The constructor and destructor are defined out of line in this translation unit, which | ||||||
|  | // includes shader_jit_x64_compiler.h. The header only forward-declares JitShader while the | ||||||
|  | // class stores std::unique_ptr<JitShader> values, so this is presumably done to have the | ||||||
|  | // complete JitShader type available where those members are created and destroyed. | ||||||
|  | JitX64Engine::JitX64Engine() = default; | ||||||
|  | JitX64Engine::~JitX64Engine() = default; | ||||||
|  |  | ||||||
|  | // Records `setup_` as the active shader setup and selects the JitShader that Run() will use. | ||||||
|  | // The shader is looked up in `cache` by the XOR of the 64-bit hashes of the setup's program | ||||||
|  | // code and swizzle data; on a miss a new JitShader is compiled and inserted under that key. | ||||||
|  | // Passing nullptr leaves the engine with no active setup and no selected shader. | ||||||
|  | void JitX64Engine::SetupBatch(const ShaderSetup* setup_) { | ||||||
|  |     setup = setup_; | ||||||
|  |     cached_shader = nullptr; | ||||||
|  |     if (setup == nullptr) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     const u64 program_hash = | ||||||
|  |         Common::ComputeHash64(&setup->program_code, sizeof(setup->program_code)); | ||||||
|  |     const u64 swizzle_digest = | ||||||
|  |         Common::ComputeHash64(&setup->swizzle_data, sizeof(setup->swizzle_data)); | ||||||
|  |     const u64 cache_key = program_hash ^ swizzle_digest; | ||||||
|  |  | ||||||
|  |     auto entry = cache.find(cache_key); | ||||||
|  |     if (entry == cache.end()) { | ||||||
|  |         // Cache miss: compile a fresh shader and store it under this key. | ||||||
|  |         auto compiled = std::make_unique<JitShader>(); | ||||||
|  |         compiled->Compile(); | ||||||
|  |         entry = cache.emplace_hint(entry, cache_key, std::move(compiled)); | ||||||
|  |     } | ||||||
|  |     cached_shader = entry->second.get(); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | MICROPROFILE_DECLARE(GPU_Shader); | ||||||
|  |  | ||||||
|  | // Runs the shader selected by the last SetupBatch() call on `state`, starting at | ||||||
|  | // `entry_point`. Asserts that a setup and a shader are currently selected and that | ||||||
|  | // `entry_point` is below 1024. The call is timed under the GPU_Shader profiling scope. | ||||||
|  | void JitX64Engine::Run(UnitState& state, unsigned int entry_point) const { | ||||||
|  |     ASSERT(setup != nullptr); | ||||||
|  |     ASSERT(cached_shader != nullptr); | ||||||
|  |     ASSERT(entry_point < 1024); | ||||||
|  |  | ||||||
|  |     MICROPROFILE_SCOPE(GPU_Shader); | ||||||
|  |  | ||||||
|  |     const ShaderSetup& active_setup = *setup; | ||||||
|  |     const JitShader& selected_shader = *cached_shader; | ||||||
|  |     selected_shader.Run(active_setup, state, entry_point); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Shader tracing is not available in the JIT engine: this always reports the condition through | ||||||
|  | // UNIMPLEMENTED_MSG. `input`, `num_attributes` and `entry_point` are not used. | ||||||
|  | // | ||||||
|  | // Returns an empty (value-initialized) DebugData<true> if the macro hands control back. | ||||||
|  | DebugData<true> JitX64Engine::ProduceDebugInfo(const InputVertex& input, int num_attributes, | ||||||
|  |                                                unsigned int entry_point) const { | ||||||
|  |     UNIMPLEMENTED_MSG("Shader tracing/debugging is not supported by the JIT."); | ||||||
|  |  | ||||||
|  |     // Flowing off the end of a non-void function is undefined behaviour, so return explicitly | ||||||
|  |     // in case the macro above does not terminate the program. | ||||||
|  |     return DebugData<true>{}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | } // namespace Shader | ||||||
|  | } // namespace Pica | ||||||
							
								
								
									
										35
									
								
								src/video_core/shader/shader_jit_x64.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								src/video_core/shader/shader_jit_x64.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,35 @@ | |||||||
|  | // Copyright 2016 Citra Emulator Project | ||||||
|  | // Licensed under GPLv2 or any later version | ||||||
|  | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | #include <memory> | ||||||
|  | #include <unordered_map> | ||||||
|  | #include "common/common_types.h" | ||||||
|  | #include "video_core/shader/shader.h" | ||||||
|  |  | ||||||
|  | namespace Pica { | ||||||
|  | namespace Shader { | ||||||
|  |  | ||||||
|  | class JitShader; | ||||||
|  |  | ||||||
|  | /// ShaderEngine implementation that executes shaders through JitShader objects, which are | ||||||
|  | /// kept in a per-engine cache and reused across batches. | ||||||
|  | class JitX64Engine final : public ShaderEngine { | ||||||
|  | public: | ||||||
|  |     JitX64Engine(); | ||||||
|  |     ~JitX64Engine() override; | ||||||
|  |  | ||||||
|  |     /// Stores `setup` as the active setup and selects (compiling if needed) its JitShader. | ||||||
|  |     void SetupBatch(const ShaderSetup* setup) override; | ||||||
|  |     /// Runs the currently selected JitShader on `state`, starting at `entry_point`. | ||||||
|  |     void Run(UnitState& state, unsigned int entry_point) const override; | ||||||
|  |     /// Not supported by this engine; the implementation reports an unimplemented condition. | ||||||
|  |     DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, | ||||||
|  |                                      unsigned int entry_point) const override; | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     /// Pointer passed to the most recent SetupBatch() call; nullptr when no batch is set up. | ||||||
|  |     const ShaderSetup* setup = nullptr; | ||||||
|  |  | ||||||
|  |     /// Compiled shaders, keyed by the 64-bit cache key computed in SetupBatch(). | ||||||
|  |     std::unordered_map<u64, std::unique_ptr<JitShader>> cache; | ||||||
|  |     /// Entry of `cache` selected by the most recent SetupBatch() call, or nullptr. | ||||||
|  |     const JitShader* cached_shader = nullptr; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | } // namespace Shader | ||||||
|  | } // namespace Pica | ||||||
		Reference in New Issue
	
	Block a user
	 Yuri Kunde Schlesner
					Yuri Kunde Schlesner