Merge pull request #7019 from ameerj/videocore-jthread
videocore: Use std::jthread for worker threads
This commit is contained in:
		| @@ -14,7 +14,7 @@ | ||||
| #include <utility> | ||||
|  | ||||
| namespace Common { | ||||
| template <typename T> | ||||
| template <typename T, bool with_stop_token = false> | ||||
| class SPSCQueue { | ||||
| public: | ||||
|     SPSCQueue() { | ||||
| @@ -84,7 +84,7 @@ public: | ||||
|     void Wait() { | ||||
|         if (Empty()) { | ||||
|             std::unique_lock lock{cv_mutex}; | ||||
|             cv.wait(lock, [this]() { return !Empty(); }); | ||||
|             cv.wait(lock, [this] { return !Empty(); }); | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -95,6 +95,19 @@ public: | ||||
|         return t; | ||||
|     } | ||||
|  | ||||
|     T PopWait(std::stop_token stop_token) { | ||||
|         if (Empty()) { | ||||
|             std::unique_lock lock{cv_mutex}; | ||||
|             cv.wait(lock, stop_token, [this] { return !Empty(); }); | ||||
|         } | ||||
|         if (stop_token.stop_requested()) { | ||||
|             return T{}; | ||||
|         } | ||||
|         T t; | ||||
|         Pop(t); | ||||
|         return t; | ||||
|     } | ||||
|  | ||||
|     // not thread-safe | ||||
|     void Clear() { | ||||
|         size.store(0); | ||||
| @@ -123,13 +136,13 @@ private: | ||||
|     ElementPtr* read_ptr; | ||||
|     std::atomic_size_t size{0}; | ||||
|     std::mutex cv_mutex; | ||||
|     std::condition_variable cv; | ||||
|     std::conditional_t<with_stop_token, std::condition_variable_any, std::condition_variable> cv; | ||||
| }; | ||||
|  | ||||
| // a simple thread-safe, | ||||
| // single reader, multiple writer queue | ||||
|  | ||||
| template <typename T> | ||||
| template <typename T, bool with_stop_token = false> | ||||
| class MPSCQueue { | ||||
| public: | ||||
|     [[nodiscard]] std::size_t Size() const { | ||||
| @@ -166,13 +179,17 @@ public: | ||||
|         return spsc_queue.PopWait(); | ||||
|     } | ||||
|  | ||||
|     T PopWait(std::stop_token stop_token) { | ||||
|         return spsc_queue.PopWait(stop_token); | ||||
|     } | ||||
|  | ||||
|     // not thread-safe | ||||
|     void Clear() { | ||||
|         spsc_queue.Clear(); | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     SPSCQueue<T> spsc_queue; | ||||
|     SPSCQueue<T, with_stop_token> spsc_queue; | ||||
|     std::mutex write_lock; | ||||
| }; | ||||
| } // namespace Common | ||||
|   | ||||
| @@ -305,10 +305,7 @@ struct System::Impl { | ||||
|         is_powered_on = false; | ||||
|         exit_lock = false; | ||||
|  | ||||
|         if (gpu_core) { | ||||
|             gpu_core->ShutDown(); | ||||
|         } | ||||
|  | ||||
|         gpu_core.reset(); | ||||
|         services.reset(); | ||||
|         service_manager.reset(); | ||||
|         cheat_engine.reset(); | ||||
| @@ -317,7 +314,6 @@ struct System::Impl { | ||||
|         time_manager.Shutdown(); | ||||
|         core_timing.Shutdown(); | ||||
|         app_loader.reset(); | ||||
|         gpu_core.reset(); | ||||
|         perf_stats.reset(); | ||||
|         kernel.Shutdown(); | ||||
|         memory.Reset(); | ||||
|   | ||||
| @@ -531,14 +531,6 @@ void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const { | ||||
|     interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); | ||||
| } | ||||
|  | ||||
| void GPU::ShutDown() { | ||||
|     // Signal that threads should no longer block on syncpoint fences | ||||
|     shutting_down.store(true, std::memory_order_relaxed); | ||||
|     sync_cv.notify_all(); | ||||
|  | ||||
|     gpu_thread.ShutDown(); | ||||
| } | ||||
|  | ||||
| void GPU::OnCommandListEnd() { | ||||
|     if (is_async) { | ||||
|         // This command only applies to asynchronous GPU mode | ||||
|   | ||||
| @@ -219,9 +219,6 @@ public: | ||||
|         return *shader_notify; | ||||
|     } | ||||
|  | ||||
|     // Stops the GPU execution and waits for the GPU to finish working | ||||
|     void ShutDown(); | ||||
|  | ||||
|     /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. | ||||
|     void WaitFence(u32 syncpoint_id, u32 value); | ||||
|  | ||||
|   | ||||
| @@ -17,9 +17,9 @@ | ||||
| namespace VideoCommon::GPUThread { | ||||
|  | ||||
| /// Runs the GPU thread | ||||
| static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | ||||
|                       Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher, | ||||
|                       SynchState& state) { | ||||
| static void RunThread(std::stop_token stop_token, Core::System& system, | ||||
|                       VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, | ||||
|                       Tegra::DmaPusher& dma_pusher, SynchState& state) { | ||||
|     std::string name = "yuzu:GPU"; | ||||
|     MicroProfileOnThreadCreate(name.c_str()); | ||||
|     SCOPE_EXIT({ MicroProfileOnThreadExit(); }); | ||||
| @@ -28,20 +28,14 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | ||||
|     Common::SetCurrentThreadPriority(Common::ThreadPriority::High); | ||||
|     system.RegisterHostThread(); | ||||
|  | ||||
|     // Wait for first GPU command before acquiring the window context | ||||
|     state.queue.Wait(); | ||||
|  | ||||
|     // If emulation was stopped during disk shader loading, abort before trying to acquire context | ||||
|     if (!state.is_running) { | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     auto current_context = context.Acquire(); | ||||
|     VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer(); | ||||
|  | ||||
|     CommandDataContainer next; | ||||
|     while (state.is_running) { | ||||
|         next = state.queue.PopWait(); | ||||
|     while (!stop_token.stop_requested()) { | ||||
|         CommandDataContainer next = state.queue.PopWait(stop_token); | ||||
|         if (stop_token.stop_requested()) { | ||||
|             break; | ||||
|         } | ||||
|         if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) { | ||||
|             dma_pusher.Push(std::move(submit_list->entries)); | ||||
|             dma_pusher.DispatchCalls(); | ||||
| @@ -55,8 +49,6 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | ||||
|             rasterizer->FlushRegion(flush->addr, flush->size); | ||||
|         } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { | ||||
|             rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); | ||||
|         } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { | ||||
|             ASSERT(state.is_running == false); | ||||
|         } else { | ||||
|             UNREACHABLE(); | ||||
|         } | ||||
| @@ -73,16 +65,14 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | ||||
| ThreadManager::ThreadManager(Core::System& system_, bool is_async_) | ||||
|     : system{system_}, is_async{is_async_} {} | ||||
|  | ||||
| ThreadManager::~ThreadManager() { | ||||
|     ShutDown(); | ||||
| } | ||||
| ThreadManager::~ThreadManager() = default; | ||||
|  | ||||
| void ThreadManager::StartThread(VideoCore::RendererBase& renderer, | ||||
|                                 Core::Frontend::GraphicsContext& context, | ||||
|                                 Tegra::DmaPusher& dma_pusher) { | ||||
|     rasterizer = renderer.ReadRasterizer(); | ||||
|     thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), | ||||
|                          std::ref(dma_pusher), std::ref(state)); | ||||
|     thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), | ||||
|                           std::ref(dma_pusher), std::ref(state)); | ||||
| } | ||||
|  | ||||
| void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | ||||
| @@ -117,26 +107,6 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||||
|     rasterizer->OnCPUWrite(addr, size); | ||||
| } | ||||
|  | ||||
| void ThreadManager::ShutDown() { | ||||
|     if (!state.is_running) { | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     { | ||||
|         std::lock_guard lk(state.write_lock); | ||||
|         state.is_running = false; | ||||
|         state.cv.notify_all(); | ||||
|     } | ||||
|  | ||||
|     if (!thread.joinable()) { | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     // Notify GPU thread that a shutdown is pending | ||||
|     PushCommand(EndProcessingCommand()); | ||||
|     thread.join(); | ||||
| } | ||||
|  | ||||
| void ThreadManager::OnCommandListEnd() { | ||||
|     PushCommand(OnCommandListEndCommand()); | ||||
| } | ||||
| @@ -152,9 +122,8 @@ u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { | ||||
|     state.queue.Push(CommandDataContainer(std::move(command_data), fence, block)); | ||||
|  | ||||
|     if (block) { | ||||
|         state.cv.wait(lk, [this, fence] { | ||||
|             return fence <= state.signaled_fence.load(std::memory_order_relaxed) || | ||||
|                    !state.is_running; | ||||
|         state.cv.wait(lk, thread.get_stop_token(), [this, fence] { | ||||
|             return fence <= state.signaled_fence.load(std::memory_order_relaxed); | ||||
|         }); | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -33,9 +33,6 @@ class RendererBase; | ||||
|  | ||||
| namespace VideoCommon::GPUThread { | ||||
|  | ||||
| /// Command to signal to the GPU thread that processing has ended | ||||
| struct EndProcessingCommand final {}; | ||||
|  | ||||
| /// Command to signal to the GPU thread that a command list is ready for processing | ||||
| struct SubmitListCommand final { | ||||
|     explicit SubmitListCommand(Tegra::CommandList&& entries_) : entries{std::move(entries_)} {} | ||||
| @@ -83,7 +80,7 @@ struct OnCommandListEndCommand final {}; | ||||
| struct GPUTickCommand final {}; | ||||
|  | ||||
| using CommandData = | ||||
|     std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, | ||||
|     std::variant<std::monostate, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, | ||||
|                  InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand, | ||||
|                  GPUTickCommand>; | ||||
|  | ||||
| @@ -100,14 +97,12 @@ struct CommandDataContainer { | ||||
|  | ||||
| /// Struct used to synchronize the GPU thread | ||||
| struct SynchState final { | ||||
|     std::atomic_bool is_running{true}; | ||||
|  | ||||
|     using CommandQueue = Common::SPSCQueue<CommandDataContainer>; | ||||
|     using CommandQueue = Common::SPSCQueue<CommandDataContainer, true>; | ||||
|     std::mutex write_lock; | ||||
|     CommandQueue queue; | ||||
|     u64 last_fence{}; | ||||
|     std::atomic<u64> signaled_fence{}; | ||||
|     std::condition_variable cv; | ||||
|     std::condition_variable_any cv; | ||||
| }; | ||||
|  | ||||
| /// Class used to manage the GPU thread | ||||
| @@ -149,7 +144,7 @@ private: | ||||
|     VideoCore::RasterizerInterface* rasterizer = nullptr; | ||||
|  | ||||
|     SynchState state; | ||||
|     std::thread thread; | ||||
|     std::jthread thread; | ||||
| }; | ||||
|  | ||||
| } // namespace VideoCommon::GPUThread | ||||
|   | ||||
| @@ -43,17 +43,10 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_) | ||||
|       command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} { | ||||
|     AcquireNewChunk(); | ||||
|     AllocateWorkerCommandBuffer(); | ||||
|     worker_thread = std::thread(&VKScheduler::WorkerThread, this); | ||||
|     worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); }); | ||||
| } | ||||
|  | ||||
| VKScheduler::~VKScheduler() { | ||||
|     { | ||||
|         std::lock_guard lock{work_mutex}; | ||||
|         quit = true; | ||||
|     } | ||||
|     work_cv.notify_all(); | ||||
|     worker_thread.join(); | ||||
| } | ||||
| VKScheduler::~VKScheduler() = default; | ||||
|  | ||||
| void VKScheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { | ||||
|     SubmitExecution(signal_semaphore, wait_semaphore); | ||||
| @@ -135,7 +128,7 @@ bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) { | ||||
|     return true; | ||||
| } | ||||
|  | ||||
| void VKScheduler::WorkerThread() { | ||||
| void VKScheduler::WorkerThread(std::stop_token stop_token) { | ||||
|     Common::SetCurrentThreadName("yuzu:VulkanWorker"); | ||||
|     do { | ||||
|         if (work_queue.empty()) { | ||||
| @@ -144,8 +137,8 @@ void VKScheduler::WorkerThread() { | ||||
|         std::unique_ptr<CommandChunk> work; | ||||
|         { | ||||
|             std::unique_lock lock{work_mutex}; | ||||
|             work_cv.wait(lock, [this] { return !work_queue.empty() || quit; }); | ||||
|             if (quit) { | ||||
|             work_cv.wait(lock, stop_token, [this] { return !work_queue.empty(); }); | ||||
|             if (stop_token.stop_requested()) { | ||||
|                 continue; | ||||
|             } | ||||
|             work = std::move(work_queue.front()); | ||||
| @@ -158,7 +151,7 @@ void VKScheduler::WorkerThread() { | ||||
|         } | ||||
|         std::lock_guard reserve_lock{reserve_mutex}; | ||||
|         chunk_reserve.push_back(std::move(work)); | ||||
|     } while (!quit); | ||||
|     } while (!stop_token.stop_requested()); | ||||
| } | ||||
|  | ||||
| void VKScheduler::AllocateWorkerCommandBuffer() { | ||||
|   | ||||
| @@ -187,7 +187,7 @@ private: | ||||
|         GraphicsPipeline* graphics_pipeline = nullptr; | ||||
|     }; | ||||
|  | ||||
|     void WorkerThread(); | ||||
|     void WorkerThread(std::stop_token stop_token); | ||||
|  | ||||
|     void AllocateWorkerCommandBuffer(); | ||||
|  | ||||
| @@ -212,7 +212,7 @@ private: | ||||
|     vk::CommandBuffer current_cmdbuf; | ||||
|  | ||||
|     std::unique_ptr<CommandChunk> chunk; | ||||
|     std::thread worker_thread; | ||||
|     std::jthread worker_thread; | ||||
|  | ||||
|     State state; | ||||
|  | ||||
| @@ -224,9 +224,8 @@ private: | ||||
|     std::vector<std::unique_ptr<CommandChunk>> chunk_reserve; | ||||
|     std::mutex reserve_mutex; | ||||
|     std::mutex work_mutex; | ||||
|     std::condition_variable work_cv; | ||||
|     std::condition_variable_any work_cv; | ||||
|     std::condition_variable wait_cv; | ||||
|     std::atomic_bool quit{}; | ||||
| }; | ||||
|  | ||||
| } // namespace Vulkan | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 bunnei
					bunnei