vulkan: Build pipelines in parallel at runtime

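Queue pipeline creation to the "PipelineBuilder" worker threads, and have the scheduler's worker thread wait for a pipeline to finish building before binding it to the command buffer. This allows several pipelines to be built on multiple threads at the same time.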
2021-04-01 01:36:22 -03:00
parent f1dd743731
commit 2fc698b040
9 changed files with 197 additions and 165 deletions
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -27,8 +27,9 @@ DescriptorLayoutTuple CreateLayout(const Device& device, const Shader::Info& inf

 ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool,
                                 VKUpdateDescriptorQueue& update_descriptor_queue_,
-                                 const Shader::Info& info_, vk::ShaderModule spv_module_)
-    : update_descriptor_queue{&update_descriptor_queue_}, info{info_},
+                                 Common::ThreadWorker* thread_worker, const Shader::Info& info_,
+                                 vk::ShaderModule spv_module_)
+    : update_descriptor_queue{update_descriptor_queue_}, info{info_},
      spv_module(std::move(spv_module_)) {
    DescriptorLayoutTuple tuple{CreateLayout(device, info)};
    descriptor_set_layout = std::move(tuple.descriptor_set_layout);
@@ -36,46 +37,55 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip
    descriptor_update_template = std::move(tuple.descriptor_update_template);
    descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout);

-    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
-        .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
-        .pNext = nullptr,
-        .requiredSubgroupSize = GuestWarpSize,
-    };
-    pipeline = device.GetLogical().CreateComputePipeline({
-        .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
-        .pNext = nullptr,
-        .flags = 0,
-        .stage{
-            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-            .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
+    auto func{[this, &device] {
+        const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
+            .pNext = nullptr,
+            .requiredSubgroupSize = GuestWarpSize,
+        };
+        pipeline = device.GetLogical().CreateComputePipeline({
+            .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+            .pNext = nullptr,
            .flags = 0,
-            .stage = VK_SHADER_STAGE_COMPUTE_BIT,
-            .module = *spv_module,
-            .pName = "main",
-            .pSpecializationInfo = nullptr,
-        },
-        .layout = *pipeline_layout,
-        .basePipelineHandle = 0,
-        .basePipelineIndex = 0,
-    });
+            .stage{
+                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+                .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
+                .flags = 0,
+                .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+                .module = *spv_module,
+                .pName = "main",
+                .pSpecializationInfo = nullptr,
+            },
+            .layout = *pipeline_layout,
+            .basePipelineHandle = 0,
+            .basePipelineIndex = 0,
+        });
+        building_flag.test_and_set();
+        building_flag.notify_all();
+    }};
+    if (thread_worker) {
+        thread_worker->QueueWork(std::move(func));
+    } else {
+        func();
+    }
 }

-void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) {
+void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
+                                Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler,
+                                BufferCache& buffer_cache, TextureCache& texture_cache) {
+    update_descriptor_queue.Acquire();
+
    buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask);
    buffer_cache.UnbindComputeStorageBuffers();
-    size_t index{};
+    size_t ssbo_index{};
    for (const auto& desc : info.storage_buffers_descriptors) {
        ASSERT(desc.count == 1);
-        buffer_cache.BindComputeStorageBuffer(index, desc.cbuf_index, desc.cbuf_offset, true);
-        ++index;
+        buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, true);
+        ++ssbo_index;
    }
    buffer_cache.UpdateComputeBuffers();
    buffer_cache.BindHostComputeBuffers();
-}

-void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute,
-                                            Tegra::MemoryManager& gpu_memory,
-                                            TextureCache& texture_cache) {
    texture_cache.SynchronizeComputeDescriptors();

    static constexpr size_t max_elements = 64;
@@ -103,15 +113,26 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple
    const std::span indices_span(image_view_indices.data(), image_view_indices.size());
    texture_cache.FillComputeImageViews(indices_span, image_view_ids);

-    size_t index{};
+    size_t image_index{};
    PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache,
-                         *update_descriptor_queue, index);
-}
+                         update_descriptor_queue, image_index);

-VkDescriptorSet ComputePipeline::UpdateDescriptorSet() {
+    if (!building_flag.test()) {
+        // Wait for the pipeline to be built
+        scheduler.Record([this](vk::CommandBuffer) { building_flag.wait(false); });
+    }
+    scheduler.Record([this](vk::CommandBuffer cmdbuf) {
+        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
+    });
+    if (!descriptor_set_layout) {
+        return;
+    }
    const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
-    update_descriptor_queue->Send(*descriptor_update_template, descriptor_set);
-    return descriptor_set;
+    update_descriptor_queue.Send(*descriptor_update_template, descriptor_set);
+    scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) {
+        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
+                                  descriptor_set, nullptr);
+    });
 }

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -4,7 +4,10 @@

 #pragma once

+#include <atomic>
+
 #include "common/common_types.h"
+#include "common/thread_worker.h"
 #include "shader_recompiler/shader_info.h"
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
@@ -16,36 +19,26 @@
 namespace Vulkan {

 class Device;
+class VKScheduler;

 class ComputePipeline {
 public:
-    explicit ComputePipeline() = default;
    explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool,
                             VKUpdateDescriptorQueue& update_descriptor_queue,
-                             const Shader::Info& info, vk::ShaderModule spv_module);
+                             Common::ThreadWorker* thread_worker, const Shader::Info& info,
+                             vk::ShaderModule spv_module);

-    ComputePipeline& operator=(ComputePipeline&&) noexcept = default;
-    ComputePipeline(ComputePipeline&&) noexcept = default;
+    ComputePipeline& operator=(ComputePipeline&&) noexcept = delete;
+    ComputePipeline(ComputePipeline&&) noexcept = delete;

    ComputePipeline& operator=(const ComputePipeline&) = delete;
    ComputePipeline(const ComputePipeline&) = delete;

-    void ConfigureBufferCache(BufferCache& buffer_cache);
-    void ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute,
-                               Tegra::MemoryManager& gpu_memory, TextureCache& texture_cache);
-
-    [[nodiscard]] VkDescriptorSet UpdateDescriptorSet();
-
-    [[nodiscard]] VkPipeline Handle() const noexcept {
-        return *pipeline;
-    }
-
-    [[nodiscard]] VkPipelineLayout PipelineLayout() const noexcept {
-        return *pipeline_layout;
-    }
+    void Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory,
+                   VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache);

 private:
-    VKUpdateDescriptorQueue* update_descriptor_queue;
+    VKUpdateDescriptorQueue& update_descriptor_queue;
    Shader::Info info;

    vk::ShaderModule spv_module;
@@ -54,6 +47,7 @@ private:
    vk::PipelineLayout pipeline_layout;
    vk::DescriptorUpdateTemplateKHR descriptor_update_template;
    vk::Pipeline pipeline;
+    std::atomic_flag building_flag{};
 };

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -112,13 +112,15 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_,
                                   BufferCache& buffer_cache_, TextureCache& texture_cache_,
                                   const Device& device, VKDescriptorPool& descriptor_pool,
                                   VKUpdateDescriptorQueue& update_descriptor_queue_,
+                                   Common::ThreadWorker* worker_thread,
                                   RenderPassCache& render_pass_cache,
-                                   const FixedPipelineState& state,
+                                   const FixedPipelineState& state_,
                                   std::array<vk::ShaderModule, NUM_STAGES> stages,
                                   const std::array<const Shader::Info*, NUM_STAGES>& infos)
-    : maxwell3d{&maxwell3d_}, gpu_memory{&gpu_memory_}, texture_cache{&texture_cache_},
-      buffer_cache{&buffer_cache_}, scheduler{&scheduler_},
-      update_descriptor_queue{&update_descriptor_queue_}, spv_modules{std::move(stages)} {
+    : maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_},
+      buffer_cache{buffer_cache_}, scheduler{scheduler_},
+      update_descriptor_queue{update_descriptor_queue_}, state{state_}, spv_modules{
+                                                                            std::move(stages)} {
    std::ranges::transform(infos, stage_infos.begin(),
                           [](const Shader::Info* info) { return info ? *info : Shader::Info{}; });

@@ -128,8 +130,17 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_,
    descriptor_update_template = std::move(tuple.descriptor_update_template);
    descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout);

-    const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))};
-    MakePipeline(device, state, render_pass);
+    auto func{[this, &device, &render_pass_cache] {
+        const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))};
+        MakePipeline(device, render_pass);
+        building_flag.test_and_set();
+        building_flag.notify_all();
+    }};
+    if (worker_thread) {
+        worker_thread->QueueWork(std::move(func));
+    } else {
+        func();
+    }
 }

 void GraphicsPipeline::Configure(bool is_indexed) {
@@ -138,67 +149,72 @@ void GraphicsPipeline::Configure(bool is_indexed) {
    static_vector<u32, max_images_elements> image_view_indices;
    static_vector<VkSampler, max_images_elements> samplers;

-    texture_cache->SynchronizeGraphicsDescriptors();
+    texture_cache.SynchronizeGraphicsDescriptors();

-    const auto& regs{maxwell3d->regs};
+    const auto& regs{maxwell3d.regs};
    const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
    for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
        const Shader::Info& info{stage_infos[stage]};
-        buffer_cache->SetEnabledUniformBuffers(stage, info.constant_buffer_mask);
-        buffer_cache->UnbindGraphicsStorageBuffers(stage);
+        buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask);
+        buffer_cache.UnbindGraphicsStorageBuffers(stage);
        size_t index{};
        for (const auto& desc : info.storage_buffers_descriptors) {
            ASSERT(desc.count == 1);
-            buffer_cache->BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset,
-                                                    true);
+            buffer_cache.BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset,
+                                                   true);
            ++index;
        }
-        const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers};
+        const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers};
        for (const auto& desc : info.texture_descriptors) {
            const u32 cbuf_index{desc.cbuf_index};
            const u32 cbuf_offset{desc.cbuf_offset};
            ASSERT(cbufs[cbuf_index].enabled);
            const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset};
-            const u32 raw_handle{gpu_memory->Read<u32>(addr)};
+            const u32 raw_handle{gpu_memory.Read<u32>(addr)};

            const TextureHandle handle(raw_handle, via_header_index);
            image_view_indices.push_back(handle.image);

-            Sampler* const sampler{texture_cache->GetGraphicsSampler(handle.sampler)};
+            Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)};
            samplers.push_back(sampler->Handle());
        }
    }
    const std::span indices_span(image_view_indices.data(), image_view_indices.size());
-    buffer_cache->UpdateGraphicsBuffers(is_indexed);
-    texture_cache->FillGraphicsImageViews(indices_span, image_view_ids);
+    buffer_cache.UpdateGraphicsBuffers(is_indexed);
+    texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);

-    buffer_cache->BindHostGeometryBuffers(is_indexed);
+    buffer_cache.BindHostGeometryBuffers(is_indexed);

    size_t index{};
    for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
-        buffer_cache->BindHostStageBuffers(stage);
+        buffer_cache.BindHostStageBuffers(stage);
        PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(),
-                             *texture_cache, *update_descriptor_queue, index);
+                             texture_cache, update_descriptor_queue, index);
    }
-    texture_cache->UpdateRenderTargets(false);
-    scheduler->RequestRenderpass(texture_cache->GetFramebuffer());
-
-    scheduler->BindGraphicsPipeline(*pipeline);
+    texture_cache.UpdateRenderTargets(false);
+    scheduler.RequestRenderpass(texture_cache.GetFramebuffer());

+    if (!building_flag.test()) {
+        scheduler.Record([this](vk::CommandBuffer) { building_flag.wait(false); });
+    }
+    if (scheduler.UpdateGraphicsPipeline(this)) {
+        scheduler.Record([this](vk::CommandBuffer cmdbuf) {
+            cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+        });
+    }
    if (!descriptor_set_layout) {
        return;
    }
    const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
-    update_descriptor_queue->Send(*descriptor_update_template, descriptor_set);
+    update_descriptor_queue.Send(*descriptor_update_template, descriptor_set);

-    scheduler->Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) {
        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
                                  nullptr);
    });
 }

-void GraphicsPipeline::MakePipeline(const Device& device, const FixedPipelineState& state,
-                                    VkRenderPass render_pass) {
+void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) {
    FixedPipelineState::DynamicState dynamic{};
    if (!device.IsExtExtendedDynamicStateSupported()) {
        dynamic = state.dynamic_state;
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -5,13 +5,15 @@
 #pragma once

 #include <array>
+#include <atomic>

+#include "common/thread_worker.h"
 #include "shader_recompiler/shader_info.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
-#include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"

 namespace Vulkan {
@@ -25,34 +27,34 @@ class GraphicsPipeline {
    static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;

 public:
-    explicit GraphicsPipeline() = default;
    explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d,
                              Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler,
-                              BufferCache& buffer_cache,
-                              TextureCache& texture_cache, const Device& device, VKDescriptorPool& descriptor_pool,
+                              BufferCache& buffer_cache, TextureCache& texture_cache,
+                              const Device& device, VKDescriptorPool& descriptor_pool,
                              VKUpdateDescriptorQueue& update_descriptor_queue,
+                              Common::ThreadWorker* worker_thread,
                              RenderPassCache& render_pass_cache, const FixedPipelineState& state,
                              std::array<vk::ShaderModule, NUM_STAGES> stages,
                              const std::array<const Shader::Info*, NUM_STAGES>& infos);

    void Configure(bool is_indexed);

-    GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = default;
-    GraphicsPipeline(GraphicsPipeline&&) noexcept = default;
+    GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;
+    GraphicsPipeline(GraphicsPipeline&&) noexcept = delete;

    GraphicsPipeline& operator=(const GraphicsPipeline&) = delete;
    GraphicsPipeline(const GraphicsPipeline&) = delete;

 private:
-    void MakePipeline(const Device& device, const FixedPipelineState& state,
-                      VkRenderPass render_pass);
+    void MakePipeline(const Device& device, VkRenderPass render_pass);

-    Tegra::Engines::Maxwell3D* maxwell3d{};
-    Tegra::MemoryManager* gpu_memory{};
-    TextureCache* texture_cache{};
-    BufferCache* buffer_cache{};
-    VKScheduler* scheduler{};
-    VKUpdateDescriptorQueue* update_descriptor_queue{};
+    Tegra::Engines::Maxwell3D& maxwell3d;
+    Tegra::MemoryManager& gpu_memory;
+    TextureCache& texture_cache;
+    BufferCache& buffer_cache;
+    VKScheduler& scheduler;
+    VKUpdateDescriptorQueue& update_descriptor_queue;
+    const FixedPipelineState state;

    std::array<vk::ShaderModule, NUM_STAGES> spv_modules;
    std::array<Shader::Info, NUM_STAGES> stage_infos;
@@ -61,6 +63,7 @@ private:
    vk::PipelineLayout pipeline_layout;
    vk::DescriptorUpdateTemplateKHR descriptor_update_template;
    vk::Pipeline pipeline;
+    std::atomic_flag building_flag{};
 };

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -518,9 +518,8 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
    }
    pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id);

-    Common::ThreadWorker worker(11, "PipelineBuilder");
-    std::mutex cache_mutex;
    struct {
+        std::mutex mutex;
        size_t total{0};
        size_t built{0};
        bool has_loaded{false};
@@ -542,51 +541,53 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
        }
        u32 num_envs{};
        file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs));
-        auto envs{std::make_shared<std::vector<FileEnvironment>>(num_envs)};
-        for (FileEnvironment& env : *envs) {
+        std::vector<FileEnvironment> envs(num_envs);
+        for (FileEnvironment& env : envs) {
            env.Deserialize(file);
        }
-        if (envs->front().ShaderStage() == Shader::Stage::Compute) {
+        if (envs.front().ShaderStage() == Shader::Stage::Compute) {
            ComputePipelineCacheKey key;
            file.read(reinterpret_cast<char*>(&key), sizeof(key));

-            worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] {
+            workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
                ShaderPools pools;
-                ComputePipeline pipeline{CreateComputePipeline(pools, key, envs->front())};
+                auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)};

-                std::lock_guard lock{cache_mutex};
+                std::lock_guard lock{state.mutex};
                compute_cache.emplace(key, std::move(pipeline));
+                ++state.built;
                if (state.has_loaded) {
-                    callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total);
+                    callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
                }
            });
        } else {
            GraphicsPipelineCacheKey key;
            file.read(reinterpret_cast<char*>(&key), sizeof(key));

-            worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] {
+            workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
                ShaderPools pools;
                boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
-                for (auto& env : *envs) {
+                for (auto& env : envs) {
                    env_ptrs.push_back(&env);
                }
-                GraphicsPipeline pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs))};
+                auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)};

-                std::lock_guard lock{cache_mutex};
+                std::lock_guard lock{state.mutex};
                graphics_cache.emplace(key, std::move(pipeline));
+                ++state.built;
                if (state.has_loaded) {
-                    callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total);
+                    callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
                }
            });
        }
        ++state.total;
    }
    {
-        std::lock_guard lock{cache_mutex};
+        std::lock_guard lock{state.mutex};
        callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
        state.has_loaded = true;
    }
-    worker.WaitForRequests();
+    workers.WaitForRequests();
 }

 size_t ComputePipelineCacheKey::Hash() const noexcept {
@@ -619,7 +620,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
      scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
      update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
-      buffer_cache{buffer_cache_}, texture_cache{texture_cache_} {
+      buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, workers(11, "PipelineBuilder") {
    const auto& float_control{device.FloatControlProperties()};
    const VkDriverIdKHR driver_id{device.GetDriverID()};
    base_profile = Shader::Profile{
@@ -662,10 +663,10 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
    const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
    auto& pipeline{pair->second};
    if (!is_new) {
-        return &pipeline;
+        return pipeline.get();
    }
    pipeline = CreateGraphicsPipeline();
-    return &pipeline;
+    return pipeline.get();
 }

 ComputePipeline* PipelineCache::CurrentComputePipeline() {
@@ -691,10 +692,10 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
    const auto [pair, is_new]{compute_cache.try_emplace(key)};
    auto& pipeline{pair->second};
    if (!is_new) {
-        return &pipeline;
+        return pipeline.get();
    }
    pipeline = CreateComputePipeline(key, shader);
-    return &pipeline;
+    return pipeline.get();
 }

 bool PipelineCache::RefreshStages() {
@@ -743,9 +744,9 @@ const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr c
    return result;
 }

-GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools,
-                                                       const GraphicsPipelineCacheKey& key,
-                                                       std::span<Shader::Environment* const> envs) {
+std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
+    ShaderPools& pools, const GraphicsPipelineCacheKey& key,
+    std::span<Shader::Environment* const> envs, bool build_in_parallel) {
    LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
    size_t env_index{0};
    std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
@@ -783,12 +784,14 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools,
            modules[stage_index].SetObjectNameEXT(name.c_str());
        }
    }
-    return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device,
-                            descriptor_pool, update_descriptor_queue, render_pass_cache, key.state,
-                            std::move(modules), infos);
+    Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
+    return std::make_unique<GraphicsPipeline>(
+        maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool,
+        update_descriptor_queue, thread_worker, render_pass_cache, key.state, std::move(modules),
+        infos);
 }

-GraphicsPipeline PipelineCache::CreateGraphicsPipeline() {
+std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
    main_pools.ReleaseContents();

    std::array<GraphicsEnvironment, Maxwell::MaxShaderProgram> graphics_envs;
@@ -809,22 +812,22 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline() {
        generic_envs.push_back(&env);
        envs.push_back(&env);
    }
-    GraphicsPipeline pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs))};
+    auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)};
    if (!pipeline_cache_filename.empty()) {
        SerializePipeline(graphics_key, generic_envs, pipeline_cache_filename);
    }
    return pipeline;
 }

-ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheKey& key,
-                                                     const ShaderInfo* shader) {
+std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
+    const ComputePipelineCacheKey& key, const ShaderInfo* shader) {
    const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
    const auto& qmd{kepler_compute.launch_description};
    ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
    env.SetCachedSize(shader->size_bytes);

    main_pools.ReleaseContents();
-    ComputePipeline pipeline{CreateComputePipeline(main_pools, key, env)};
+    auto pipeline{CreateComputePipeline(main_pools, key, env, true)};
    if (!pipeline_cache_filename.empty()) {
        SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env},
                          pipeline_cache_filename);
@@ -832,9 +835,9 @@ ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheK
    return pipeline;
 }

-ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools,
-                                                     const ComputePipelineCacheKey& key,
-                                                     Shader::Environment& env) const {
+std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
+    ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
+    bool build_in_parallel) {
    LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());

    Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
@@ -846,8 +849,9 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools,
        const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])};
        spv_module.SetObjectNameEXT(name.c_str());
    }
-    return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info,
-                           std::move(spv_module)};
+    Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
+    return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue,
+                                             thread_worker, program.info, std::move(spv_module));
 }

 static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) {
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -14,6 +14,7 @@
 #include <vector>

 #include "common/common_types.h"
+#include "common/thread_worker.h"
 #include "shader_recompiler/frontend/ir/basic_block.h"
 #include "shader_recompiler/frontend/ir/microinstruction.h"
 #include "shader_recompiler/frontend/maxwell/control_flow.h"
@@ -145,16 +146,19 @@ private:

    const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr);

-    GraphicsPipeline CreateGraphicsPipeline();
+    std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();

-    GraphicsPipeline CreateGraphicsPipeline(ShaderPools& pools, const GraphicsPipelineCacheKey& key,
-                                            std::span<Shader::Environment* const> envs);
+    std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
+        ShaderPools& pools, const GraphicsPipelineCacheKey& key,
+        std::span<Shader::Environment* const> envs, bool build_in_parallel);

-    ComputePipeline CreateComputePipeline(const ComputePipelineCacheKey& key,
-                                          const ShaderInfo* shader);
+    std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineCacheKey& key,
+                                                           const ShaderInfo* shader);

-    ComputePipeline CreateComputePipeline(ShaderPools& pools, const ComputePipelineCacheKey& key,
-                                          Shader::Environment& env) const;
+    std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderPools& pools,
+                                                           const ComputePipelineCacheKey& key,
+                                                           Shader::Environment& env,
+                                                           bool build_in_parallel);

    Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Stage stage);

@@ -174,13 +178,15 @@ private:
    GraphicsPipelineCacheKey graphics_key{};
    std::array<const ShaderInfo*, 6> shader_infos{};

-    std::unordered_map<ComputePipelineCacheKey, ComputePipeline> compute_cache;
-    std::unordered_map<GraphicsPipelineCacheKey, GraphicsPipeline> graphics_cache;
+    std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
+    std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;

    ShaderPools main_pools;

    Shader::Profile base_profile;
    std::string pipeline_cache_filename;
+
+    Common::ThreadWorker workers;
 };

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -276,22 +276,11 @@ void RasterizerVulkan::DispatchCompute() {
        return;
    }
    std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
-    update_descriptor_queue.Acquire();
-    pipeline->ConfigureBufferCache(buffer_cache);
-    pipeline->ConfigureTextureCache(kepler_compute, gpu_memory, texture_cache);
-    const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()};
+    pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache);

    const auto& qmd{kepler_compute.launch_description};
    const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
-    const VkPipeline pipeline_handle{pipeline->Handle()};
-    const VkPipelineLayout pipeline_layout{pipeline->PipelineLayout()};
-    scheduler.Record(
-        [pipeline_handle, pipeline_layout, dim, descriptor_set](vk::CommandBuffer cmdbuf) {
-            cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
-            cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0,
-                                      descriptor_set, nullptr);
-            cmdbuf.Dispatch(dim[0], dim[1], dim[2]);
-        });
+    scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
 }

 void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -124,18 +124,16 @@ void VKScheduler::RequestOutsideRenderPassOperationContext() {
    EndRenderPass();
 }

-void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) {
+bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
    if (state.graphics_pipeline == pipeline) {
-        return;
+        return false;
    }
    state.graphics_pipeline = pipeline;
-    Record([pipeline](vk::CommandBuffer cmdbuf) {
-        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
-    });
+    return true;
 }

 void VKScheduler::WorkerThread() {
-    Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
+    Common::SetCurrentThreadName("yuzu:VulkanWorker");
    std::unique_lock lock{mutex};
    do {
        cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -22,6 +22,7 @@ namespace Vulkan {
 class CommandPool;
 class Device;
 class Framebuffer;
+class GraphicsPipeline;
 class StateTracker;
 class VKQueryCache;

@@ -52,8 +53,8 @@ public:
    /// of a renderpass.
    void RequestOutsideRenderPassOperationContext();

-    /// Binds a pipeline to the current execution context.
-    void BindGraphicsPipeline(VkPipeline pipeline);
+    /// Updates the tracked graphics pipeline; returns true if it changed and must be rebound.
+    bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline);

    /// Invalidates current command buffer state except for render passes
    void InvalidateState();
@@ -170,7 +171,7 @@ private:
        VkRenderPass renderpass = nullptr;
        VkFramebuffer framebuffer = nullptr;
        VkExtent2D render_area = {0, 0};
-        VkPipeline graphics_pipeline = nullptr;
+        GraphicsPipeline* graphics_pipeline = nullptr;
    };

    void WorkerThread();