yuzu/src/video_core/shader/async_shaders.cpp

// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <condition_variable>
#include <mutex>
#include <thread>
#include <vector>
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/shader/async_shaders.h"

namespace VideoCommon::Shader {

AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {}

// Tears the worker pool down through KillWorkers(), i.e. the workers are asked to exit and are
// detached rather than joined.
AsyncShaders::~AsyncShaders() {
    this->KillWorkers();
}

void AsyncShaders::AllocateWorkers() {
    // Use at least one thread
    u32 num_workers = 1;

    // Deduce how many more threads we can use
    const u32 thread_count = std::thread::hardware_concurrency();
    if (thread_count >= 8) {
        // Increase async workers by 1 for every 2 threads >= 8
        num_workers += 1 + (thread_count - 8) / 2;
    }

    // If we already have workers queued, ignore
    if (num_workers == worker_threads.size()) {
        return;
    }

    // If workers already exist, clear them
    if (!worker_threads.empty()) {
        FreeWorkers();
    }

    // Create workers
    for (std::size_t i = 0; i < num_workers; i++) {
        context_list.push_back(emu_window.CreateSharedContext());
        worker_threads.push_back(
            std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get()));
    }
}

void AsyncShaders::FreeWorkers() {
    // Mark all threads to quit
    is_thread_exiting.store(true);
    cv.notify_all();
    for (auto& thread : worker_threads) {
        thread.join();
    }
    // Clear our shared contexts
    context_list.clear();

    // Clear our worker threads
    worker_threads.clear();
}

void AsyncShaders::KillWorkers() {
    is_thread_exiting.store(true);
    for (auto& thread : worker_threads) {
        thread.detach();
    }
    // Clear our shared contexts
    context_list.clear();

    // Clear our worker threads
    worker_threads.clear();
}

// Reports whether at least one work item is waiting in pending_queue.
// This function does not lock queue_mutex itself; the worker loop calls it with the mutex held.
bool AsyncShaders::HasWorkQueued() const {
    const bool queue_is_empty = pending_queue.empty();
    return !queue_is_empty;
}

// Reports whether finished_work currently holds any results. The check is done under a shared
// lock on completed_mutex.
bool AsyncShaders::HasCompletedWork() const {
    std::shared_lock reader_lock{completed_mutex};
    const bool nothing_finished = finished_work.empty();
    return !nothing_finished;
}

// Heuristic deciding whether the shader for the current draw may be built asynchronously,
// based on the Maxwell3D register state of the given GPU.
//
// Returns true when depth (zeta) is enabled, or when both the index count and the vertex count
// are above 6. Returns false only for small draws without depth.
bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
    const auto& maxwell_regs = gpu.Maxwell3D().regs;

    // Draws that use depth are assumed not to be one-off renders, so they skip the size check
    // entirely and are always allowed to build async.
    if (!maxwell_regs.zeta_enable) {
        // A small index or vertex count is assumed to be a full screen quad. Such shaders are
        // usually used once to build a texture, so they must not be built async.
        const bool is_small_draw =
            maxwell_regs.index_array.count <= 6 || maxwell_regs.vertex_buffer.count <= 6;
        if (is_small_draw) {
            return false;
        }
    }

    return true;
}

std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
    std::vector<Result> results;
    {
        std::unique_lock lock{completed_mutex};
        results = std::move(finished_work);
        finished_work.clear();
    }
    return results;
}

// Queues an OpenGL shader build and wakes one worker thread.
//
// The work item is tagged as GLASM when the device reports assembly shaders and as OpenGL
// otherwise. The code vectors are moved into the item, the registry is copied into it, and the
// device is stored by address, so the device has to stay alive until the item has been built.
void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
                                     Tegra::Engines::ShaderType shader_type, u64 uid,
                                     std::vector<u64> code, std::vector<u64> code_b,
                                     u32 main_offset, CompilerSettings compiler_settings,
                                     const Registry& registry, VAddr cpu_addr) {
    std::lock_guard queue_guard{queue_mutex};

    WorkerParams params{
        .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
        .device = &device,
        .shader_type = shader_type,
        .uid = uid,
        .code = std::move(code),
        .code_b = std::move(code_b),
        .main_offset = main_offset,
        .compiler_settings = compiler_settings,
        .registry = registry,
        .cpu_address = cpu_addr,
    };
    pending_queue.push(std::move(params));

    cv.notify_one();
}

// Queues a Vulkan graphics pipeline build and wakes one worker thread.
//
// The bindings and the SPIR-V program are moved into the work item. The pipeline cache, device,
// scheduler, descriptor pool, update descriptor queue and renderpass cache are stored by
// address only, so they have to stay alive until the item has been built.
void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
                                     const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler,
                                     Vulkan::VKDescriptorPool& descriptor_pool,
                                     Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
                                     Vulkan::VKRenderPassCache& renderpass_cache,
                                     std::vector<VkDescriptorSetLayoutBinding> bindings,
                                     Vulkan::SPIRVProgram program,
                                     Vulkan::GraphicsPipelineCacheKey key) {
    std::lock_guard queue_guard{queue_mutex};

    WorkerParams params{
        .backend = Backend::Vulkan,
        .pp_cache = pp_cache,
        .vk_device = &device,
        .scheduler = &scheduler,
        .descriptor_pool = &descriptor_pool,
        .update_descriptor_queue = &update_descriptor_queue,
        .renderpass_cache = &renderpass_cache,
        .bindings = std::move(bindings),
        .program = std::move(program),
        .key = key,
    };
    pending_queue.push(std::move(params));

    cv.notify_one();
}

// Main loop of a shader compiler worker thread.
//
// The thread sleeps on the condition variable until work is queued or the exit flag is raised.
// It then pops a single work item under queue_mutex and builds it with the lock released:
//  - OpenGL and GLASM items are built with OpenGL::BuildShader while `context` is acquired, and
//    the result is appended to finished_work under completed_mutex.
//  - Vulkan items are turned into a VKGraphicsPipeline which is handed to the pipeline cache
//    stored in the work item.
// The function returns once is_thread_exiting is observed as set.
//
// context: graphics context acquired around OpenGL/GLASM builds; not used for Vulkan items.
void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
    while (!is_thread_exiting.load(std::memory_order_relaxed)) {
        std::unique_lock lock{queue_mutex};
        cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
        if (is_thread_exiting) {
            return;
        }

        // The wait predicate was evaluated with queue_mutex held and the lock is still owned,
        // so the queue is expected to be non-empty here. A single guard is kept because calling
        // front() on an empty queue would be undefined behavior.
        if (pending_queue.empty()) {
            continue;
        }

        // Take ownership of the next item, then release the lock so that other workers can
        // dequeue while this one is compiling.
        WorkerParams work = std::move(pending_queue.front());
        pending_queue.pop();
        lock.unlock();

        if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) {
            // The IR is decoded from `code` only; `code_b` is just carried through to the result
            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);
            const auto scope = context->Acquire();
            auto program =
                OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry);
            Result result{};
            result.backend = work.backend;
            result.cpu_address = work.cpu_address;
            result.uid = work.uid;
            result.code = std::move(work.code);
            result.code_b = std::move(work.code_b);
            result.shader_type = work.shader_type;

            // Only the program matching the backend of the work item is moved into the result
            if (work.backend == Backend::OpenGL) {
                result.program.opengl = std::move(program->source_program);
            } else if (work.backend == Backend::GLASM) {
                result.program.glasm = std::move(program->assembly_program);
            }

            {
                std::unique_lock complete_lock(completed_mutex);
                finished_work.push_back(std::move(result));
            }
        } else if (work.backend == Backend::Vulkan) {
            auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
                *work.vk_device, *work.scheduler, *work.descriptor_pool,
                *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings,
                work.program);

            work.pp_cache->EmplacePipeline(std::move(pipeline));
        }
    }
}

} // namespace VideoCommon::Shader
async shaders 2020-07-10 03:36:38 +00:00			`// Copyright 2020 yuzu Emulator Project`
			`// Licensed under GPLv2 or any later version`
			`// Refer to the license.txt file included.`

Fix style issues 2020-07-18 04:24:32 +00:00			`#include <condition_variable>`
			`#include <mutex>`
			`#include <thread>`
			`#include <vector>`
async shaders 2020-07-10 03:36:38 +00:00			`#include "video_core/engines/maxwell_3d.h"`
			`#include "video_core/renderer_base.h"`
			`#include "video_core/renderer_opengl/gl_shader_cache.h"`
			`#include "video_core/shader/async_shaders.h"`

			`namespace VideoCommon::Shader {`
Fix style issues 2020-07-18 04:24:32 +00:00
async shaders 2020-07-10 03:36:38 +00:00			`AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {}`
Fix style issues 2020-07-18 04:24:32 +00:00
async shaders 2020-07-10 03:36:38 +00:00			`AsyncShaders::~AsyncShaders() {`
			`KillWorkers();`
			`}`

move thread 1/4 count computation into allocate workers method 2020-08-05 16:53:26 +00:00			`void AsyncShaders::AllocateWorkers() {`
async_shaders: Increase Async worker thread count for 8+ thread cpus Adds 1 async worker thread for every 2 available threads above 8 2020-10-29 18:16:45 +00:00			`// Use at least one thread`
			`u32 num_workers = 1;`

			`// Deduce how many more threads we can use`
			`const u32 thread_count = std::thread::hardware_concurrency();`
			`if (thread_count >= 8) {`
			`// Increase async workers by 1 for every 2 threads >= 8`
			`num_workers += 1 + (thread_count - 8) / 2;`
			`}`
move thread 1/4 count computation into allocate workers method 2020-08-05 16:53:26 +00:00
Morph: Update worker allocation comment Co-authored-by: Morph <39850852+Morph1984@users.noreply.github.com> 2020-08-05 20:41:22 +00:00			`// If we already have workers queued, ignore`
move thread 1/4 count computation into allocate workers method 2020-08-05 16:53:26 +00:00			`if (num_workers == worker_threads.size()) {`
async shaders 2020-07-10 03:36:38 +00:00			`return;`
			`}`

			`// If workers already exist, clear them`
			`if (!worker_threads.empty()) {`
			`FreeWorkers();`
			`}`

			`// Create workers`
			`for (std::size_t i = 0; i < num_workers; i++) {`
			`context_list.push_back(emu_window.CreateSharedContext());`
async_shaders: Resolve -Wpessimizing-move warning Prevents pessimization of the move constructor (which thankfully didn't actually happen in practice here, given std::thread isn't copyable). 2020-08-14 12:16:03 +00:00			`worker_threads.push_back(`
			`std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get()));`
async shaders 2020-07-10 03:36:38 +00:00			`}`
			`}`

			`void AsyncShaders::FreeWorkers() {`
			`// Mark all threads to quit`
			`is_thread_exiting.store(true);`
Remove duplicate config 2020-07-16 08:51:32 +00:00			`cv.notify_all();`
async shaders 2020-07-10 03:36:38 +00:00			`for (auto& thread : worker_threads) {`
			`thread.join();`
			`}`
			`// Clear our shared contexts`
			`context_list.clear();`

			`// Clear our worker threads`
			`worker_threads.clear();`
			`}`

			`void AsyncShaders::KillWorkers() {`
			`is_thread_exiting.store(true);`
			`for (auto& thread : worker_threads) {`
			`thread.detach();`
			`}`
			`// Clear our shared contexts`
			`context_list.clear();`

			`// Clear our worker threads`
			`worker_threads.clear();`
			`}`

async_shaders: Mark getters as const member functions While we're at it, we can also mark them as nodiscard. 2020-08-24 05:15:48 +00:00			`bool AsyncShaders::HasWorkQueued() const {`
async shaders 2020-07-10 03:36:38 +00:00			`return !pending_queue.empty();`
			`}`

async_shaders: Mark getters as const member functions While we're at it, we can also mark them as nodiscard. 2020-08-24 05:15:48 +00:00			`bool AsyncShaders::HasCompletedWork() const {`
Fix style issues 2020-07-18 04:24:32 +00:00			`std::shared_lock lock{completed_mutex};`
async shaders 2020-07-10 03:36:38 +00:00			`return !finished_work.empty();`
			`}`

			`bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {`
			`const auto& regs = gpu.Maxwell3D().regs;`

			`// If something is using depth, we can assume that games are not rendering anything which will`
			`// be used one time.`
			`if (regs.zeta_enable) {`
			`return true;`
			`}`

			`// If games are using a small index count, we can assume these are full screen quads. Usually`
			`// these shaders are only used once for building textures so we can assume they can't be built`
			`// async`
			`if (regs.index_array.count <= 6 \|\| regs.vertex_buffer.count <= 6) {`
			`return false;`
			`}`

			`return true;`
			`}`

			`std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {`
async_shaders: Mark getters as const member functions While we're at it, we can also mark them as nodiscard. 2020-08-24 05:15:48 +00:00			`std::vector<Result> results;`
async shaders 2020-07-10 03:36:38 +00:00			`{`
Fix style issues 2020-07-18 04:24:32 +00:00			`std::unique_lock lock{completed_mutex};`
async_shaders: Simplify implementation of GetCompletedWork() This is equivalent to moving all the contents and then clearing the vector. This avoids a redundant allocation. 2020-11-20 09:44:42 +00:00			`results = std::move(finished_work);`
async shaders 2020-07-10 03:36:38 +00:00			`finished_work.clear();`
			`}`
			`return results;`
			`}`

			`void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,`
			`Tegra::Engines::ShaderType shader_type, u64 uid,`
			`std::vector<u64> code, std::vector<u64> code_b,`
async_shaders: Simplify moving data into the pending queue 2020-11-20 09:41:27 +00:00			`u32 main_offset, CompilerSettings compiler_settings,`
			`const Registry& registry, VAddr cpu_addr) {`
			`std::unique_lock lock(queue_mutex);`
			`pending_queue.push({`
Address feedback, add shader compile notifier, update setting text 2020-08-02 17:05:41 +00:00			`.backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,`
			`.device = &device,`
			`.shader_type = shader_type,`
			`.uid = uid,`
			`.code = std::move(code),`
			`.code_b = std::move(code_b),`
			`.main_offset = main_offset,`
			`.compiler_settings = compiler_settings,`
Remove unneeded newlines, optional Registry in shader params Addressing feedback from Rodrigo 2020-08-16 20:33:21 +00:00			`.registry = registry,`
Address feedback, add shader compile notifier, update setting text 2020-08-02 17:05:41 +00:00			`.cpu_address = cpu_addr,`
async_shaders: Simplify moving data into the pending queue 2020-11-20 09:41:27 +00:00			`});`
Vk Async pipeline compilation 2020-07-28 04:08:02 +00:00			`cv.notify_one();`
			`}`

Address feedback, add shader compile notifier, update setting text 2020-08-02 17:05:41 +00:00			`void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,`
			`const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler,`
			`Vulkan::VKDescriptorPool& descriptor_pool,`
			`Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,`
			`Vulkan::VKRenderPassCache& renderpass_cache,`
			`std::vector<VkDescriptorSetLayoutBinding> bindings,`
			`Vulkan::SPIRVProgram program,`
			`Vulkan::GraphicsPipelineCacheKey key) {`
async_shaders: Simplify moving data into the pending queue 2020-11-20 09:41:27 +00:00			`std::unique_lock lock(queue_mutex);`
			`pending_queue.push({`
Address feedback, add shader compile notifier, update setting text 2020-08-02 17:05:41 +00:00			`.backend = Backend::Vulkan,`
			`.pp_cache = pp_cache,`
			`.vk_device = &device,`
			`.scheduler = &scheduler,`
			`.descriptor_pool = &descriptor_pool,`
			`.update_descriptor_queue = &update_descriptor_queue,`
			`.renderpass_cache = &renderpass_cache,`
async_shaders: std::move data within QueueVulkanShader() Same behavior, but avoids redundant copies. While we're at it, we can simplify the pushing of the parameters into the pending queue. 2020-11-20 09:34:02 +00:00			`.bindings = std::move(bindings),`
			`.program = std::move(program),`
Address feedback, add shader compile notifier, update setting text 2020-08-02 17:05:41 +00:00			`.key = key,`
async_shaders: Simplify moving data into the pending queue 2020-11-20 09:41:27 +00:00			`});`
Use conditional var 2020-07-16 08:38:35 +00:00			`cv.notify_one();`
async shaders 2020-07-10 03:36:38 +00:00			`}`

			`void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {`
			`while (!is_thread_exiting.load(std::memory_order_relaxed)) {`
Fix style issues 2020-07-18 04:24:32 +00:00			`std::unique_lock lock{queue_mutex};`
			`cv.wait(lock, [this] { return HasWorkQueued() \|\| is_thread_exiting; });`
Use conditional var 2020-07-16 08:38:35 +00:00			`if (is_thread_exiting) {`
			`return;`
			`}`

async shaders 2020-07-10 03:36:38 +00:00			`// Partial lock to allow all threads to read at the same time`
			`if (!HasWorkQueued()) {`
			`continue;`
			`}`
			`// Another thread beat us, just unlock and wait for the next load`
			`if (pending_queue.empty()) {`
			`continue;`
			`}`
Use conditional var 2020-07-16 08:38:35 +00:00
async shaders 2020-07-10 03:36:38 +00:00			`// Pull work from queue`
			`WorkerParams work = std::move(pending_queue.front());`
Address feedback. Bruteforce delete duplicates 2020-07-30 19:41:11 +00:00			`pending_queue.pop();`
Use conditional var 2020-07-16 08:38:35 +00:00			`lock.unlock();`
async shaders 2020-07-10 03:36:38 +00:00
Address feedback, add shader compile notifier, update setting text 2020-08-02 17:05:41 +00:00			`if (work.backend == Backend::OpenGL \|\| work.backend == Backend::GLASM) {`
Remove unneeded newlines, optional Registry in shader params Addressing feedback from Rodrigo 2020-08-16 20:33:21 +00:00			`const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);`
async shaders 2020-07-10 03:36:38 +00:00			`const auto scope = context->Acquire();`
			`auto program =`
Remove unneeded newlines, optional Registry in shader params Addressing feedback from Rodrigo 2020-08-16 20:33:21 +00:00			`OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry);`
async shaders 2020-07-10 03:36:38 +00:00			`Result result{};`
			`result.backend = work.backend;`
			`result.cpu_address = work.cpu_address;`
			`result.uid = work.uid;`
			`result.code = std::move(work.code);`
			`result.code_b = std::move(work.code_b);`
			`result.shader_type = work.shader_type;`

Address feedback, add shader compile notifier, update setting text 2020-08-02 17:05:41 +00:00			`if (work.backend == Backend::OpenGL) {`
async shaders 2020-07-10 03:36:38 +00:00			`result.program.opengl = std::move(program->source_program);`
Address feedback, add shader compile notifier, update setting text 2020-08-02 17:05:41 +00:00			`} else if (work.backend == Backend::GLASM) {`
async shaders 2020-07-10 03:36:38 +00:00			`result.program.glasm = std::move(program->assembly_program);`
			`}`

			`{`
			`std::unique_lock complete_lock(completed_mutex);`
			`finished_work.push_back(std::move(result));`
			`}`
Address feedback, add shader compile notifier, update setting text 2020-08-02 17:05:41 +00:00			`} else if (work.backend == Backend::Vulkan) {`
Address feedback. Bruteforce delete duplicates 2020-07-30 19:41:11 +00:00			`auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(`
Address feedback, add shader compile notifier, update setting text 2020-08-02 17:05:41 +00:00			`work.vk_device, work.scheduler, *work.descriptor_pool,`
			`work.update_descriptor_queue, work.renderpass_cache, work.key, work.bindings,`
			`work.program);`
Vk Async pipeline compilation 2020-07-28 04:08:02 +00:00
Address feedback, add shader compile notifier, update setting text 2020-08-02 17:05:41 +00:00			`work.pp_cache->EmplacePipeline(std::move(pipeline));`
async shaders 2020-07-10 03:36:38 +00:00			`}`
			`}`
			`}`

			`} // namespace VideoCommon::Shader`