From 59549a2eb6171a5d09f3045506da8508e815c5f0 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Tue, 25 Jul 2023 22:16:41 +0300 Subject: [PATCH] renderer_vulkan: Add vulkan backend --- src/android/app/src/main/jni/native.cpp | 3 + .../configuration/configure_graphics.cpp | 1 + src/common/settings.cpp | 2 + src/video_core/CMakeLists.txt | 29 + .../rasterizer_cache/rasterizer_cache.h | 6 +- src/video_core/rasterizer_interface.h | 2 +- src/video_core/regs_framebuffer.h | 1 + src/video_core/renderer_base.h | 3 + .../renderer_opengl/gl_texture_mailbox.cpp | 1 + src/video_core/renderer_vulkan/pica_to_vk.h | 10 + .../renderer_vulkan/renderer_vulkan.cpp | 1098 ++++++++++++ .../renderer_vulkan/renderer_vulkan.h | 139 ++ .../renderer_vulkan/vk_blit_helper.cpp | 548 ++++++ .../renderer_vulkan/vk_blit_helper.h | 71 + src/video_core/renderer_vulkan/vk_common.h | 1 + .../renderer_vulkan/vk_descriptor_pool.cpp | 141 ++ .../renderer_vulkan/vk_descriptor_pool.h | 92 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 290 +++ .../renderer_vulkan/vk_graphics_pipeline.h | 192 ++ .../renderer_vulkan/vk_instance.cpp | 8 +- .../renderer_vulkan/vk_master_semaphore.cpp | 207 +++ .../renderer_vulkan/vk_master_semaphore.h | 107 ++ .../renderer_vulkan/vk_pipeline_cache.cpp | 519 ++++++ .../renderer_vulkan/vk_pipeline_cache.h | 123 ++ .../renderer_vulkan/vk_present_window.cpp | 514 ++++++ .../renderer_vulkan/vk_present_window.h | 101 ++ .../renderer_vulkan/vk_rasterizer.cpp | 1138 ++++++++++++ .../renderer_vulkan/vk_rasterizer.h | 171 ++ .../renderer_vulkan/vk_rasterizer_cache.cpp | 10 + .../renderer_vulkan/vk_renderpass_cache.cpp | 211 +++ .../renderer_vulkan/vk_renderpass_cache.h | 73 + .../renderer_vulkan/vk_resource_pool.cpp | 113 ++ .../renderer_vulkan/vk_resource_pool.h | 67 + .../renderer_vulkan/vk_scheduler.cpp | 203 +++ src/video_core/renderer_vulkan/vk_scheduler.h | 210 +++ .../renderer_vulkan/vk_shader_gen.cpp | 18 +- .../renderer_vulkan/vk_shader_gen_spv.cpp | 2 +- .../renderer_vulkan/vk_shader_util.cpp | 8 +- .../renderer_vulkan/vk_stream_buffer.cpp | 201 +++ .../renderer_vulkan/vk_stream_buffer.h | 86 + .../renderer_vulkan/vk_swapchain.cpp | 236 +++ src/video_core/renderer_vulkan/vk_swapchain.h | 110 ++ .../renderer_vulkan/vk_texture_runtime.cpp | 1570 +++++++++++++++++ .../renderer_vulkan/vk_texture_runtime.h | 295 ++++ src/video_core/video_core.cpp | 4 + 45 files changed, 8914 insertions(+), 21 deletions(-) create mode 100644 src/video_core/renderer_vulkan/renderer_vulkan.cpp create mode 100644 src/video_core/renderer_vulkan/renderer_vulkan.h create mode 100644 src/video_core/renderer_vulkan/vk_blit_helper.cpp create mode 100644 src/video_core/renderer_vulkan/vk_blit_helper.h create mode 100644 src/video_core/renderer_vulkan/vk_descriptor_pool.cpp create mode 100644 src/video_core/renderer_vulkan/vk_descriptor_pool.h create mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp create mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.h create mode 100644 src/video_core/renderer_vulkan/vk_master_semaphore.cpp create mode 100644 src/video_core/renderer_vulkan/vk_master_semaphore.h create mode 100644 src/video_core/renderer_vulkan/vk_pipeline_cache.cpp create mode 100644 src/video_core/renderer_vulkan/vk_pipeline_cache.h create mode 100644 src/video_core/renderer_vulkan/vk_present_window.cpp create mode 100644 src/video_core/renderer_vulkan/vk_present_window.h create mode 100644 src/video_core/renderer_vulkan/vk_rasterizer.cpp create mode 100644 
src/video_core/renderer_vulkan/vk_rasterizer.h create mode 100644 src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp create mode 100644 src/video_core/renderer_vulkan/vk_renderpass_cache.cpp create mode 100644 src/video_core/renderer_vulkan/vk_renderpass_cache.h create mode 100644 src/video_core/renderer_vulkan/vk_resource_pool.cpp create mode 100644 src/video_core/renderer_vulkan/vk_resource_pool.h create mode 100644 src/video_core/renderer_vulkan/vk_scheduler.cpp create mode 100644 src/video_core/renderer_vulkan/vk_scheduler.h create mode 100644 src/video_core/renderer_vulkan/vk_stream_buffer.cpp create mode 100644 src/video_core/renderer_vulkan/vk_stream_buffer.h create mode 100644 src/video_core/renderer_vulkan/vk_swapchain.cpp create mode 100644 src/video_core/renderer_vulkan/vk_swapchain.h create mode 100644 src/video_core/renderer_vulkan/vk_texture_runtime.cpp create mode 100644 src/video_core/renderer_vulkan/vk_texture_runtime.h diff --git a/src/android/app/src/main/jni/native.cpp b/src/android/app/src/main/jni/native.cpp index 72241baf2..cddc45923 100644 --- a/src/android/app/src/main/jni/native.cpp +++ b/src/android/app/src/main/jni/native.cpp @@ -242,6 +242,9 @@ void Java_org_citra_citra_1emu_NativeLibrary_SurfaceChanged(JNIEnv* env, if (window) { window->OnSurfaceChanged(s_surf); } + if (VideoCore::g_renderer) { + VideoCore::g_renderer->NotifySurfaceChanged(); + } LOG_INFO(Frontend, "Surface changed"); } diff --git a/src/citra_qt/configuration/configure_graphics.cpp b/src/citra_qt/configuration/configure_graphics.cpp index 34a433586..6424e3af1 100644 --- a/src/citra_qt/configuration/configure_graphics.cpp +++ b/src/citra_qt/configuration/configure_graphics.cpp @@ -7,6 +7,7 @@ #include "citra_qt/configuration/configure_graphics.h" #include "common/settings.h" #include "ui_configure_graphics.h" +#include "video_core/renderer_vulkan/vk_instance.h" ConfigureGraphics::ConfigureGraphics(std::span physical_devices, bool is_powered_on, QWidget* parent) diff --git a/src/common/settings.cpp b/src/common/settings.cpp index bf9086e14..d0b9af658 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -31,6 +31,8 @@ std::string_view GetGraphicsAPIName(GraphicsAPI api) { return "Software"; case GraphicsAPI::OpenGL: return "OpenGL"; + case GraphicsAPI::Vulkan: + return "Vulkan"; default: return "Invalid"; } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 900d9b3d0..00b58d5d4 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -101,18 +101,47 @@ add_library(video_core STATIC renderer_software/sw_texturing.cpp renderer_software/sw_texturing.h renderer_vulkan/pica_to_vk.h + renderer_vulkan/renderer_vulkan.cpp + renderer_vulkan/renderer_vulkan.h + renderer_vulkan/vk_blit_helper.cpp + renderer_vulkan/vk_blit_helper.h renderer_vulkan/vk_common.cpp renderer_vulkan/vk_common.h + renderer_vulkan/vk_descriptor_pool.cpp + renderer_vulkan/vk_descriptor_pool.h + renderer_vulkan/vk_graphics_pipeline.cpp + renderer_vulkan/vk_graphics_pipeline.h + renderer_vulkan/vk_master_semaphore.cpp + renderer_vulkan/vk_master_semaphore.h + renderer_vulkan/vk_rasterizer.cpp + renderer_vulkan/vk_rasterizer.h + renderer_vulkan/vk_rasterizer_cache.cpp + renderer_vulkan/vk_scheduler.cpp + renderer_vulkan/vk_scheduler.h + renderer_vulkan/vk_resource_pool.cpp + renderer_vulkan/vk_resource_pool.h renderer_vulkan/vk_instance.cpp renderer_vulkan/vk_instance.h + renderer_vulkan/vk_pipeline_cache.cpp + renderer_vulkan/vk_pipeline_cache.h 
renderer_vulkan/vk_platform.cpp renderer_vulkan/vk_platform.h + renderer_vulkan/vk_present_window.cpp + renderer_vulkan/vk_present_window.h + renderer_vulkan/vk_renderpass_cache.cpp + renderer_vulkan/vk_renderpass_cache.h renderer_vulkan/vk_shader_gen.cpp renderer_vulkan/vk_shader_gen.h renderer_vulkan/vk_shader_gen_spv.cpp renderer_vulkan/vk_shader_gen_spv.h renderer_vulkan/vk_shader_util.cpp renderer_vulkan/vk_shader_util.h + renderer_vulkan/vk_stream_buffer.cpp + renderer_vulkan/vk_stream_buffer.h + renderer_vulkan/vk_swapchain.cpp + renderer_vulkan/vk_swapchain.h + renderer_vulkan/vk_texture_runtime.cpp + renderer_vulkan/vk_texture_runtime.h shader/debug_data.h shader/shader.cpp shader/shader.h diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index cebf54377..c981d08d7 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -708,8 +708,8 @@ FramebufferHelper RasterizerCache::GetFramebufferSurfaces(bool using_color fb_rect = depth_rect; } - const Surface* color_surface = color_id ? &slot_surfaces[color_id] : nullptr; - const Surface* depth_surface = depth_id ? &slot_surfaces[depth_id] : nullptr; + Surface* color_surface = color_id ? &slot_surfaces[color_id] : nullptr; + Surface* depth_surface = depth_id ? &slot_surfaces[depth_id] : nullptr; if (color_id) { color_level = color_surface->LevelOf(color_params.addr); @@ -722,7 +722,7 @@ FramebufferHelper RasterizerCache::GetFramebufferSurfaces(bool using_color boost::icl::length(depth_vp_interval)); } - fb_params = FramebufferParams{ + const FramebufferParams fb_params = { .color_id = color_id, .depth_id = depth_id, .color_level = color_level, diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 3a86d1eb4..5bb55b7bb 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -26,7 +26,7 @@ using DiskResourceLoadCallback = std::function depth_test_enable; BitField<4, 3, CompareFunc> depth_test_func; BitField<8, 1, u32> red_enable; diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 6e1f327e4..d6958ec1e 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -63,6 +63,9 @@ public: /// Synchronizes fixed function renderer state virtual void Sync() {} + /// This is called to notify the rendering backend of a surface change + virtual void NotifySurfaceChanged() {} + /// Returns the resolution scale factor relative to the native 3DS screen resolution u32 GetResolutionScaleFactor(); diff --git a/src/video_core/renderer_opengl/gl_texture_mailbox.cpp b/src/video_core/renderer_opengl/gl_texture_mailbox.cpp index 11a8b6eb9..d32a46455 100644 --- a/src/video_core/renderer_opengl/gl_texture_mailbox.cpp +++ b/src/video_core/renderer_opengl/gl_texture_mailbox.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include "common/logging/log.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_texture_mailbox.h" diff --git a/src/video_core/renderer_vulkan/pica_to_vk.h b/src/video_core/renderer_vulkan/pica_to_vk.h index cc0941db0..96776ce70 100644 --- a/src/video_core/renderer_vulkan/pica_to_vk.h +++ b/src/video_core/renderer_vulkan/pica_to_vk.h @@ -6,6 +6,7 @@ #include "common/logging/log.h" #include "core/core.h" +#include "core/telemetry_session.h" #include "video_core/regs.h" #include "video_core/renderer_vulkan/vk_common.h" @@ -172,7 +173,10 @@ inline vk::PrimitiveTopology PrimitiveTopology(Pica::PipelineRegs::TriangleTopol return vk::PrimitiveTopology::eTriangleList; case Pica::PipelineRegs::TriangleTopology::Strip: return vk::PrimitiveTopology::eTriangleStrip; + default: + UNREACHABLE_MSG("Unknown triangle topology {}", topology); } + return vk::PrimitiveTopology::eTriangleList; } inline vk::CullModeFlags CullMode(Pica::RasterizerRegs::CullMode mode) { @@ -182,7 +186,10 @@ inline vk::CullModeFlags CullMode(Pica::RasterizerRegs::CullMode mode) { case Pica::RasterizerRegs::CullMode::KeepClockWise: case Pica::RasterizerRegs::CullMode::KeepCounterClockWise: return vk::CullModeFlagBits::eBack; + default: + UNREACHABLE_MSG("Unknown cull mode {}", mode); } + return vk::CullModeFlagBits::eNone; } inline vk::FrontFace FrontFace(Pica::RasterizerRegs::CullMode mode) { @@ -192,7 +199,10 @@ inline vk::FrontFace FrontFace(Pica::RasterizerRegs::CullMode mode) { return vk::FrontFace::eCounterClockwise; case Pica::RasterizerRegs::CullMode::KeepCounterClockWise: return vk::FrontFace::eClockwise; + default: + UNREACHABLE_MSG("Unknown cull mode {}", mode); } + return vk::FrontFace::eClockwise; } inline Common::Vec4f ColorRGBA8(const u32 color) { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp new file mode 100644 index 000000000..8ccc2a2e3 --- /dev/null +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -0,0 +1,1098 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/logging/log.h" +#include "common/microprofile.h" +#include "common/settings.h" +#include "core/core.h" +#include "core/frontend/emu_window.h" +#include "core/hw/gpu.h" +#include "core/hw/hw.h" +#include "core/hw/lcd.h" +#include "core/telemetry_session.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" + +#include "video_core/host_shaders/vulkan_present_anaglyph_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_interlaced_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_vert_spv.h" + +#include + +MICROPROFILE_DEFINE(Vulkan_RenderFrame, "Vulkan", "Render Frame", MP_RGB(128, 128, 64)); + +namespace Vulkan { + +/** + * Vertex structure that the drawn screen rectangles are composed of. 
+ */
+struct ScreenRectVertex {
+    ScreenRectVertex() = default;
+    ScreenRectVertex(float x, float y, float u, float v)
+        : position{Common::MakeVec(x, y)}, tex_coord{Common::MakeVec(u, v)} {}
+
+    Common::Vec2f position;
+    Common::Vec2f tex_coord;
+};
+
+constexpr u32 VERTEX_BUFFER_SIZE = sizeof(ScreenRectVertex) * 8192;
+
+constexpr std::array<float, 4 * 4> MakeOrthographicMatrix(u32 width, u32 height) {
+    // clang-format off
+    return { 2.f / width,          0.f, 0.f, -1.f,
+                     0.f, 2.f / height, 0.f, -1.f,
+                     0.f,          0.f, 1.f,  0.f,
+                     0.f,          0.f, 0.f,  1.f};
+    // clang-format on
+}
+
+namespace {
+
+std::string GetReadableVersion(u32 version) {
+    return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version),
+                       VK_VERSION_PATCH(version));
+}
+
+std::string GetDriverVersion(const Instance& instance) {
+    // Extracted from
+    // https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314
+    const u32 version = instance.GetDriverVersion();
+    if (instance.GetDriverID() == vk::DriverId::eNvidiaProprietary) {
+        const u32 major = (version >> 22) & 0x3ff;
+        const u32 minor = (version >> 14) & 0x0ff;
+        const u32 secondary = (version >> 6) & 0x0ff;
+        const u32 tertiary = version & 0x003f;
+        return fmt::format("{}.{}.{}.{}", major, minor, secondary, tertiary);
+    }
+    if (instance.GetDriverID() == vk::DriverId::eIntelProprietaryWindows) {
+        const u32 major = version >> 14;
+        const u32 minor = version & 0x3fff;
+        return fmt::format("{}.{}", major, minor);
+    }
+    return GetReadableVersion(version);
+}
+
+constexpr std::array<vk::DescriptorSetLayoutBinding, 1> PRESENT_BINDINGS = {{
+    {0, vk::DescriptorType::eCombinedImageSampler, 3, vk::ShaderStageFlagBits::eFragment},
+}};
+
+} // Anonymous namespace
+
+RendererVulkan::RendererVulkan(Core::System& system, Frontend::EmuWindow& window,
+                               Frontend::EmuWindow* secondary_window)
+    : RendererBase{system, window, secondary_window}, memory{system.Memory()},
+      telemetry_session{system.TelemetrySession()},
+      instance{window, Settings::values.physical_device.GetValue()},
+      scheduler{instance, renderpass_cache}, renderpass_cache{instance, scheduler},
+      pool{instance}, main_window{window, instance, scheduler},
+      vertex_buffer{instance, scheduler, vk::BufferUsageFlagBits::eVertexBuffer,
+                    VERTEX_BUFFER_SIZE},
+      rasterizer{memory,
+                 system.CustomTexManager(),
+                 *this,
+                 render_window,
+                 instance,
+                 scheduler,
+                 pool,
+                 renderpass_cache,
+                 main_window.ImageCount()},
+      present_set_provider{instance, pool, PRESENT_BINDINGS} {
+    ReportDriver();
+    CompileShaders();
+    BuildLayouts();
+    BuildPipelines();
+    if (secondary_window) {
+        second_window = std::make_unique<PresentWindow>(*secondary_window, instance, scheduler);
+    }
+}
+
+RendererVulkan::~RendererVulkan() {
+    vk::Device device = instance.GetDevice();
+    scheduler.Finish();
+    device.waitIdle();
+
+    device.destroyShaderModule(present_vertex_shader);
+    for (u32 i = 0; i < PRESENT_PIPELINES; i++) {
+        device.destroyPipeline(present_pipelines[i]);
+        device.destroyShaderModule(present_shaders[i]);
+    }
+
+    for (auto& sampler : present_samplers) {
+        device.destroySampler(sampler);
+    }
+
+    for (auto& info : screen_infos) {
+        device.destroyImageView(info.texture.image_view);
+        vmaDestroyImage(instance.GetAllocator(), info.texture.image, info.texture.allocation);
+    }
+}
+
+void RendererVulkan::Sync() {
+    rasterizer.SyncEntireState();
+}
+
+void RendererVulkan::PrepareRendertarget() {
+    for (u32 i = 0; i < 3; i++) {
+        const u32 fb_id = i == 2 ? 1 : 0;
+        const auto& framebuffer = GPU::g_regs.framebuffer_config[fb_id];
+
+        // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04
+        u32 lcd_color_addr =
+            (fb_id == 0) ? LCD_REG_INDEX(color_fill_top) : LCD_REG_INDEX(color_fill_bottom);
+        lcd_color_addr = HW::VADDR_LCD + 4 * lcd_color_addr;
+        LCD::Regs::ColorFill color_fill{0};
+        LCD::Read(color_fill.raw, lcd_color_addr);
+
+        if (color_fill.is_enabled) {
+            LoadColorToActiveVkTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b,
+                                       screen_infos[i].texture);
+        } else {
+            TextureInfo& texture = screen_infos[i].texture;
+            if (texture.width != framebuffer.width || texture.height != framebuffer.height ||
+                texture.format != framebuffer.color_format) {
+
+                // Reallocate texture if the framebuffer size has changed.
+                // This is expected to not happen very often and hence should not be a
+                // performance problem.
+                ConfigureFramebufferTexture(texture, framebuffer);
+            }
+
+            LoadFBToScreenInfo(framebuffer, screen_infos[i], i == 1);
+
+            // Resize the texture in case the framebuffer size has changed
+            texture.width = framebuffer.width;
+            texture.height = framebuffer.height;
+        }
+    }
+}
+
+void RendererVulkan::PrepareDraw(Frame* frame, const Layout::FramebufferLayout& layout) {
+    const auto sampler = present_samplers[!Settings::values.filter_mode.GetValue()];
+    std::transform(screen_infos.begin(), screen_infos.end(), present_textures.begin(),
+                   [&](auto& info) {
+                       return DescriptorData{vk::DescriptorImageInfo{sampler, info.image_view,
+                                                                     vk::ImageLayout::eGeneral}};
+                   });
+
+    const auto descriptor_set = present_set_provider.Acquire(present_textures);
+
+    renderpass_cache.EndRendering();
+    scheduler.Record([this, layout, frame, descriptor_set, renderpass = main_window.Renderpass(),
+                      index = current_pipeline](vk::CommandBuffer cmdbuf) {
+        const vk::Viewport viewport = {
+            .x = 0.0f,
+            .y = 0.0f,
+            .width = static_cast<float>(layout.width),
+            .height = static_cast<float>(layout.height),
+            .minDepth = 0.0f,
+            .maxDepth = 1.0f,
+        };
+
+        const vk::Rect2D scissor = {
+            .offset = {0, 0},
+            .extent = {layout.width, layout.height},
+        };
+
+        cmdbuf.setViewport(0, viewport);
+        cmdbuf.setScissor(0, scissor);
+
+        const vk::ClearValue clear{.color = clear_color};
+        const vk::PipelineLayout present_layout{*present_pipeline_layout};
+        const vk::RenderPassBeginInfo renderpass_begin_info = {
+            .renderPass = renderpass,
+            .framebuffer = frame->framebuffer,
+            .renderArea =
+                vk::Rect2D{
+                    .offset = {0, 0},
+                    .extent = {frame->width, frame->height},
+                },
+            .clearValueCount = 1,
+            .pClearValues = &clear,
+        };
+
+        cmdbuf.beginRenderPass(renderpass_begin_info, vk::SubpassContents::eInline);
+        cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, present_pipelines[index]);
+        cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, present_layout, 0,
+                                  descriptor_set, {});
+    });
+}
+
+void RendererVulkan::RenderToWindow(PresentWindow& window, const Layout::FramebufferLayout& layout,
+                                    bool flipped) {
+    Frame* frame = window.GetRenderFrame();
+
+    if (layout.width != frame->width || layout.height != frame->height) {
+        window.WaitPresent();
+        scheduler.Finish();
+        window.RecreateFrame(frame, layout.width, layout.height);
+    }
+
+    DrawScreens(frame, layout, flipped);
+    scheduler.Flush(frame->render_ready);
+
+    window.Present(frame);
+}
+
+void RendererVulkan::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
+                                        ScreenInfo& screen_info, bool right_eye) {
+
+    if (framebuffer.address_right1 == 0 || framebuffer.address_right2 == 0) {
+        right_eye = false;
+    }
+
+    const PAddr
framebuffer_addr = + framebuffer.active_fb == 0 + ? (right_eye ? framebuffer.address_right1 : framebuffer.address_left1) + : (right_eye ? framebuffer.address_right2 : framebuffer.address_left2); + + LOG_TRACE(Render_Vulkan, "0x{:08x} bytes from 0x{:08x}({}x{}), fmt {:x}", + framebuffer.stride * framebuffer.height, framebuffer_addr, framebuffer.width.Value(), + framebuffer.height.Value(), framebuffer.format); + + const int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); + const std::size_t pixel_stride = framebuffer.stride / bpp; + + ASSERT(pixel_stride * bpp == framebuffer.stride); + ASSERT(pixel_stride % 4 == 0); + + if (!rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, static_cast(pixel_stride), + screen_info)) { + // Reset the screen info's display texture to its own permanent texture + screen_info.image_view = screen_info.texture.image_view; + screen_info.texcoords = {0.f, 0.f, 1.f, 1.f}; + + ASSERT(false); + } +} + +void RendererVulkan::CompileShaders() { + vk::Device device = instance.GetDevice(); + present_vertex_shader = CompileSPV(VULKAN_PRESENT_VERT_SPV, device); + present_shaders[0] = CompileSPV(VULKAN_PRESENT_FRAG_SPV, device); + present_shaders[1] = CompileSPV(VULKAN_PRESENT_ANAGLYPH_FRAG_SPV, device); + present_shaders[2] = CompileSPV(VULKAN_PRESENT_INTERLACED_FRAG_SPV, device); + + auto properties = instance.GetPhysicalDevice().getProperties(); + for (std::size_t i = 0; i < present_samplers.size(); i++) { + const vk::Filter filter_mode = i == 0 ? vk::Filter::eLinear : vk::Filter::eNearest; + const vk::SamplerCreateInfo sampler_info = { + .magFilter = filter_mode, + .minFilter = filter_mode, + .mipmapMode = vk::SamplerMipmapMode::eLinear, + .addressModeU = vk::SamplerAddressMode::eClampToEdge, + .addressModeV = vk::SamplerAddressMode::eClampToEdge, + .anisotropyEnable = instance.IsAnisotropicFilteringSupported(), + .maxAnisotropy = properties.limits.maxSamplerAnisotropy, + .compareEnable = false, + .compareOp = vk::CompareOp::eAlways, + .borderColor = vk::BorderColor::eIntOpaqueBlack, + .unnormalizedCoordinates = false, + }; + + present_samplers[i] = device.createSampler(sampler_info); + } +} + +void RendererVulkan::BuildLayouts() { + const vk::PushConstantRange push_range = { + .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, + .offset = 0, + .size = sizeof(PresentUniformData), + }; + + const auto descriptor_set_layout = present_set_provider.Layout(); + const vk::PipelineLayoutCreateInfo layout_info = { + .setLayoutCount = 1, + .pSetLayouts = &descriptor_set_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &push_range, + }; + present_pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info); +} + +void RendererVulkan::BuildPipelines() { + const vk::VertexInputBindingDescription binding = { + .binding = 0, + .stride = sizeof(ScreenRectVertex), + .inputRate = vk::VertexInputRate::eVertex, + }; + + const std::array attributes = { + vk::VertexInputAttributeDescription{ + .location = 0, + .binding = 0, + .format = vk::Format::eR32G32Sfloat, + .offset = offsetof(ScreenRectVertex, position), + }, + vk::VertexInputAttributeDescription{ + .location = 1, + .binding = 0, + .format = vk::Format::eR32G32Sfloat, + .offset = offsetof(ScreenRectVertex, tex_coord), + }, + }; + + const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = &binding, + .vertexAttributeDescriptionCount = static_cast(attributes.size()), + 
.pVertexAttributeDescriptions = attributes.data(), + }; + + const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { + .topology = vk::PrimitiveTopology::eTriangleStrip, + .primitiveRestartEnable = false, + }; + + const vk::PipelineRasterizationStateCreateInfo raster_state = { + .depthClampEnable = false, + .rasterizerDiscardEnable = false, + .cullMode = vk::CullModeFlagBits::eNone, + .frontFace = vk::FrontFace::eClockwise, + .depthBiasEnable = false, + .lineWidth = 1.0f, + }; + + const vk::PipelineMultisampleStateCreateInfo multisampling = { + .rasterizationSamples = vk::SampleCountFlagBits::e1, + .sampleShadingEnable = false, + }; + + const vk::PipelineColorBlendAttachmentState colorblend_attachment = { + .blendEnable = false, + .colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | + vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA, + }; + + const vk::PipelineColorBlendStateCreateInfo color_blending = { + .logicOpEnable = false, + .attachmentCount = 1, + .pAttachments = &colorblend_attachment, + .blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f}, + }; + + const vk::Viewport placeholder_viewport = vk::Viewport{0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}; + const vk::Rect2D placeholder_scissor = vk::Rect2D{{0, 0}, {1, 1}}; + const vk::PipelineViewportStateCreateInfo viewport_info = { + .viewportCount = 1, + .pViewports = &placeholder_viewport, + .scissorCount = 1, + .pScissors = &placeholder_scissor, + }; + + const std::array dynamic_states = { + vk::DynamicState::eViewport, + vk::DynamicState::eScissor, + }; + + const vk::PipelineDynamicStateCreateInfo dynamic_info = { + .dynamicStateCount = static_cast(dynamic_states.size()), + .pDynamicStates = dynamic_states.data(), + }; + + const vk::PipelineDepthStencilStateCreateInfo depth_info = { + .depthTestEnable = false, + .depthWriteEnable = false, + .depthCompareOp = vk::CompareOp::eAlways, + .depthBoundsTestEnable = false, + .stencilTestEnable = false, + }; + + for (u32 i = 0; i < PRESENT_PIPELINES; i++) { + const std::array shader_stages = { + vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eVertex, + .module = present_vertex_shader, + .pName = "main", + }, + vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eFragment, + .module = present_shaders[i], + .pName = "main", + }, + }; + + const vk::GraphicsPipelineCreateInfo pipeline_info = { + .stageCount = static_cast(shader_stages.size()), + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_info, + .pInputAssemblyState = &input_assembly, + .pViewportState = &viewport_info, + .pRasterizationState = &raster_state, + .pMultisampleState = &multisampling, + .pDepthStencilState = &depth_info, + .pColorBlendState = &color_blending, + .pDynamicState = &dynamic_info, + .layout = *present_pipeline_layout, + .renderPass = main_window.Renderpass(), + }; + + const auto [result, pipeline] = + instance.GetDevice().createGraphicsPipeline({}, pipeline_info); + ASSERT_MSG(result == vk::Result::eSuccess, "Unable to build present pipelines"); + present_pipelines[i] = pipeline; + } +} + +void RendererVulkan::ConfigureFramebufferTexture(TextureInfo& texture, + const GPU::Regs::FramebufferConfig& framebuffer) { + vk::Device device = instance.GetDevice(); + if (texture.image_view) { + device.destroyImageView(texture.image_view); + } + if (texture.image) { + vmaDestroyImage(instance.GetAllocator(), texture.image, texture.allocation); + } + + const VideoCore::PixelFormat pixel_format = + 
VideoCore::PixelFormatFromGPUPixelFormat(framebuffer.color_format); + const vk::Format format = instance.GetTraits(pixel_format).native; + const vk::ImageCreateInfo image_info = { + .imageType = vk::ImageType::e2D, + .format = format, + .extent = {framebuffer.width, framebuffer.height, 1}, + .mipLevels = 1, + .arrayLayers = 1, + .samples = vk::SampleCountFlagBits::e1, + .usage = vk::ImageUsageFlagBits::eSampled, + }; + + const VmaAllocationCreateInfo alloc_info = { + .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT, + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + .requiredFlags = 0, + .preferredFlags = 0, + .pool = VK_NULL_HANDLE, + .pUserData = nullptr, + }; + + VkImage unsafe_image{}; + VkImageCreateInfo unsafe_image_info = static_cast(image_info); + + VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info, + &unsafe_image, &texture.allocation, nullptr); + if (result != VK_SUCCESS) [[unlikely]] { + LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result); + UNREACHABLE(); + } + texture.image = vk::Image{unsafe_image}; + + const vk::ImageViewCreateInfo view_info = { + .image = texture.image, + .viewType = vk::ImageViewType::e2D, + .format = format, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + texture.image_view = device.createImageView(view_info); + + texture.width = framebuffer.width; + texture.height = framebuffer.height; + texture.format = framebuffer.color_format; +} + +void RendererVulkan::LoadColorToActiveVkTexture(u8 color_r, u8 color_g, u8 color_b, + const TextureInfo& texture) { + const vk::ClearColorValue clear_color = { + .float32 = + std::array{ + color_r / 255.0f, + color_g / 255.0f, + color_b / 255.0f, + 1.0f, + }, + }; + + renderpass_cache.EndRendering(); + scheduler.Record([image = texture.image, clear_color](vk::CommandBuffer cmdbuf) { + const vk::ImageSubresourceRange range = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }; + + const vk::ImageMemoryBarrier pre_barrier = { + .srcAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = range, + }; + + const vk::ImageMemoryBarrier post_barrier = { + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = range, + }; + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eFragmentShader, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier); + + cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear_color, range); + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eFragmentShader, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); + }); +} + +void RendererVulkan::ReloadPipeline() { + 
const Settings::StereoRenderOption render_3d = Settings::values.render_3d.GetValue();
+    switch (render_3d) {
+    case Settings::StereoRenderOption::Anaglyph:
+        current_pipeline = 1;
+        break;
+    case Settings::StereoRenderOption::Interlaced:
+    case Settings::StereoRenderOption::ReverseInterlaced:
+        current_pipeline = 2;
+        draw_info.reverse_interlaced =
+            render_3d == Settings::StereoRenderOption::ReverseInterlaced;
+        break;
+    default:
+        current_pipeline = 0;
+        break;
+    }
+}
+
+void RendererVulkan::DrawSingleScreen(u32 screen_id, float x, float y, float w, float h,
+                                      Layout::DisplayOrientation orientation) {
+    const ScreenInfo& screen_info = screen_infos[screen_id];
+    const auto& texcoords = screen_info.texcoords;
+
+    std::array<ScreenRectVertex, 4> vertices;
+    switch (orientation) {
+    case Layout::DisplayOrientation::Landscape:
+        vertices = {{
+            ScreenRectVertex(x, y, texcoords.bottom, texcoords.left),
+            ScreenRectVertex(x + w, y, texcoords.bottom, texcoords.right),
+            ScreenRectVertex(x, y + h, texcoords.top, texcoords.left),
+            ScreenRectVertex(x + w, y + h, texcoords.top, texcoords.right),
+        }};
+        break;
+    case Layout::DisplayOrientation::Portrait:
+        vertices = {{
+            ScreenRectVertex(x, y, texcoords.bottom, texcoords.right),
+            ScreenRectVertex(x + w, y, texcoords.top, texcoords.right),
+            ScreenRectVertex(x, y + h, texcoords.bottom, texcoords.left),
+            ScreenRectVertex(x + w, y + h, texcoords.top, texcoords.left),
+        }};
+        std::swap(h, w);
+        break;
+    case Layout::DisplayOrientation::LandscapeFlipped:
+        vertices = {{
+            ScreenRectVertex(x, y, texcoords.top, texcoords.right),
+            ScreenRectVertex(x + w, y, texcoords.top, texcoords.left),
+            ScreenRectVertex(x, y + h, texcoords.bottom, texcoords.right),
+            ScreenRectVertex(x + w, y + h, texcoords.bottom, texcoords.left),
+        }};
+        break;
+    case Layout::DisplayOrientation::PortraitFlipped:
+        vertices = {{
+            ScreenRectVertex(x, y, texcoords.top, texcoords.left),
+            ScreenRectVertex(x + w, y, texcoords.bottom, texcoords.left),
+            ScreenRectVertex(x, y + h, texcoords.top, texcoords.right),
+            ScreenRectVertex(x + w, y + h, texcoords.bottom, texcoords.right),
+        }};
+        std::swap(h, w);
+        break;
+    default:
+        LOG_ERROR(Render_Vulkan, "Unknown DisplayOrientation: {}", orientation);
+        break;
+    }
+
+    const u64 size = sizeof(ScreenRectVertex) * vertices.size();
+    auto [data, offset, invalidate] = vertex_buffer.Map(size, 16);
+    std::memcpy(data, vertices.data(), size);
+    vertex_buffer.Commit(size);
+
+    const u32 scale_factor = GetResolutionScaleFactor();
+    draw_info.i_resolution =
+        Common::MakeVec(static_cast<float>(screen_info.texture.width * scale_factor),
+                        static_cast<float>(screen_info.texture.height * scale_factor),
+                        1.0f / static_cast<float>(screen_info.texture.width * scale_factor),
+                        1.0f / static_cast<float>(screen_info.texture.height * scale_factor));
+    draw_info.o_resolution = Common::MakeVec(h, w, 1.0f / h, 1.0f / w);
+    draw_info.screen_id_l = screen_id;
+
+    scheduler.Record([this, offset = offset, info = draw_info](vk::CommandBuffer cmdbuf) {
+        const u32 first_vertex = static_cast<u32>(offset) / sizeof(ScreenRectVertex);
+        cmdbuf.pushConstants(*present_pipeline_layout,
+                             vk::ShaderStageFlagBits::eFragment | vk::ShaderStageFlagBits::eVertex,
+                             0, sizeof(info), &info);
+
+        cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0});
+        cmdbuf.draw(4, 1, first_vertex, 0);
+    });
+}
+
+void RendererVulkan::DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, float x, float y,
+                                            float w, float h,
+                                            Layout::DisplayOrientation orientation) {
+    const ScreenInfo& screen_info_l = screen_infos[screen_id_l];
+    const auto&
texcoords = screen_info_l.texcoords; + + std::array vertices; + switch (orientation) { + case Layout::DisplayOrientation::Landscape: + vertices = {{ + ScreenRectVertex(x, y, texcoords.bottom, texcoords.left), + ScreenRectVertex(x + w, y, texcoords.bottom, texcoords.right), + ScreenRectVertex(x, y + h, texcoords.top, texcoords.left), + ScreenRectVertex(x + w, y + h, texcoords.top, texcoords.right), + }}; + break; + case Layout::DisplayOrientation::Portrait: + vertices = {{ + ScreenRectVertex(x, y, texcoords.bottom, texcoords.right), + ScreenRectVertex(x + w, y, texcoords.top, texcoords.right), + ScreenRectVertex(x, y + h, texcoords.bottom, texcoords.left), + ScreenRectVertex(x + w, y + h, texcoords.top, texcoords.left), + }}; + std::swap(h, w); + break; + case Layout::DisplayOrientation::LandscapeFlipped: + vertices = {{ + ScreenRectVertex(x, y, texcoords.top, texcoords.right), + ScreenRectVertex(x + w, y, texcoords.top, texcoords.left), + ScreenRectVertex(x, y + h, texcoords.bottom, texcoords.right), + ScreenRectVertex(x + w, y + h, texcoords.bottom, texcoords.left), + }}; + break; + case Layout::DisplayOrientation::PortraitFlipped: + vertices = {{ + ScreenRectVertex(x, y, texcoords.top, texcoords.left), + ScreenRectVertex(x + w, y, texcoords.bottom, texcoords.left), + ScreenRectVertex(x, y + h, texcoords.top, texcoords.right), + ScreenRectVertex(x + w, y + h, texcoords.bottom, texcoords.right), + }}; + std::swap(h, w); + break; + default: + LOG_ERROR(Render_Vulkan, "Unknown DisplayOrientation: {}", orientation); + break; + } + + const u64 size = sizeof(ScreenRectVertex) * vertices.size(); + auto [data, offset, invalidate] = vertex_buffer.Map(size, 16); + std::memcpy(data, vertices.data(), size); + vertex_buffer.Commit(size); + + const u32 scale_factor = GetResolutionScaleFactor(); + draw_info.i_resolution = + Common::MakeVec(static_cast(screen_info_l.texture.width * scale_factor), + static_cast(screen_info_l.texture.height * scale_factor), + 1.0f / static_cast(screen_info_l.texture.width * scale_factor), + 1.0f / static_cast(screen_info_l.texture.height * scale_factor)); + draw_info.o_resolution = Common::MakeVec(h, w, 1.0f / h, 1.0f / w); + draw_info.screen_id_l = screen_id_l; + draw_info.screen_id_r = screen_id_r; + + scheduler.Record([this, offset = offset, info = draw_info](vk::CommandBuffer cmdbuf) { + const u32 first_vertex = static_cast(offset) / sizeof(ScreenRectVertex); + cmdbuf.pushConstants(*present_pipeline_layout, + vk::ShaderStageFlagBits::eFragment | vk::ShaderStageFlagBits::eVertex, + 0, sizeof(info), &info); + + cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0}); + cmdbuf.draw(4, 1, first_vertex, 0); + }); +} + +void RendererVulkan::DrawTopScreen(const Layout::FramebufferLayout& layout, + const Common::Rectangle& top_screen) { + if (!layout.top_screen_enabled) { + return; + } + + const float top_screen_left = static_cast(top_screen.left); + const float top_screen_top = static_cast(top_screen.top); + const float top_screen_width = static_cast(top_screen.GetWidth()); + const float top_screen_height = static_cast(top_screen.GetHeight()); + + const auto orientation = layout.is_rotated ? 
Layout::DisplayOrientation::Landscape + : Layout::DisplayOrientation::Portrait; + switch (Settings::values.render_3d.GetValue()) { + case Settings::StereoRenderOption::Off: { + const int eye = static_cast(Settings::values.mono_render_option.GetValue()); + DrawSingleScreen(eye, top_screen_left, top_screen_top, top_screen_width, top_screen_height, + orientation); + break; + } + case Settings::StereoRenderOption::SideBySide: { + DrawSingleScreen(0, top_screen_left / 2, top_screen_top, top_screen_width / 2, + top_screen_height, orientation); + draw_info.layer = 1; + DrawSingleScreen(1, static_cast((top_screen_left / 2) + (layout.width / 2)), + top_screen_top, top_screen_width / 2, top_screen_height, orientation); + break; + } + case Settings::StereoRenderOption::CardboardVR: { + DrawSingleScreen(0, top_screen_left, top_screen_top, top_screen_width, top_screen_height, + orientation); + draw_info.layer = 1; + DrawSingleScreen( + 1, static_cast(layout.cardboard.top_screen_right_eye + (layout.width / 2)), + top_screen_top, top_screen_width, top_screen_height, orientation); + break; + } + case Settings::StereoRenderOption::Anaglyph: + case Settings::StereoRenderOption::Interlaced: + case Settings::StereoRenderOption::ReverseInterlaced: { + DrawSingleScreenStereo(0, 1, top_screen_left, top_screen_top, top_screen_width, + top_screen_height, orientation); + break; + } + } +} + +void RendererVulkan::DrawBottomScreen(const Layout::FramebufferLayout& layout, + const Common::Rectangle& bottom_screen) { + if (!layout.bottom_screen_enabled) { + return; + } + + const float bottom_screen_left = static_cast(bottom_screen.left); + const float bottom_screen_top = static_cast(bottom_screen.top); + const float bottom_screen_width = static_cast(bottom_screen.GetWidth()); + const float bottom_screen_height = static_cast(bottom_screen.GetHeight()); + + const auto orientation = layout.is_rotated ? 
Layout::DisplayOrientation::Landscape + : Layout::DisplayOrientation::Portrait; + + switch (Settings::values.render_3d.GetValue()) { + case Settings::StereoRenderOption::Off: { + DrawSingleScreen(2, bottom_screen_left, bottom_screen_top, bottom_screen_width, + bottom_screen_height, orientation); + break; + } + case Settings::StereoRenderOption::SideBySide: { + DrawSingleScreen(2, bottom_screen_left / 2, bottom_screen_top, bottom_screen_width / 2, + bottom_screen_height, orientation); + draw_info.layer = 1; + DrawSingleScreen(2, static_cast((bottom_screen_left / 2) + (layout.width / 2)), + bottom_screen_top, bottom_screen_width / 2, bottom_screen_height, + orientation); + break; + } + case Settings::StereoRenderOption::CardboardVR: { + DrawSingleScreen(2, bottom_screen_left, bottom_screen_top, bottom_screen_width, + bottom_screen_height, orientation); + draw_info.layer = 1; + DrawSingleScreen( + 2, static_cast(layout.cardboard.bottom_screen_right_eye + (layout.width / 2)), + bottom_screen_top, bottom_screen_width, bottom_screen_height, orientation); + break; + } + case Settings::StereoRenderOption::Anaglyph: + case Settings::StereoRenderOption::Interlaced: + case Settings::StereoRenderOption::ReverseInterlaced: { + DrawSingleScreenStereo(2, 2, bottom_screen_left, bottom_screen_top, bottom_screen_width, + bottom_screen_height, orientation); + break; + } + } +} + +void RendererVulkan::DrawScreens(Frame* frame, const Layout::FramebufferLayout& layout, + bool flipped) { + if (settings.bg_color_update_requested.exchange(false)) { + clear_color.float32[0] = Settings::values.bg_red.GetValue(); + clear_color.float32[1] = Settings::values.bg_green.GetValue(); + clear_color.float32[2] = Settings::values.bg_blue.GetValue(); + } + if (settings.shader_update_requested.exchange(false)) { + ReloadPipeline(); + } + + PrepareDraw(frame, layout); + + const auto& top_screen = layout.top_screen; + const auto& bottom_screen = layout.bottom_screen; + draw_info.modelview = MakeOrthographicMatrix(layout.width, layout.height); + + draw_info.layer = 0; + if (!Settings::values.swap_screen.GetValue()) { + DrawTopScreen(layout, top_screen); + draw_info.layer = 0; + DrawBottomScreen(layout, bottom_screen); + } else { + DrawBottomScreen(layout, bottom_screen); + draw_info.layer = 0; + DrawTopScreen(layout, top_screen); + } + + scheduler.Record([image = frame->image](vk::CommandBuffer cmdbuf) { + const vk::ImageMemoryBarrier render_barrier = { + .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + + cmdbuf.endRenderPass(); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, render_barrier); + }); +} + +void RendererVulkan::SwapBuffers() { + const Layout::FramebufferLayout& layout = render_window.GetFramebufferLayout(); + PrepareRendertarget(); + RenderScreenshot(); + RenderToWindow(main_window, layout, false); +#ifndef ANDROID + if (Settings::values.layout_option.GetValue() == Settings::LayoutOption::SeparateWindows) { + ASSERT(secondary_window); + 
const auto& secondary_layout = secondary_window->GetFramebufferLayout(); + if (!second_window) { + second_window = std::make_unique(*secondary_window, instance, scheduler); + } + RenderToWindow(*second_window, secondary_layout, false); + secondary_window->PollEvents(); + } +#endif + rasterizer.TickFrame(); + EndFrame(); +} + +void RendererVulkan::RenderScreenshot() { + if (!settings.screenshot_requested.exchange(false)) { + return; + } + + const Layout::FramebufferLayout layout{settings.screenshot_framebuffer_layout}; + const u32 width = layout.width; + const u32 height = layout.height; + + const vk::ImageCreateInfo staging_image_info = { + .imageType = vk::ImageType::e2D, + .format = vk::Format::eB8G8R8A8Unorm, + .extent{ + .width = width, + .height = height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = vk::SampleCountFlagBits::e1, + .tiling = vk::ImageTiling::eLinear, + .usage = vk::ImageUsageFlagBits::eTransferDst, + .initialLayout = vk::ImageLayout::eUndefined, + }; + + const VmaAllocationCreateInfo alloc_create_info = { + .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT | + VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT, + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST, + .requiredFlags = 0, + .preferredFlags = 0, + .pool = VK_NULL_HANDLE, + .pUserData = nullptr, + }; + + VkImage unsafe_image{}; + VmaAllocation allocation{}; + VmaAllocationInfo alloc_info; + VkImageCreateInfo unsafe_image_info = static_cast(staging_image_info); + + VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, + &alloc_create_info, &unsafe_image, &allocation, &alloc_info); + if (result != VK_SUCCESS) [[unlikely]] { + LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result); + UNREACHABLE(); + } + vk::Image staging_image{unsafe_image}; + + Frame frame{}; + main_window.RecreateFrame(&frame, width, height); + + DrawScreens(&frame, layout, false); + + scheduler.Record( + [width, height, source_image = frame.image, staging_image](vk::CommandBuffer cmdbuf) { + const std::array read_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = source_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eUndefined, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = staging_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + }; + const std::array write_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferRead, + .dstAccessMask = vk::AccessFlagBits::eMemoryWrite, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = 
VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = source_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = staging_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + }; + static constexpr vk::MemoryBarrier memory_write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + }; + + const std::array offsets = { + vk::Offset3D{0, 0, 0}, + vk::Offset3D{static_cast(width), static_cast(height), 1}, + }; + + const vk::ImageBlit blit_area = { + .srcSubresource{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .srcOffsets = offsets, + .dstSubresource{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .dstOffsets = offsets, + }; + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, read_barriers); + cmdbuf.blitImage(source_image, vk::ImageLayout::eTransferSrcOptimal, staging_image, + vk::ImageLayout::eTransferDstOptimal, blit_area, vk::Filter::eNearest); + cmdbuf.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, memory_write_barrier, {}, write_barriers); + }); + + // Ensure the copy is fully completed before saving the screenshot + scheduler.Finish(); + + const vk::Device device = instance.GetDevice(); + + // Get layout of the image (including row pitch) + const vk::ImageSubresource subresource = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .arrayLayer = 0, + }; + + const vk::SubresourceLayout subresource_layout = + device.getImageSubresourceLayout(staging_image, subresource); + + // Copy backing image data to the QImage screenshot buffer + const u8* data = reinterpret_cast(alloc_info.pMappedData); + std::memcpy(settings.screenshot_bits, data + subresource_layout.offset, + subresource_layout.size); + + // Destroy allocated resources + vmaDestroyImage(instance.GetAllocator(), frame.image, frame.allocation); + device.destroyFramebuffer(frame.framebuffer); + device.destroyImageView(frame.image_view); + + settings.screenshot_complete_callback(); +} + +void RendererVulkan::ReportDriver() const { + const std::string vendor_name{instance.GetVendorName()}; + const std::string model_name{instance.GetModelName()}; + const std::string driver_version = GetDriverVersion(instance); + const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version); + + const std::string api_version = GetReadableVersion(instance.ApiVersion()); + + const std::string extensions = + fmt::format("{}", fmt::join(instance.GetAvailableExtensions(), ", ")); + + LOG_INFO(Render_Vulkan, "VK_DRIVER: 
{}", driver_name); + LOG_INFO(Render_Vulkan, "VK_DEVICE: {}", model_name); + LOG_INFO(Render_Vulkan, "VK_VERSION: {}", api_version); + + static constexpr auto field = Common::Telemetry::FieldType::UserSystem; + telemetry_session.AddField(field, "GPU_Vendor", vendor_name); + telemetry_session.AddField(field, "GPU_Model", model_name); + telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name); + telemetry_session.AddField(field, "GPU_Vulkan_Version", api_version); + telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h new file mode 100644 index 000000000..165e11e4c --- /dev/null +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -0,0 +1,139 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include "common/common_types.h" +#include "common/math_util.h" +#include "core/hw/gpu.h" +#include "video_core/renderer_base.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_present_window.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" + +namespace Core { +class System; +class TelemetrySession; +} // namespace Core + +namespace Memory { +class MemorySystem; +} + +namespace Layout { +struct FramebufferLayout; +} + +namespace Vulkan { + +struct TextureInfo { + u32 width; + u32 height; + GPU::Regs::PixelFormat format; + vk::Image image; + vk::ImageView image_view; + VmaAllocation allocation; +}; + +struct ScreenInfo { + TextureInfo texture; + Common::Rectangle texcoords; + vk::ImageView image_view; +}; + +struct PresentUniformData { + std::array modelview; + Common::Vec4f i_resolution; + Common::Vec4f o_resolution; + int screen_id_l = 0; + int screen_id_r = 0; + int layer = 0; + int reverse_interlaced = 0; +}; +static_assert(sizeof(PresentUniformData) == 112, + "PresentUniformData does not structure in shader!"); + +class RendererVulkan : public VideoCore::RendererBase { + static constexpr std::size_t PRESENT_PIPELINES = 3; + +public: + explicit RendererVulkan(Core::System& system, Frontend::EmuWindow& window, + Frontend::EmuWindow* secondary_window); + ~RendererVulkan() override; + + [[nodiscard]] VideoCore::RasterizerInterface* Rasterizer() override { + return &rasterizer; + } + + void NotifySurfaceChanged() override { + main_window.NotifySurfaceChanged(); + } + + void SwapBuffers() override; + void TryPresent(int timeout_ms, bool is_secondary) override {} + void Sync() override; + +private: + void ReportDriver() const; + void ReloadPipeline(); + void CompileShaders(); + void BuildLayouts(); + void BuildPipelines(); + void ConfigureFramebufferTexture(TextureInfo& texture, + const GPU::Regs::FramebufferConfig& framebuffer); + void ConfigureRenderPipeline(); + void PrepareRendertarget(); + void RenderScreenshot(); + void PrepareDraw(Frame* frame, const Layout::FramebufferLayout& layout); + void RenderToWindow(PresentWindow& window, const Layout::FramebufferLayout& layout, + bool flipped); + + void DrawScreens(Frame* frame, const Layout::FramebufferLayout& layout, bool flipped); + void DrawBottomScreen(const 
Layout::FramebufferLayout& layout, + const Common::Rectangle& bottom_screen); + void DrawTopScreen(const Layout::FramebufferLayout& layout, + const Common::Rectangle& top_screen); + void DrawSingleScreen(u32 screen_id, float x, float y, float w, float h, + Layout::DisplayOrientation orientation); + void DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, float x, float y, float w, + float h, Layout::DisplayOrientation orientation); + void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, + ScreenInfo& screen_info, bool right_eye); + void LoadColorToActiveVkTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture); + +private: + Memory::MemorySystem& memory; + Core::TelemetrySession& telemetry_session; + + Instance instance; + Scheduler scheduler; + RenderpassCache renderpass_cache; + DescriptorPool pool; + PresentWindow main_window; + StreamBuffer vertex_buffer; + RasterizerVulkan rasterizer; + std::unique_ptr second_window; + + vk::UniquePipelineLayout present_pipeline_layout; + DescriptorSetProvider present_set_provider; + std::array present_pipelines; + std::array present_shaders; + std::array present_samplers; + vk::ShaderModule present_vertex_shader; + u32 current_pipeline = 0; + + std::array screen_infos{}; + std::array present_textures{}; + PresentUniformData draw_info{}; + vk::ClearColorValue clear_color{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_blit_helper.cpp b/src/video_core/renderer_vulkan/vk_blit_helper.cpp new file mode 100644 index 000000000..a9e428703 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_blit_helper.cpp @@ -0,0 +1,548 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#include "common/vector_math.h"
+#include "video_core/renderer_vulkan/vk_blit_helper.h"
+#include "video_core/renderer_vulkan/vk_instance.h"
+#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_shader_util.h"
+#include "video_core/renderer_vulkan/vk_texture_runtime.h"
+
+#include "video_core/host_shaders/format_reinterpreter/vulkan_d24s8_to_rgba8_comp_spv.h"
+#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
+#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
+#include "video_core/host_shaders/vulkan_depth_to_buffer_comp_spv.h"
+
+namespace Vulkan {
+
+using VideoCore::PixelFormat;
+
+namespace {
+struct PushConstants {
+    std::array<float, 2> tex_scale;
+    std::array<float, 2> tex_offset;
+};
+
+struct ComputeInfo {
+    Common::Vec2i src_offset;
+    Common::Vec2i src_extent;
+};
+
+inline constexpr vk::PushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
+    .stageFlags = vk::ShaderStageFlagBits::eCompute,
+    .offset = 0,
+    .size = 2 * sizeof(Common::Vec2i),
+};
+
+constexpr std::array<vk::DescriptorSetLayoutBinding, 3> COMPUTE_BINDINGS = {{
+    {0, vk::DescriptorType::eSampledImage, 1, vk::ShaderStageFlagBits::eCompute},
+    {1, vk::DescriptorType::eSampledImage, 1, vk::ShaderStageFlagBits::eCompute},
+    {2, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eCompute},
+}};
+
+constexpr std::array<vk::DescriptorSetLayoutBinding, 3> COMPUTE_BUFFER_BINDINGS = {{
+    {0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eCompute},
+    {1, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eCompute},
+    {2, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute},
+}};
+
+constexpr std::array<vk::DescriptorSetLayoutBinding, 2> TWO_TEXTURES_BINDINGS = {{
+    {0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
+    {1, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
+}};
+
+inline constexpr vk::PushConstantRange PUSH_CONSTANT_RANGE{
+    .stageFlags = vk::ShaderStageFlagBits::eVertex,
+    .offset = 0,
+    .size = sizeof(PushConstants),
+};
+constexpr vk::PipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{
+    .vertexBindingDescriptionCount = 0,
+    .pVertexBindingDescriptions = nullptr,
+    .vertexAttributeDescriptionCount = 0,
+    .pVertexAttributeDescriptions = nullptr,
+};
+constexpr vk::PipelineInputAssemblyStateCreateInfo PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO{
+    .topology = vk::PrimitiveTopology::eTriangleList,
+    .primitiveRestartEnable = VK_FALSE,
+};
+constexpr vk::PipelineViewportStateCreateInfo PIPELINE_VIEWPORT_STATE_CREATE_INFO{
+    .viewportCount = 1,
+    .pViewports = nullptr,
+    .scissorCount = 1,
+    .pScissors = nullptr,
+};
+constexpr vk::PipelineRasterizationStateCreateInfo PIPELINE_RASTERIZATION_STATE_CREATE_INFO{
+    .depthClampEnable = VK_FALSE,
+    .rasterizerDiscardEnable = VK_FALSE,
+    .polygonMode = vk::PolygonMode::eFill,
+    .cullMode = vk::CullModeFlagBits::eBack,
+    .frontFace = vk::FrontFace::eClockwise,
+    .depthBiasEnable = VK_FALSE,
+    .depthBiasConstantFactor = 0.0f,
+    .depthBiasClamp = 0.0f,
+    .depthBiasSlopeFactor = 0.0f,
+    .lineWidth = 1.0f,
+};
+constexpr vk::PipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE_INFO{
+    .rasterizationSamples = vk::SampleCountFlagBits::e1,
+    .sampleShadingEnable = VK_FALSE,
+    .minSampleShading = 0.0f,
+    .pSampleMask = nullptr,
+    .alphaToCoverageEnable = VK_FALSE,
+    .alphaToOneEnable = VK_FALSE,
+};
+constexpr std::array DYNAMIC_STATES{
+    vk::DynamicState::eViewport,
+    vk::DynamicState::eScissor,
+};
+constexpr vk::PipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{
+    .dynamicStateCount = static_cast<u32>(DYNAMIC_STATES.size()),
+    .pDynamicStates = DYNAMIC_STATES.data(),
+};
+constexpr vk::PipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO{
+    .logicOpEnable = VK_FALSE,
+    .logicOp = vk::LogicOp::eClear,
+    .attachmentCount = 0,
+    .pAttachments = nullptr,
+    .blendConstants = std::array{0.0f, 0.0f, 0.0f, 0.0f},
+};
+constexpr vk::PipelineDepthStencilStateCreateInfo PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO{
+    .depthTestEnable = VK_TRUE,
+    .depthWriteEnable = VK_TRUE,
+    .depthCompareOp = vk::CompareOp::eAlways,
+    .depthBoundsTestEnable = VK_FALSE,
+    .stencilTestEnable = VK_FALSE,
+    .front = vk::StencilOpState{},
+    .back = vk::StencilOpState{},
+    .minDepthBounds = 0.0f,
+    .maxDepthBounds = 0.0f,
+};
+
+template <vk::Filter filter>
+inline constexpr vk::SamplerCreateInfo SAMPLER_CREATE_INFO{
+    .magFilter = filter,
+    .minFilter = filter,
+    .mipmapMode = vk::SamplerMipmapMode::eNearest,
+    .addressModeU = vk::SamplerAddressMode::eClampToBorder,
+    .addressModeV = vk::SamplerAddressMode::eClampToBorder,
+    .addressModeW = vk::SamplerAddressMode::eClampToBorder,
+    .mipLodBias = 0.0f,
+    .anisotropyEnable = VK_FALSE,
+    .maxAnisotropy = 0.0f,
+    .compareEnable = VK_FALSE,
+    .compareOp = vk::CompareOp::eNever,
+    .minLod = 0.0f,
+    .maxLod = 0.0f,
+    .borderColor = vk::BorderColor::eFloatOpaqueWhite,
+    .unnormalizedCoordinates = VK_FALSE,
+};
+
+constexpr vk::PipelineLayoutCreateInfo PipelineLayoutCreateInfo(
+    const vk::DescriptorSetLayout* set_layout, bool compute = false) {
+    return vk::PipelineLayoutCreateInfo{
+        .setLayoutCount = 1,
+        .pSetLayouts = set_layout,
+        .pushConstantRangeCount = 1,
+        .pPushConstantRanges = compute ? &COMPUTE_PUSH_CONSTANT_RANGE : &PUSH_CONSTANT_RANGE,
+    };
+}
+
+constexpr std::array<vk::PipelineShaderStageCreateInfo, 2> MakeStages(
+    vk::ShaderModule vertex_shader, vk::ShaderModule fragment_shader) {
+    return std::array{
+        vk::PipelineShaderStageCreateInfo{
+            .stage = vk::ShaderStageFlagBits::eVertex,
+            .module = vertex_shader,
+            .pName = "main",
+        },
+        vk::PipelineShaderStageCreateInfo{
+            .stage = vk::ShaderStageFlagBits::eFragment,
+            .module = fragment_shader,
+            .pName = "main",
+        },
+    };
+}
+
+constexpr vk::PipelineShaderStageCreateInfo MakeStages(vk::ShaderModule compute_shader) {
+    return vk::PipelineShaderStageCreateInfo{
+        .stage = vk::ShaderStageFlagBits::eCompute,
+        .module = compute_shader,
+        .pName = "main",
+    };
+}
+
+} // Anonymous namespace
+
+BlitHelper::BlitHelper(const Instance& instance_, Scheduler& scheduler_, DescriptorPool& pool,
+                       RenderpassCache& renderpass_cache_)
+    : instance{instance_}, scheduler{scheduler_}, renderpass_cache{renderpass_cache_},
+      device{instance.GetDevice()}, compute_provider{instance, pool, COMPUTE_BINDINGS},
+      compute_buffer_provider{instance, pool, COMPUTE_BUFFER_BINDINGS},
+      two_textures_provider{instance, pool, TWO_TEXTURES_BINDINGS},
+      compute_pipeline_layout{
+          device.createPipelineLayout(PipelineLayoutCreateInfo(&compute_provider.Layout(), true))},
+      compute_buffer_pipeline_layout{device.createPipelineLayout(
+          PipelineLayoutCreateInfo(&compute_buffer_provider.Layout(), true))},
+      two_textures_pipeline_layout{
+          device.createPipelineLayout(PipelineLayoutCreateInfo(&two_textures_provider.Layout()))},
+      full_screen_vert{CompileSPV(FULL_SCREEN_TRIANGLE_VERT_SPV, device)},
+      d24s8_to_rgba8_comp{CompileSPV(VULKAN_D24S8_TO_RGBA8_COMP_SPV, device)},
+      depth_to_buffer_comp{CompileSPV(VULKAN_DEPTH_TO_BUFFER_COMP_SPV, device)},
+      blit_depth_stencil_frag{CompileSPV(VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV, device)},
+      d24s8_to_rgba8_pipeline{MakeComputePipeline(d24s8_to_rgba8_comp, compute_pipeline_layout)},
+      depth_to_buffer_pipeline{
+          MakeComputePipeline(depth_to_buffer_comp, compute_buffer_pipeline_layout)},
+      depth_blit_pipeline{MakeDepthStencilBlitPipeline()},
+      linear_sampler{device.createSampler(SAMPLER_CREATE_INFO<vk::Filter::eLinear>)},
+      nearest_sampler{device.createSampler(SAMPLER_CREATE_INFO<vk::Filter::eNearest>)} {}
+
+BlitHelper::~BlitHelper() {
+    device.destroyPipelineLayout(compute_pipeline_layout);
+    device.destroyPipelineLayout(compute_buffer_pipeline_layout);
+    device.destroyPipelineLayout(two_textures_pipeline_layout);
+    device.destroyShaderModule(full_screen_vert);
+    device.destroyShaderModule(d24s8_to_rgba8_comp);
+    device.destroyShaderModule(depth_to_buffer_comp);
+    device.destroyShaderModule(blit_depth_stencil_frag);
+    device.destroyPipeline(depth_to_buffer_pipeline);
+    device.destroyPipeline(d24s8_to_rgba8_pipeline);
+    device.destroyPipeline(depth_blit_pipeline);
+    device.destroySampler(linear_sampler);
+    device.destroySampler(nearest_sampler);
+}
+
+void BindBlitState(vk::CommandBuffer cmdbuf, vk::PipelineLayout layout,
+                   const VideoCore::TextureBlit& blit) {
+    const vk::Offset2D offset{
+        .x = std::min(blit.dst_rect.left, blit.dst_rect.right),
+        .y = std::min(blit.dst_rect.bottom, blit.dst_rect.top),
+    };
+    const vk::Extent2D extent{
+        .width = blit.dst_rect.GetWidth(),
+        .height = blit.dst_rect.GetHeight(),
+    };
+    const vk::Viewport viewport{
+        .x = static_cast<float>(offset.x),
+        .y = static_cast<float>(offset.y),
+        .width = static_cast<float>(extent.width),
+        .height = static_cast<float>(extent.height),
+        .minDepth = 0.0f,
+        .maxDepth = 1.0f,
+    };
+    const vk::Rect2D scissor{
+        .offset = offset,
+        .extent = extent,
+    };
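+
+    // The push constants below feed the full-screen-triangle vertex shader, which
+    // effectively computes uv = tex_offset + uv * tex_scale, i.e. the source
+    // rectangle origin and size in texels.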
const float scale_x = static_cast(blit.src_rect.GetWidth()); + const float scale_y = static_cast(blit.src_rect.GetHeight()); + const PushConstants push_constants{ + .tex_scale = {scale_x, scale_y}, + .tex_offset = {static_cast(blit.src_rect.left), + static_cast(blit.src_rect.bottom)}, + }; + cmdbuf.setViewport(0, viewport); + cmdbuf.setScissor(0, scissor); + cmdbuf.pushConstants(layout, vk::ShaderStageFlagBits::eVertex, 0, sizeof(push_constants), + &push_constants); +} + +bool BlitHelper::BlitDepthStencil(Surface& source, Surface& dest, + const VideoCore::TextureBlit& blit) { + if (!instance.IsShaderStencilExportSupported()) { + LOG_ERROR(Render_Vulkan, "Unable to emulate depth stencil images"); + return false; + } + + const vk::Rect2D dst_render_area = { + .offset = {0, 0}, + .extent = {dest.GetScaledWidth(), dest.GetScaledHeight()}, + }; + + std::array textures{}; + textures[0].image_info = vk::DescriptorImageInfo{ + .sampler = nearest_sampler, + .imageView = source.DepthView(), + .imageLayout = vk::ImageLayout::eGeneral, + }; + textures[1].image_info = vk::DescriptorImageInfo{ + .sampler = nearest_sampler, + .imageView = source.StencilView(), + .imageLayout = vk::ImageLayout::eGeneral, + }; + + const auto descriptor_set = two_textures_provider.Acquire(textures); + + const RenderPass depth_pass = { + .framebuffer = dest.Framebuffer(), + .render_pass = + renderpass_cache.GetRenderpass(PixelFormat::Invalid, dest.pixel_format, false), + .render_area = dst_render_area, + }; + renderpass_cache.BeginRendering(depth_pass); + + scheduler.Record([blit, descriptor_set, this](vk::CommandBuffer cmdbuf) { + const vk::PipelineLayout layout = two_textures_pipeline_layout; + + cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, depth_blit_pipeline); + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, descriptor_set, {}); + BindBlitState(cmdbuf, layout, blit); + cmdbuf.draw(3, 1, 0, 0); + }); + scheduler.MakeDirty(StateFlags::Pipeline); + return true; +} + +bool BlitHelper::ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, + const VideoCore::TextureBlit& blit) { + std::array textures{}; + textures[0].image_info = vk::DescriptorImageInfo{ + .imageView = source.DepthView(), + .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, + }; + textures[1].image_info = vk::DescriptorImageInfo{ + .imageView = source.StencilView(), + .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, + }; + textures[2].image_info = vk::DescriptorImageInfo{ + .imageView = dest.ImageView(), + .imageLayout = vk::ImageLayout::eGeneral, + }; + + const auto descriptor_set = compute_provider.Acquire(textures); + + renderpass_cache.EndRendering(); + scheduler.Record([this, descriptor_set, blit, src_image = source.Image(), + dst_image = dest.Image()](vk::CommandBuffer cmdbuf) { + const std::array pre_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange{ + .aspectMask = + vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eNone, + 
.dstAccessMask = vk::AccessFlagBits::eShaderWrite, + .oldLayout = vk::ImageLayout::eUndefined, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + }; + const std::array post_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eShaderRead, + .dstAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite | + vk::AccessFlagBits::eDepthStencilAttachmentRead, + .oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange{ + .aspectMask = + vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eShaderWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }}; + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eEarlyFragmentTests | + vk::PipelineStageFlagBits::eLateFragmentTests, + vk::PipelineStageFlagBits::eComputeShader, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers); + + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0, + descriptor_set, {}); + cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, d24s8_to_rgba8_pipeline); + + const auto src_offset = Common::MakeVec(blit.src_rect.left, blit.src_rect.bottom); + cmdbuf.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0, + sizeof(Common::Vec2i), src_offset.AsArray()); + + cmdbuf.dispatch(blit.src_rect.GetWidth() / 8, blit.src_rect.GetHeight() / 8, 1); + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eEarlyFragmentTests | + vk::PipelineStageFlagBits::eLateFragmentTests | + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers); + }); + return true; +} + +bool BlitHelper::DepthToBuffer(Surface& source, vk::Buffer buffer, + const VideoCore::BufferTextureCopy& copy) { + std::array textures{}; + textures[0].image_info = vk::DescriptorImageInfo{ + .sampler = nearest_sampler, + .imageView = source.DepthView(), + .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, + }; + textures[1].image_info = vk::DescriptorImageInfo{ + .sampler = nearest_sampler, + .imageView = source.StencilView(), + .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, + }; + textures[2].buffer_info = vk::DescriptorBufferInfo{ + .buffer = buffer, + .offset = copy.buffer_offset, + .range = copy.buffer_size, + }; + + const auto descriptor_set = compute_buffer_provider.Acquire(textures); + + renderpass_cache.EndRendering(); + scheduler.Record([this, 
descriptor_set, copy, src_image = source.Image(), + extent = source.RealExtent(false)](vk::CommandBuffer cmdbuf) { + const vk::ImageMemoryBarrier pre_barrier = { + .srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + const vk::ImageMemoryBarrier post_barrier = { + .srcAccessMask = vk::AccessFlagBits::eShaderRead, + .dstAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite | + vk::AccessFlagBits::eDepthStencilAttachmentRead, + .oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eEarlyFragmentTests | + vk::PipelineStageFlagBits::eLateFragmentTests, + vk::PipelineStageFlagBits::eComputeShader, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier); + + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_buffer_pipeline_layout, + 0, descriptor_set, {}); + cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, depth_to_buffer_pipeline); + + const ComputeInfo info = { + .src_offset = Common::Vec2i{static_cast(copy.texture_rect.left), + static_cast(copy.texture_rect.bottom)}, + .src_extent = + Common::Vec2i{static_cast(extent.width), static_cast(extent.height)}, + }; + cmdbuf.pushConstants(compute_buffer_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0, + sizeof(ComputeInfo), &info); + + cmdbuf.dispatch(copy.texture_rect.GetWidth() / 8, copy.texture_rect.GetHeight() / 8, 1); + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eEarlyFragmentTests | + vk::PipelineStageFlagBits::eLateFragmentTests | + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); + }); + return true; +} + +vk::Pipeline BlitHelper::MakeComputePipeline(vk::ShaderModule shader, vk::PipelineLayout layout) { + const vk::ComputePipelineCreateInfo compute_info = { + .stage = MakeStages(shader), + .layout = layout, + }; + + if (const auto result = device.createComputePipeline({}, compute_info); + result.result == vk::Result::eSuccess) { + return result.value; + } else { + LOG_CRITICAL(Render_Vulkan, "Compute pipeline creation failed!"); + UNREACHABLE(); + } +} + +vk::Pipeline BlitHelper::MakeDepthStencilBlitPipeline() { + if (!instance.IsShaderStencilExportSupported()) { + return VK_NULL_HANDLE; + } + + const std::array stages = MakeStages(full_screen_vert, blit_depth_stencil_frag); + const auto renderpass = renderpass_cache.GetRenderpass(VideoCore::PixelFormat::Invalid, + VideoCore::PixelFormat::D24S8, false); + vk::GraphicsPipelineCreateInfo depth_stencil_info = { + .stageCount = static_cast(stages.size()), + .pStages = 
stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = two_textures_pipeline_layout, + .renderPass = renderpass, + }; + + if (const auto result = device.createGraphicsPipeline({}, depth_stencil_info); + result.result == vk::Result::eSuccess) { + return result.value; + } else { + LOG_CRITICAL(Render_Vulkan, "Depth stencil blit pipeline creation failed!"); + UNREACHABLE(); + } + return VK_NULL_HANDLE; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_blit_helper.h b/src/video_core/renderer_vulkan/vk_blit_helper.h new file mode 100644 index 000000000..785286dbb --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_blit_helper.h @@ -0,0 +1,71 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/rasterizer_cache/pixel_format.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" + +namespace VideoCore { +struct TextureBlit; +struct BufferTextureCopy; +} // namespace VideoCore + +namespace Vulkan { + +class Instance; +class RenderpassCache; +class Scheduler; +class Surface; + +class BlitHelper { + friend class TextureRuntime; + +public: + BlitHelper(const Instance& instance, Scheduler& scheduler, DescriptorPool& pool, + RenderpassCache& renderpass_cache); + ~BlitHelper(); + + bool BlitDepthStencil(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit); + + bool ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit); + + bool DepthToBuffer(Surface& source, vk::Buffer buffer, + const VideoCore::BufferTextureCopy& copy); + +private: + /// Creates compute pipelines used for blit + vk::Pipeline MakeComputePipeline(vk::ShaderModule shader, vk::PipelineLayout layout); + + /// Creates graphics pipelines used for blit + vk::Pipeline MakeDepthStencilBlitPipeline(); + +private: + const Instance& instance; + Scheduler& scheduler; + RenderpassCache& renderpass_cache; + + vk::Device device; + vk::RenderPass r32_renderpass; + + DescriptorSetProvider compute_provider; + DescriptorSetProvider compute_buffer_provider; + DescriptorSetProvider two_textures_provider; + vk::PipelineLayout compute_pipeline_layout; + vk::PipelineLayout compute_buffer_pipeline_layout; + vk::PipelineLayout two_textures_pipeline_layout; + + vk::ShaderModule full_screen_vert; + vk::ShaderModule d24s8_to_rgba8_comp; + vk::ShaderModule depth_to_buffer_comp; + vk::ShaderModule blit_depth_stencil_frag; + + vk::Pipeline d24s8_to_rgba8_pipeline; + vk::Pipeline depth_to_buffer_pipeline; + vk::Pipeline depth_blit_pipeline; + vk::Sampler linear_sampler; + vk::Sampler nearest_sampler; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_common.h b/src/video_core/renderer_vulkan/vk_common.h index a8147acbe..3fd6bc45c 100644 --- a/src/video_core/renderer_vulkan/vk_common.h +++ b/src/video_core/renderer_vulkan/vk_common.h @@ -9,6 +9,7 @@ #define VK_NO_PROTOTYPES #define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1 #define 
VULKAN_HPP_NO_CONSTRUCTORS
+#define VULKAN_HPP_NO_UNION_CONSTRUCTORS
 #define VULKAN_HPP_NO_STRUCT_SETTERS
 #include <vulkan/vulkan.hpp>
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
new file mode 100644
index 000000000..3909da237
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -0,0 +1,141 @@
+// Copyright 2023 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/microprofile.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_instance.h"
+
+namespace Vulkan {
+
+MICROPROFILE_DEFINE(Vulkan_DescriptorSetAcquire, "Vulkan", "Descriptor Set Acquire",
+                    MP_RGB(64, 128, 256));
+
+constexpr u32 MAX_BATCH_SIZE = 8;
+
+DescriptorPool::DescriptorPool(const Instance& instance_) : instance{instance_} {
+    auto& pool = pools.emplace_back();
+    pool = CreatePool();
+}
+
+DescriptorPool::~DescriptorPool() = default;
+
+std::vector<vk::DescriptorSet> DescriptorPool::Allocate(vk::DescriptorSetLayout layout,
+                                                        u32 num_sets) {
+    std::array<vk::DescriptorSetLayout, MAX_BATCH_SIZE> layouts;
+    layouts.fill(layout);
+
+    u32 current_pool = 0;
+    vk::DescriptorSetAllocateInfo alloc_info = {
+        .descriptorPool = *pools[current_pool],
+        .descriptorSetCount = num_sets,
+        .pSetLayouts = layouts.data(),
+    };
+
+    while (true) {
+        try {
+            return instance.GetDevice().allocateDescriptorSets(alloc_info);
+        } catch (const vk::OutOfPoolMemoryError&) {
+            current_pool++;
+            if (current_pool == pools.size()) {
+                LOG_INFO(Render_Vulkan, "Ran out of pools, creating a new one!");
+                auto& pool = pools.emplace_back();
+                pool = CreatePool();
+            }
+            alloc_info.descriptorPool = *pools[current_pool];
+        }
+    }
+}
+
+vk::DescriptorSet DescriptorPool::Allocate(vk::DescriptorSetLayout layout) {
+    const auto sets = Allocate(layout, 1);
+    return sets[0];
+}
+
+vk::UniqueDescriptorPool DescriptorPool::CreatePool() {
+    // Choose a sane pool size good for most games
+    static constexpr std::array<vk::DescriptorPoolSize, 6> pool_sizes = {{
+        {vk::DescriptorType::eUniformBufferDynamic, 64},
+        {vk::DescriptorType::eUniformTexelBuffer, 64},
+        {vk::DescriptorType::eCombinedImageSampler, 4096},
+        {vk::DescriptorType::eSampledImage, 256},
+        {vk::DescriptorType::eStorageImage, 256},
+        {vk::DescriptorType::eStorageBuffer, 32},
+    }};
+
+    const vk::DescriptorPoolCreateInfo descriptor_pool_info = {
+        .maxSets = 4098,
+        .poolSizeCount = static_cast<u32>(pool_sizes.size()),
+        .pPoolSizes = pool_sizes.data(),
+    };
+
+    return instance.GetDevice().createDescriptorPoolUnique(descriptor_pool_info);
+}
+
+DescriptorSetProvider::DescriptorSetProvider(
+    const Instance& instance, DescriptorPool& pool_,
+    std::span<const vk::DescriptorSetLayoutBinding> bindings)
+    : pool{pool_}, device{instance.GetDevice()} {
+    std::array<vk::DescriptorUpdateTemplateEntry, MAX_DESCRIPTORS> update_entries;
+
+    for (u32 i = 0; i < bindings.size(); i++) {
+        update_entries[i] = vk::DescriptorUpdateTemplateEntry{
+            .dstBinding = bindings[i].binding,
+            .dstArrayElement = 0,
+            .descriptorCount = bindings[i].descriptorCount,
+            .descriptorType = bindings[i].descriptorType,
+            .offset = i * sizeof(DescriptorData),
+            .stride = sizeof(DescriptorData),
+        };
+    }
+
+    const vk::DescriptorSetLayoutCreateInfo layout_info = {
+        .bindingCount = static_cast<u32>(bindings.size()),
+        .pBindings = bindings.data(),
+    };
+    layout = device.createDescriptorSetLayoutUnique(layout_info);
+
+    const vk::DescriptorUpdateTemplateCreateInfo template_info = {
+        .descriptorUpdateEntryCount = static_cast<u32>(bindings.size()),
+        .pDescriptorUpdateEntries = update_entries.data(),
+        .templateType =
vk::DescriptorUpdateTemplateType::eDescriptorSet, + .descriptorSetLayout = *layout, + }; + update_template = device.createDescriptorUpdateTemplateUnique(template_info); +} + +DescriptorSetProvider::~DescriptorSetProvider() = default; + +vk::DescriptorSet DescriptorSetProvider::Acquire(std::span data) { + MICROPROFILE_SCOPE(Vulkan_DescriptorSetAcquire); + DescriptorSetData key{}; + std::memcpy(key.data(), data.data(), data.size_bytes()); + const auto [it, new_set] = descriptor_set_map.try_emplace(key); + if (!new_set) { + return it->second; + } + if (free_sets.empty()) { + free_sets = pool.Allocate(*layout, MAX_BATCH_SIZE); + } + it.value() = free_sets.back(); + free_sets.pop_back(); + device.updateDescriptorSetWithTemplate(it->second, *update_template, data[0]); + return it->second; +} + +void DescriptorSetProvider::FreeWithImage(vk::ImageView image_view) { + for (auto it = descriptor_set_map.begin(); it != descriptor_set_map.end();) { + const auto& [data, set] = *it; + const bool has_image = std::any_of(data.begin(), data.end(), [image_view](auto& info) { + return info.image_info.imageView == image_view; + }); + if (has_image) { + free_sets.push_back(set); + it = descriptor_set_map.erase(it); + } else { + it++; + } + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h new file mode 100644 index 000000000..2990cd294 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -0,0 +1,92 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/hash.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class Instance; + +constexpr u32 MAX_DESCRIPTORS = 7; + +union DescriptorData { + vk::DescriptorImageInfo image_info; + vk::DescriptorBufferInfo buffer_info; + vk::BufferView buffer_view; + + bool operator==(const DescriptorData& other) const noexcept { + return std::memcmp(this, &other, sizeof(DescriptorData)) == 0; + } +}; + +using DescriptorSetData = std::array; + +struct DataHasher { + u64 operator()(const DescriptorSetData& data) const noexcept { + return Common::ComputeHash64(data.data(), sizeof(data)); + } +}; + +/** + * An interface for allocating descriptor sets that manages a collection of descriptor pools. + */ +class DescriptorPool { +public: + explicit DescriptorPool(const Instance& instance); + ~DescriptorPool(); + + std::vector Allocate(vk::DescriptorSetLayout layout, u32 num_sets); + + vk::DescriptorSet Allocate(vk::DescriptorSetLayout layout); + +private: + vk::UniqueDescriptorPool CreatePool(); + +private: + const Instance& instance; + std::vector pools; +}; + +/** + * Allocates and caches descriptor sets of a specific layout. 
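+ * Sets are cached by their descriptor payload: Acquire() hashes the provided data
+ * and returns a previously written set when an identical one already exists.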
+ */ +class DescriptorSetProvider { +public: + explicit DescriptorSetProvider(const Instance& instance, DescriptorPool& pool, + std::span bindings); + ~DescriptorSetProvider(); + + vk::DescriptorSet Acquire(std::span data); + + void FreeWithImage(vk::ImageView image_view); + + [[nodiscard]] vk::DescriptorSetLayout Layout() const noexcept { + return *layout; + } + + [[nodiscard]] vk::DescriptorSetLayout& Layout() noexcept { + return layout.get(); + } + + [[nodiscard]] vk::DescriptorUpdateTemplate UpdateTemplate() const noexcept { + return *update_template; + } + +private: + DescriptorPool& pool; + vk::Device device; + vk::UniqueDescriptorSetLayout layout; + vk::UniqueDescriptorUpdateTemplate update_template; + std::vector free_sets; + tsl::robin_map descriptor_set_map; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp new file mode 100644 index 000000000..7d6f72351 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -0,0 +1,290 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/hash.h" +#include "common/microprofile.h" +#include "video_core/renderer_vulkan/pica_to_vk.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" + +namespace Vulkan { + +MICROPROFILE_DEFINE(Vulkan_Pipeline, "Vulkan", "Pipeline Building", MP_RGB(0, 192, 32)); + +vk::ShaderStageFlagBits MakeShaderStage(std::size_t index) { + switch (index) { + case 0: + return vk::ShaderStageFlagBits::eVertex; + case 1: + return vk::ShaderStageFlagBits::eFragment; + case 2: + return vk::ShaderStageFlagBits::eGeometry; + default: + LOG_CRITICAL(Render_Vulkan, "Invalid shader stage index!"); + UNREACHABLE(); + } + return vk::ShaderStageFlagBits::eVertex; +} + +u64 PipelineInfo::Hash(const Instance& instance) const { + u64 info_hash = 0; + const auto append_hash = [&info_hash](const auto& data) { + const u64 data_hash = Common::ComputeStructHash64(data); + info_hash = Common::HashCombine(info_hash, data_hash); + }; + + append_hash(vertex_layout); + append_hash(attachments); + append_hash(blending); + + if (!instance.IsExtendedDynamicStateSupported()) { + append_hash(rasterization); + append_hash(depth_stencil); + } + + return info_hash; +} + +Shader::Shader(const Instance& instance) : device{instance.GetDevice()} {} + +Shader::Shader(const Instance& instance, vk::ShaderStageFlagBits stage, std::string code) + : Shader{instance} { + module = Compile(code, stage, instance.GetDevice()); + MarkDone(); +} + +Shader::~Shader() { + if (device && module) { + device.destroyShaderModule(module); + } +} + +GraphicsPipeline::GraphicsPipeline(const Instance& instance_, RenderpassCache& renderpass_cache_, + const PipelineInfo& info_, vk::PipelineCache pipeline_cache_, + vk::PipelineLayout layout_, std::array stages_, + Common::ThreadWorker* worker_) + : instance{instance_}, renderpass_cache{renderpass_cache_}, worker{worker_}, + pipeline_layout{layout_}, pipeline_cache{pipeline_cache_}, info{info_}, stages{stages_} {} + +GraphicsPipeline::~GraphicsPipeline() = default; + +bool GraphicsPipeline::TryBuild(bool wait_built) { + if (is_pending) { + return true; + } + + // If the shaders haven't been compiled yet, we cannot proceed + 
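+    // unless the caller asked to wait, in which case Build() blocks on each
+    // stage's WaitDone() instead.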
const bool shaders_pending = std::any_of( + stages.begin(), stages.end(), [](Shader* shader) { return shader && !shader->IsDone(); }); + if (!wait_built && shaders_pending) { + return false; + } + + // Ask the driver if it can give us the pipeline quickly + if (!wait_built && instance.IsPipelineCreationCacheControlSupported() && Build(true)) { + return true; + } + + // Fallback to (a)synchronous compilation + if (worker) { + worker->QueueWork([this] { Build(); }); + is_pending = true; + } else { + Build(); + } + return true; +} + +bool GraphicsPipeline::Build(bool fail_on_compile_required) { + MICROPROFILE_SCOPE(Vulkan_Pipeline); + const vk::Device device = instance.GetDevice(); + + std::array bindings; + for (u32 i = 0; i < info.vertex_layout.binding_count; i++) { + const auto& binding = info.vertex_layout.bindings[i]; + bindings[i] = vk::VertexInputBindingDescription{ + .binding = binding.binding, + .stride = binding.stride, + .inputRate = binding.fixed.Value() ? vk::VertexInputRate::eInstance + : vk::VertexInputRate::eVertex, + }; + } + + std::array attributes; + for (u32 i = 0; i < info.vertex_layout.attribute_count; i++) { + const auto& attr = info.vertex_layout.attributes[i]; + const FormatTraits& traits = instance.GetTraits(attr.type, attr.size); + attributes[i] = vk::VertexInputAttributeDescription{ + .location = attr.location, + .binding = attr.binding, + .format = traits.native, + .offset = attr.offset, + }; + + // At the end there's always the fixed binding which takes up + // at least 16 bytes so we should always be able to alias. + if (traits.needs_emulation) { + const FormatTraits& comp_four_traits = instance.GetTraits(attr.type, 4); + attributes[i].format = comp_four_traits.native; + } + } + + const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { + .vertexBindingDescriptionCount = info.vertex_layout.binding_count, + .pVertexBindingDescriptions = bindings.data(), + .vertexAttributeDescriptionCount = info.vertex_layout.attribute_count, + .pVertexAttributeDescriptions = attributes.data(), + }; + + const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { + .topology = PicaToVK::PrimitiveTopology(info.rasterization.topology), + .primitiveRestartEnable = false, + }; + + const vk::PipelineRasterizationStateCreateInfo raster_state = { + .depthClampEnable = false, + .rasterizerDiscardEnable = false, + .cullMode = PicaToVK::CullMode(info.rasterization.cull_mode), + .frontFace = PicaToVK::FrontFace(info.rasterization.cull_mode), + .depthBiasEnable = false, + .lineWidth = 1.0f, + }; + + const vk::PipelineMultisampleStateCreateInfo multisampling = { + .rasterizationSamples = vk::SampleCountFlagBits::e1, + .sampleShadingEnable = false, + }; + + const vk::PipelineColorBlendAttachmentState colorblend_attachment = { + .blendEnable = info.blending.blend_enable, + .srcColorBlendFactor = PicaToVK::BlendFunc(info.blending.src_color_blend_factor), + .dstColorBlendFactor = PicaToVK::BlendFunc(info.blending.dst_color_blend_factor), + .colorBlendOp = PicaToVK::BlendEquation(info.blending.color_blend_eq), + .srcAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.src_alpha_blend_factor), + .dstAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.dst_alpha_blend_factor), + .alphaBlendOp = PicaToVK::BlendEquation(info.blending.alpha_blend_eq), + .colorWriteMask = static_cast(info.blending.color_write_mask), + }; + + const vk::PipelineColorBlendStateCreateInfo color_blending = { + .logicOpEnable = !info.blending.blend_enable && !instance.NeedsLogicOpEmulation(), + .logicOp = 
PicaToVK::LogicOp(info.blending.logic_op), + .attachmentCount = 1, + .pAttachments = &colorblend_attachment, + .blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f}, + }; + + const vk::Viewport viewport = { + .x = 0.0f, + .y = 0.0f, + .width = 1.0f, + .height = 1.0f, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + + const vk::Rect2D scissor = { + .offset = {0, 0}, + .extent = {1, 1}, + }; + + const vk::PipelineViewportStateCreateInfo viewport_info = { + .viewportCount = 1, + .pViewports = &viewport, + .scissorCount = 1, + .pScissors = &scissor, + }; + + boost::container::static_vector dynamic_states = { + vk::DynamicState::eViewport, vk::DynamicState::eScissor, + vk::DynamicState::eStencilCompareMask, vk::DynamicState::eStencilWriteMask, + vk::DynamicState::eStencilReference, vk::DynamicState::eBlendConstants, + }; + + if (instance.IsExtendedDynamicStateSupported()) { + constexpr std::array extended = { + vk::DynamicState::eCullModeEXT, vk::DynamicState::eDepthCompareOpEXT, + vk::DynamicState::eDepthTestEnableEXT, vk::DynamicState::eDepthWriteEnableEXT, + vk::DynamicState::eFrontFaceEXT, vk::DynamicState::ePrimitiveTopologyEXT, + vk::DynamicState::eStencilOpEXT, vk::DynamicState::eStencilTestEnableEXT, + }; + dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); + } + + const vk::PipelineDynamicStateCreateInfo dynamic_info = { + .dynamicStateCount = static_cast(dynamic_states.size()), + .pDynamicStates = dynamic_states.data(), + }; + + const vk::StencilOpState stencil_op_state = { + .failOp = PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op), + .passOp = PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op), + .depthFailOp = PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op), + .compareOp = PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op), + }; + + const vk::PipelineDepthStencilStateCreateInfo depth_info = { + .depthTestEnable = static_cast(info.depth_stencil.depth_test_enable.Value()), + .depthWriteEnable = static_cast(info.depth_stencil.depth_write_enable.Value()), + .depthCompareOp = PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op), + .depthBoundsTestEnable = false, + .stencilTestEnable = static_cast(info.depth_stencil.stencil_test_enable.Value()), + .front = stencil_op_state, + .back = stencil_op_state, + }; + + u32 shader_count = 0; + std::array shader_stages; + for (std::size_t i = 0; i < stages.size(); i++) { + Shader* shader = stages[i]; + if (!shader) { + continue; + } + + shader->WaitDone(); + shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{ + .stage = MakeShaderStage(i), + .module = shader->Handle(), + .pName = "main", + }; + } + + vk::GraphicsPipelineCreateInfo pipeline_info = { + .stageCount = shader_count, + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_info, + .pInputAssemblyState = &input_assembly, + .pViewportState = &viewport_info, + .pRasterizationState = &raster_state, + .pMultisampleState = &multisampling, + .pDepthStencilState = &depth_info, + .pColorBlendState = &color_blending, + .pDynamicState = &dynamic_info, + .layout = pipeline_layout, + .renderPass = + renderpass_cache.GetRenderpass(info.attachments.color, info.attachments.depth, false), + }; + + if (fail_on_compile_required) { + pipeline_info.flags |= vk::PipelineCreateFlagBits::eFailOnPipelineCompileRequiredEXT; + } + + auto result = device.createGraphicsPipelineUnique(pipeline_cache, pipeline_info); + if (result.result == vk::Result::eSuccess) { + pipeline = std::move(result.value); + } else if 
(result.result == vk::Result::eErrorPipelineCompileRequiredEXT) { + return false; + } else { + UNREACHABLE_MSG("Graphics pipeline creation failed!"); + } + + MarkDone(); + return true; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h new file mode 100644 index 000000000..26d1daa50 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -0,0 +1,192 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/thread_worker.h" +#include "video_core/rasterizer_cache/pixel_format.h" +#include "video_core/renderer_vulkan/vk_common.h" +#include "video_core/renderer_vulkan/vk_shader_gen.h" + +namespace Common { + +struct AsyncHandle { +public: + AsyncHandle(bool is_done_ = false) : is_done{is_done_} {} + + [[nodiscard]] bool IsDone() noexcept { + return is_done.load(std::memory_order::relaxed); + } + + void WaitDone() noexcept { + std::unique_lock lock{mutex}; + condvar.wait(lock, [this] { return is_done.load(std::memory_order::relaxed); }); + } + + void MarkDone(bool done = true) noexcept { + std::scoped_lock lock{mutex}; + is_done = done; + condvar.notify_all(); + } + +private: + std::condition_variable condvar; + std::mutex mutex; + std::atomic_bool is_done{false}; +}; + +} // namespace Common + +namespace Vulkan { + +class Instance; +class RenderpassCache; + +constexpr u32 MAX_SHADER_STAGES = 3; +constexpr u32 MAX_VERTEX_ATTRIBUTES = 16; +constexpr u32 MAX_VERTEX_BINDINGS = 16; + +/** + * The pipeline state is tightly packed with bitfields to reduce + * the overhead of hashing as much as possible + */ +union RasterizationState { + u8 value = 0; + BitField<0, 2, Pica::PipelineRegs::TriangleTopology> topology; + BitField<4, 2, Pica::RasterizerRegs::CullMode> cull_mode; +}; + +union DepthStencilState { + u32 value = 0; + BitField<0, 1, u32> depth_test_enable; + BitField<1, 1, u32> depth_write_enable; + BitField<2, 1, u32> stencil_test_enable; + BitField<3, 3, Pica::FramebufferRegs::CompareFunc> depth_compare_op; + BitField<6, 3, Pica::FramebufferRegs::StencilAction> stencil_fail_op; + BitField<9, 3, Pica::FramebufferRegs::StencilAction> stencil_pass_op; + BitField<12, 3, Pica::FramebufferRegs::StencilAction> stencil_depth_fail_op; + BitField<15, 3, Pica::FramebufferRegs::CompareFunc> stencil_compare_op; +}; + +struct BlendingState { + u16 blend_enable; + u16 color_write_mask; + Pica::FramebufferRegs::LogicOp logic_op; + union { + u32 value = 0; + BitField<0, 4, Pica::FramebufferRegs::BlendFactor> src_color_blend_factor; + BitField<4, 4, Pica::FramebufferRegs::BlendFactor> dst_color_blend_factor; + BitField<8, 3, Pica::FramebufferRegs::BlendEquation> color_blend_eq; + BitField<11, 4, Pica::FramebufferRegs::BlendFactor> src_alpha_blend_factor; + BitField<15, 4, Pica::FramebufferRegs::BlendFactor> dst_alpha_blend_factor; + BitField<19, 3, Pica::FramebufferRegs::BlendEquation> alpha_blend_eq; + }; +}; + +struct DynamicState { + u32 blend_color = 0; + u8 stencil_reference; + u8 stencil_compare_mask; + u8 stencil_write_mask; + + bool operator==(const DynamicState& other) const noexcept { + return std::memcmp(this, &other, sizeof(DynamicState)) == 0; + } +}; + +union VertexBinding { + u16 value = 0; + BitField<0, 4, u16> binding; + BitField<4, 1, u16> fixed; + BitField<5, 11, u16> stride; +}; + +union VertexAttribute { + u32 value = 0; + BitField<0, 4, u32> binding; + BitField<4, 4, u32> 
location;
+    BitField<8, 3, Pica::PipelineRegs::VertexAttributeFormat> type;
+    BitField<11, 3, u32> size;
+    BitField<14, 11, u32> offset;
+};
+
+struct VertexLayout {
+    u8 binding_count;
+    u8 attribute_count;
+    std::array<VertexBinding, MAX_VERTEX_BINDINGS> bindings;
+    std::array<VertexAttribute, MAX_VERTEX_ATTRIBUTES> attributes;
+};
+
+struct AttachmentInfo {
+    VideoCore::PixelFormat color;
+    VideoCore::PixelFormat depth;
+};
+
+/**
+ * Information about a graphics/compute pipeline
+ */
+struct PipelineInfo {
+    VertexLayout vertex_layout;
+    BlendingState blending;
+    AttachmentInfo attachments;
+    RasterizationState rasterization;
+    DepthStencilState depth_stencil;
+    DynamicState dynamic;
+
+    [[nodiscard]] u64 Hash(const Instance& instance) const;
+
+    [[nodiscard]] bool IsDepthWriteEnabled() const noexcept {
+        const bool has_stencil = attachments.depth == VideoCore::PixelFormat::D24S8;
+        const bool depth_write =
+            depth_stencil.depth_test_enable && depth_stencil.depth_write_enable;
+        const bool stencil_write =
+            has_stencil && depth_stencil.stencil_test_enable && dynamic.stencil_write_mask != 0;
+
+        return depth_write || stencil_write;
+    }
+};
+
+struct Shader : public Common::AsyncHandle {
+    explicit Shader(const Instance& instance);
+    explicit Shader(const Instance& instance, vk::ShaderStageFlagBits stage, std::string code);
+    ~Shader();
+
+    [[nodiscard]] vk::ShaderModule Handle() const noexcept {
+        return module;
+    }
+
+    vk::ShaderModule module;
+    vk::Device device;
+    std::string program;
+};
+
+class GraphicsPipeline : public Common::AsyncHandle {
+public:
+    explicit GraphicsPipeline(const Instance& instance, RenderpassCache& renderpass_cache,
+                              const PipelineInfo& info, vk::PipelineCache pipeline_cache,
+                              vk::PipelineLayout layout, std::array<Shader*, MAX_SHADER_STAGES> stages,
+                              Common::ThreadWorker* worker);
+    ~GraphicsPipeline();
+
+    bool TryBuild(bool wait_built);
+
+    bool Build(bool fail_on_compile_required = false);
+
+    [[nodiscard]] vk::Pipeline Handle() const noexcept {
+        return *pipeline;
+    }
+
+private:
+    const Instance& instance;
+    RenderpassCache& renderpass_cache;
+    Common::ThreadWorker* worker;
+
+    vk::UniquePipeline pipeline;
+    vk::PipelineLayout pipeline_layout;
+    vk::PipelineCache pipeline_cache;
+
+    PipelineInfo info;
+    std::array<Shader*, MAX_SHADER_STAGES> stages;
+    bool is_pending{};
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp
index 817a1569a..02a236c5a 100644
--- a/src/video_core/renderer_vulkan/vk_instance.cpp
+++ b/src/video_core/renderer_vulkan/vk_instance.cpp
@@ -210,12 +210,16 @@ FormatTraits Instance::DetermineTraits(VideoCore::PixelFormat pixel_format, vk::
         best_usage |= vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst |
                       vk::ImageUsageFlagBits::eTransferSrc;
     }
-    if (supports_attachment) {
+    // Attachment flag is only needed for color and depth formats.
+    if (supports_attachment &&
+        VideoCore::GetFormatType(pixel_format) != VideoCore::SurfaceType::Texture) {
         best_usage |= (format_aspect & vk::ImageAspectFlagBits::eDepth)
                           ? vk::ImageUsageFlagBits::eDepthStencilAttachment
                           : vk::ImageUsageFlagBits::eColorAttachment;
     }
-    if (supports_storage) {
+    // Storage flag is only needed for shadow rendering with an RGBA8 texture.
+    // Keeping it disabled can boost performance on mobile drivers.
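+    // (Shadow rendering binds RGBA8 surfaces as storage images; see SHADOW_BINDINGS
+    // in vk_pipeline_cache.cpp.)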
+ if (supports_storage && pixel_format == VideoCore::PixelFormat::RGBA8) { best_usage |= vk::ImageUsageFlagBits::eStorage; } diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp new file mode 100644 index 000000000..4ab4c9cd9 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -0,0 +1,207 @@ +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_master_semaphore.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" + +namespace Vulkan { + +constexpr u64 WAIT_TIMEOUT = std::numeric_limits::max(); + +MasterSemaphoreTimeline::MasterSemaphoreTimeline(const Instance& instance_) : instance{instance_} { + const vk::StructureChain semaphore_chain = { + vk::SemaphoreCreateInfo{}, + vk::SemaphoreTypeCreateInfoKHR{ + .semaphoreType = vk::SemaphoreType::eTimeline, + .initialValue = 0, + }, + }; + semaphore = instance.GetDevice().createSemaphoreUnique(semaphore_chain.get()); +} + +MasterSemaphoreTimeline::~MasterSemaphoreTimeline() = default; + +void MasterSemaphoreTimeline::Refresh() { + u64 this_tick{}; + u64 counter{}; + do { + this_tick = gpu_tick.load(std::memory_order_acquire); + counter = instance.GetDevice().getSemaphoreCounterValueKHR(*semaphore); + if (counter < this_tick) { + return; + } + } while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release, + std::memory_order_relaxed)); +} + +void MasterSemaphoreTimeline::Wait(u64 tick) { + // No need to wait if the GPU is ahead of the tick + if (IsFree(tick)) { + return; + } + // Update the GPU tick and try again + Refresh(); + if (IsFree(tick)) { + return; + } + + // If none of the above is hit, fallback to a regular wait + const vk::SemaphoreWaitInfoKHR wait_info = { + .semaphoreCount = 1, + .pSemaphores = &semaphore.get(), + .pValues = &tick, + }; + + while (instance.GetDevice().waitSemaphoresKHR(&wait_info, WAIT_TIMEOUT) != + vk::Result::eSuccess) { + } + Refresh(); +} + +void MasterSemaphoreTimeline::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, + vk::Semaphore signal, u64 signal_value) { + cmdbuf.end(); + + const u32 num_signal_semaphores = signal ? 2U : 1U; + const std::array signal_values{signal_value, u64(0)}; + const std::array signal_semaphores{Handle(), signal}; + + const u32 num_wait_semaphores = wait ? 
2U : 1U; + const std::array wait_values{signal_value - 1, u64(1)}; + const std::array wait_semaphores{Handle(), wait}; + + static constexpr std::array wait_stage_masks = { + vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eColorAttachmentOutput, + }; + + const vk::TimelineSemaphoreSubmitInfoKHR timeline_si = { + .waitSemaphoreValueCount = num_wait_semaphores, + .pWaitSemaphoreValues = wait_values.data(), + .signalSemaphoreValueCount = num_signal_semaphores, + .pSignalSemaphoreValues = signal_values.data(), + }; + + const vk::SubmitInfo submit_info = { + .pNext = &timeline_si, + .waitSemaphoreCount = num_wait_semaphores, + .pWaitSemaphores = wait_semaphores.data(), + .pWaitDstStageMask = wait_stage_masks.data(), + .commandBufferCount = 1u, + .pCommandBuffers = &cmdbuf, + .signalSemaphoreCount = num_signal_semaphores, + .pSignalSemaphores = signal_semaphores.data(), + }; + + try { + instance.GetGraphicsQueue().submit(submit_info); + } catch (vk::DeviceLostError& err) { + LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what()); + UNREACHABLE(); + } +} + +constexpr u64 FENCE_RESERVE = 8; + +MasterSemaphoreFence::MasterSemaphoreFence(const Instance& instance_) : instance{instance_} { + const vk::Device device{instance.GetDevice()}; + for (u64 i = 0; i < FENCE_RESERVE; i++) { + free_queue.push(device.createFenceUnique({})); + } + wait_thread = std::jthread([this](std::stop_token token) { WaitThread(token); }); +} + +MasterSemaphoreFence::~MasterSemaphoreFence() = default; + +void MasterSemaphoreFence::Refresh() {} + +void MasterSemaphoreFence::Wait(u64 tick) { + while (true) { + u64 current_value = gpu_tick.load(std::memory_order_relaxed); + if (current_value >= tick) { + return; + } + gpu_tick.wait(current_value); + } +} + +void MasterSemaphoreFence::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, + vk::Semaphore signal, u64 signal_value) { + cmdbuf.end(); + + const u32 num_signal_semaphores = signal ? 1U : 0U; + const u32 num_wait_semaphores = wait ? 
1U : 0U; + + static constexpr std::array wait_stage_masks = { + vk::PipelineStageFlagBits::eColorAttachmentOutput, + }; + + const vk::SubmitInfo submit_info = { + .waitSemaphoreCount = num_wait_semaphores, + .pWaitSemaphores = &wait, + .pWaitDstStageMask = wait_stage_masks.data(), + .commandBufferCount = 1u, + .pCommandBuffers = &cmdbuf, + .signalSemaphoreCount = num_signal_semaphores, + .pSignalSemaphores = &signal, + }; + + vk::UniqueFence fence{GetFreeFence()}; + try { + instance.GetGraphicsQueue().submit(submit_info, *fence); + } catch (vk::DeviceLostError& err) { + LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what()); + UNREACHABLE(); + } + + std::scoped_lock lock{wait_mutex}; + wait_queue.push({ + .handle = std::move(fence), + .signal_value = signal_value, + }); + wait_cv.notify_one(); +} + +void MasterSemaphoreFence::WaitThread(std::stop_token token) { + const vk::Device device{instance.GetDevice()}; + while (!token.stop_requested()) { + Fence fence; + { + std::unique_lock lock{wait_mutex}; + Common::CondvarWait(wait_cv, lock, token, [this] { return !wait_queue.empty(); }); + if (token.stop_requested()) { + return; + } + fence = std::move(wait_queue.front()); + wait_queue.pop(); + } + + const vk::Result result = device.waitForFences(*fence.handle, true, WAIT_TIMEOUT); + if (result != vk::Result::eSuccess) { + LOG_CRITICAL(Render_Vulkan, "Fence wait failed with error {}", vk::to_string(result)); + UNREACHABLE(); + } + device.resetFences(*fence.handle); + + gpu_tick.store(fence.signal_value); + gpu_tick.notify_all(); + + std::scoped_lock lock{free_mutex}; + free_queue.push(std::move(fence.handle)); + } +} + +vk::UniqueFence MasterSemaphoreFence::GetFreeFence() { + std::scoped_lock lock{free_mutex}; + if (free_queue.empty()) { + return instance.GetDevice().createFenceUnique({}); + } + + vk::UniqueFence fence{std::move(free_queue.front())}; + free_queue.pop(); + return fence; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h new file mode 100644 index 000000000..875e1b8d4 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -0,0 +1,107 @@ +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include "common/common_types.h" +#include "common/polyfill_thread.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class Instance; +class Scheduler; + +class MasterSemaphore { +public: + virtual ~MasterSemaphore() = default; + + [[nodiscard]] u64 CurrentTick() const noexcept { + return current_tick.load(std::memory_order_acquire); + } + + [[nodiscard]] u64 KnownGpuTick() const noexcept { + return gpu_tick.load(std::memory_order_acquire); + } + + [[nodiscard]] bool IsFree(u64 tick) const noexcept { + return KnownGpuTick() >= tick; + } + + [[nodiscard]] u64 NextTick() noexcept { + return current_tick.fetch_add(1, std::memory_order_release); + } + + /// Refresh the known GPU tick + virtual void Refresh() = 0; + + /// Waits for a tick to be hit on the GPU + virtual void Wait(u64 tick) = 0; + + /// Submits the provided command buffer for execution + virtual void SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal, + u64 signal_value) = 0; + +protected: + std::atomic gpu_tick{0}; ///< Current known GPU tick. + std::atomic current_tick{1}; ///< Current logical tick. 
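+
+    // Illustrative flow: the scheduler tags each submission with a fresh tick and
+    // later blocks until the GPU reaches it:
+    //   const u64 tick = master_semaphore.NextTick();
+    //   master_semaphore.SubmitWork(cmdbuf, wait_sem, signal_sem, tick);
+    //   master_semaphore.Wait(tick);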
+}; + +class MasterSemaphoreTimeline : public MasterSemaphore { +public: + explicit MasterSemaphoreTimeline(const Instance& instance); + ~MasterSemaphoreTimeline() override; + + [[nodiscard]] vk::Semaphore Handle() const noexcept { + return semaphore.get(); + } + + void Refresh() override; + + void Wait(u64 tick) override; + + void SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal, + u64 signal_value) override; + +private: + const Instance& instance; + vk::UniqueSemaphore semaphore; ///< Timeline semaphore. +}; + +class MasterSemaphoreFence : public MasterSemaphore { +public: + explicit MasterSemaphoreFence(const Instance& instance); + ~MasterSemaphoreFence() override; + + void Refresh() override; + + void Wait(u64 tick) override; + + void SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal, + u64 signal_value) override; + +private: + void WaitThread(std::stop_token token); + + vk::UniqueFence GetFreeFence(); + +private: + const Instance& instance; + + struct Fence { + vk::UniqueFence handle; + u64 signal_value; + }; + + std::queue free_queue; + std::queue wait_queue; + std::mutex free_mutex; + std::mutex wait_mutex; + std::condition_variable_any wait_cv; + std::jthread wait_thread; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp new file mode 100644 index 000000000..4bac4c707 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -0,0 +1,519 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/common_paths.h" +#include "common/file_util.h" +#include "common/logging/log.h" +#include "common/microprofile.h" +#include "common/settings.h" +#include "video_core/renderer_vulkan/pica_to_vk.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_gen_spv.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" + +MICROPROFILE_DEFINE(Vulkan_Bind, "Vulkan", "Pipeline Bind", MP_RGB(192, 32, 32)); + +namespace Vulkan { + +enum ProgramType : u32 { + VS = 0, + GS = 2, + FS = 1, +}; + +u32 AttribBytes(Pica::PipelineRegs::VertexAttributeFormat format, u32 size) { + switch (format) { + case Pica::PipelineRegs::VertexAttributeFormat::FLOAT: + return sizeof(float) * size; + case Pica::PipelineRegs::VertexAttributeFormat::SHORT: + return sizeof(u16) * size; + case Pica::PipelineRegs::VertexAttributeFormat::BYTE: + case Pica::PipelineRegs::VertexAttributeFormat::UBYTE: + return sizeof(u8) * size; + } + return 0; +} + +AttribLoadFlags MakeAttribLoadFlag(Pica::PipelineRegs::VertexAttributeFormat format) { + switch (format) { + case Pica::PipelineRegs::VertexAttributeFormat::BYTE: + case Pica::PipelineRegs::VertexAttributeFormat::SHORT: + return AttribLoadFlags::Sint; + case Pica::PipelineRegs::VertexAttributeFormat::UBYTE: + return AttribLoadFlags::Uint; + default: + return AttribLoadFlags::Float; + } +} + +constexpr std::array BUFFER_BINDINGS = {{ + {0, vk::DescriptorType::eUniformBufferDynamic, 1, vk::ShaderStageFlagBits::eVertex}, + {1, vk::DescriptorType::eUniformBufferDynamic, 1, + vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eGeometry | + vk::ShaderStageFlagBits::eFragment}, + {2, 
vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment},
+    {3, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment},
+    {4, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment},
+}};
+
+constexpr std::array<vk::DescriptorSetLayoutBinding, 4> TEXTURE_BINDINGS = {{
+    {0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
+    {1, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
+    {2, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
+    {3, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
+}};
+
+// TODO: Use descriptor array for shadow cube
+constexpr std::array<vk::DescriptorSetLayoutBinding, 7> SHADOW_BINDINGS = {{
+    {0, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
+    {1, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
+    {2, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
+    {3, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
+    {4, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
+    {5, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
+    {6, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
+}};
+
+PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
+                             RenderpassCache& renderpass_cache_, DescriptorPool& pool_)
+    : instance{instance_}, scheduler{scheduler_}, renderpass_cache{renderpass_cache_}, pool{pool_},
+      num_worker_threads{std::max(std::thread::hardware_concurrency(), 2U)},
+      workers{num_worker_threads, "Pipeline workers"},
+      descriptor_set_providers{DescriptorSetProvider{instance, pool, BUFFER_BINDINGS},
+                               DescriptorSetProvider{instance, pool, TEXTURE_BINDINGS},
+                               DescriptorSetProvider{instance, pool, SHADOW_BINDINGS}},
+      trivial_vertex_shader{instance, vk::ShaderStageFlagBits::eVertex,
+                            GenerateTrivialVertexShader(instance.IsShaderClipDistanceSupported())} {
+    BuildLayout();
+}
+
+void PipelineCache::BuildLayout() {
+    std::array<vk::DescriptorSetLayout, NUM_RASTERIZER_SETS> descriptor_set_layouts;
+    std::transform(descriptor_set_providers.begin(), descriptor_set_providers.end(),
+                   descriptor_set_layouts.begin(),
+                   [](const auto& provider) { return provider.Layout(); });
+
+    const vk::PipelineLayoutCreateInfo layout_info = {
+        .setLayoutCount = NUM_RASTERIZER_SETS,
+        .pSetLayouts = descriptor_set_layouts.data(),
+        .pushConstantRangeCount = 0,
+        .pPushConstantRanges = nullptr,
+    };
+    pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
+}
+
+PipelineCache::~PipelineCache() {
+    SaveDiskCache();
+}
+
+void PipelineCache::LoadDiskCache() {
+    if (!Settings::values.use_disk_shader_cache || !EnsureDirectories()) {
+        return;
+    }
+
+    const std::string cache_file_path = fmt::format("{}{:x}{:x}.bin", GetPipelineCacheDir(),
+                                                    instance.GetVendorID(), instance.GetDeviceID());
+    vk::PipelineCacheCreateInfo cache_info = {
+        .initialDataSize = 0,
+        .pInitialData = nullptr,
+    };
+
+    std::vector<u8> cache_data;
+    FileUtil::IOFile cache_file{cache_file_path, "r"};
+    if (cache_file.IsOpen()) {
+        LOG_INFO(Render_Vulkan, "Loading pipeline cache");
+
+        const u64 cache_file_size = cache_file.GetSize();
+        cache_data.resize(cache_file_size);
+        if (cache_file.ReadBytes(cache_data.data(), cache_file_size)) {
+            if (!IsCacheValid(cache_data)) {
+                LOG_WARNING(Render_Vulkan, "Provided pipeline cache is invalid, ignoring");
+            } else {
+                cache_info.initialDataSize = cache_file_size;
+
+                cache_info.pInitialData = cache_data.data();
+            }
+        }
+
+        cache_file.Close();
+    }
+
+    vk::Device device = instance.GetDevice();
+    pipeline_cache = device.createPipelineCacheUnique(cache_info);
+}
+
+void PipelineCache::SaveDiskCache() {
+    if (!Settings::values.use_disk_shader_cache || !EnsureDirectories()) {
+        return;
+    }
+
+    const std::string cache_file_path = fmt::format("{}{:x}{:x}.bin", GetPipelineCacheDir(),
+                                                    instance.GetVendorID(), instance.GetDeviceID());
+    FileUtil::IOFile cache_file{cache_file_path, "wb"};
+    if (!cache_file.IsOpen()) {
+        LOG_ERROR(Render_Vulkan, "Unable to open pipeline cache for writing");
+        return;
+    }
+
+    vk::Device device = instance.GetDevice();
+    auto cache_data = device.getPipelineCacheData(*pipeline_cache);
+    if (!cache_file.WriteBytes(cache_data.data(), cache_data.size())) {
+        LOG_ERROR(Render_Vulkan, "Error during pipeline cache write");
+        return;
+    }
+
+    cache_file.Close();
+}
+
+bool PipelineCache::BindPipeline(const PipelineInfo& info, bool wait_built) {
+    MICROPROFILE_SCOPE(Vulkan_Bind);
+
+    u64 shader_hash = 0;
+    for (u32 i = 0; i < MAX_SHADER_STAGES; i++) {
+        shader_hash = Common::HashCombine(shader_hash, shader_hashes[i]);
+    }
+
+    const u64 info_hash = info.Hash(instance);
+    const u64 pipeline_hash = Common::HashCombine(shader_hash, info_hash);
+
+    auto [it, new_pipeline] = graphics_pipelines.try_emplace(pipeline_hash);
+    if (new_pipeline) {
+        it.value() = std::make_unique<GraphicsPipeline>(
+            instance, renderpass_cache, info, *pipeline_cache, *pipeline_layout, current_shaders,
+            wait_built ? nullptr : &workers);
+    }
+
+    GraphicsPipeline* const pipeline{it->second.get()};
+    if (!pipeline->IsDone() && !pipeline->TryBuild(wait_built)) {
+        return false;
+    }
+
+    for (u32 i = 0; i < NUM_RASTERIZER_SETS; i++) {
+        if (!set_dirty[i]) {
+            continue;
+        }
+        bound_descriptor_sets[i] = descriptor_set_providers[i].Acquire(update_data[i]);
+        set_dirty[i] = false;
+    }
+
+    const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline);
+    const bool pipeline_dirty = (current_pipeline != pipeline) || is_dirty;
+    scheduler.Record([this, is_dirty, pipeline_dirty, pipeline,
+                      current_dynamic = current_info.dynamic, dynamic = info.dynamic,
+                      descriptor_sets = bound_descriptor_sets, offsets = offsets,
+                      current_rasterization = current_info.rasterization,
+                      current_depth_stencil = current_info.depth_stencil,
+                      rasterization = info.rasterization,
+                      depth_stencil = info.depth_stencil](vk::CommandBuffer cmdbuf) {
+        if (dynamic.stencil_compare_mask != current_dynamic.stencil_compare_mask || is_dirty) {
+            cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack,
+                                         dynamic.stencil_compare_mask);
+        }
+
+        if (dynamic.stencil_write_mask != current_dynamic.stencil_write_mask || is_dirty) {
+            cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack,
+                                       dynamic.stencil_write_mask);
+        }
+
+        if (dynamic.stencil_reference != current_dynamic.stencil_reference || is_dirty) {
+            cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack,
+                                       dynamic.stencil_reference);
+        }
+
+        if (dynamic.blend_color != current_dynamic.blend_color || is_dirty) {
+            const Common::Vec4f color = PicaToVK::ColorRGBA8(dynamic.blend_color);
+            cmdbuf.setBlendConstants(color.AsArray());
+        }
+
+        if (instance.IsExtendedDynamicStateSupported()) {
+            if (rasterization.cull_mode != current_rasterization.cull_mode || is_dirty) {
+                cmdbuf.setCullModeEXT(PicaToVK::CullMode(rasterization.cull_mode));
+                cmdbuf.setFrontFaceEXT(PicaToVK::FrontFace(rasterization.cull_mode));
+            }
+
+            if (depth_stencil.depth_compare_op !=
current_depth_stencil.depth_compare_op || + is_dirty) { + cmdbuf.setDepthCompareOpEXT(PicaToVK::CompareFunc(depth_stencil.depth_compare_op)); + } + + if (depth_stencil.depth_test_enable != current_depth_stencil.depth_test_enable || + is_dirty) { + cmdbuf.setDepthTestEnableEXT(depth_stencil.depth_test_enable); + } + + if (depth_stencil.depth_write_enable != current_depth_stencil.depth_write_enable || + is_dirty) { + cmdbuf.setDepthWriteEnableEXT(depth_stencil.depth_write_enable); + } + + if (rasterization.topology != current_rasterization.topology || is_dirty) { + cmdbuf.setPrimitiveTopologyEXT(PicaToVK::PrimitiveTopology(rasterization.topology)); + } + + if (depth_stencil.stencil_test_enable != current_depth_stencil.stencil_test_enable || + is_dirty) { + cmdbuf.setStencilTestEnableEXT(depth_stencil.stencil_test_enable); + } + + if (depth_stencil.stencil_fail_op != current_depth_stencil.stencil_fail_op || + depth_stencil.stencil_pass_op != current_depth_stencil.stencil_pass_op || + depth_stencil.stencil_depth_fail_op != + current_depth_stencil.stencil_depth_fail_op || + depth_stencil.stencil_compare_op != current_depth_stencil.stencil_compare_op || + is_dirty) { + cmdbuf.setStencilOpEXT(vk::StencilFaceFlagBits::eFrontAndBack, + PicaToVK::StencilOp(depth_stencil.stencil_fail_op), + PicaToVK::StencilOp(depth_stencil.stencil_pass_op), + PicaToVK::StencilOp(depth_stencil.stencil_depth_fail_op), + PicaToVK::CompareFunc(depth_stencil.stencil_compare_op)); + } + } + + if (pipeline_dirty) { + if (!pipeline->IsDone()) { + pipeline->WaitDone(); + } + cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); + } + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, + descriptor_sets, offsets); + }); + + current_info = info; + current_pipeline = pipeline; + + return true; +} + +bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, + Pica::Shader::ShaderSetup& setup, + const VertexLayout& layout) { + PicaVSConfig config{regs.rasterizer, regs.vs, setup, instance}; + config.state.use_geometry_shader = instance.UseGeometryShaders(); + + for (u32 i = 0; i < layout.attribute_count; i++) { + const VertexAttribute& attr = layout.attributes[i]; + const FormatTraits& traits = instance.GetTraits(attr.type, attr.size); + const u32 location = attr.location.Value(); + AttribLoadFlags& flags = config.state.load_flags[location]; + + if (traits.needs_conversion) { + flags = MakeAttribLoadFlag(attr.type); + } + if (traits.needs_emulation) { + flags |= AttribLoadFlags::ZeroW; + } + } + + auto [it, new_config] = programmable_vertex_map.try_emplace(config); + if (new_config) { + auto code = GenerateVertexShader(setup, config); + if (!code) { + LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader"); + programmable_vertex_map[config] = nullptr; + return false; + } + + std::string& program = code.value(); + auto [iter, new_program] = programmable_vertex_cache.try_emplace(program, instance); + auto& shader = iter->second; + + if (new_program) { + shader.program = std::move(program); + const vk::Device device = instance.GetDevice(); + workers.QueueWork([device, &shader] { + shader.module = Compile(shader.program, vk::ShaderStageFlagBits::eVertex, device); + shader.MarkDone(); + }); + } + + it->second = &shader; + } + + Shader* const shader{it->second}; + if (!shader) { + LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader"); + return false; + } + + current_shaders[ProgramType::VS] = shader; + shader_hashes[ProgramType::VS] = 
config.Hash();
+
+    return true;
+}
+
+void PipelineCache::UseTrivialVertexShader() {
+    current_shaders[ProgramType::VS] = &trivial_vertex_shader;
+    shader_hashes[ProgramType::VS] = 0;
+}
+
+bool PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) {
+    if (!instance.UseGeometryShaders()) {
+        UseTrivialGeometryShader();
+        return true;
+    }
+
+    const PicaFixedGSConfig gs_config{regs, instance};
+    auto [it, new_shader] = fixed_geometry_shaders.try_emplace(gs_config, instance);
+    auto& shader = it->second;
+
+    if (new_shader) {
+        workers.QueueWork([gs_config, device = instance.GetDevice(), &shader]() {
+            const std::string code = GenerateFixedGeometryShader(gs_config);
+            shader.module = Compile(code, vk::ShaderStageFlagBits::eGeometry, device);
+            shader.MarkDone();
+        });
+    }
+
+    current_shaders[ProgramType::GS] = &shader;
+    shader_hashes[ProgramType::GS] = gs_config.Hash();
+
+    return true;
+}
+
+void PipelineCache::UseTrivialGeometryShader() {
+    current_shaders[ProgramType::GS] = nullptr;
+    shader_hashes[ProgramType::GS] = 0;
+}
+
+void PipelineCache::UseFragmentShader(const Pica::Regs& regs) {
+    const PicaFSConfig config{regs, instance};
+
+    const auto [it, new_shader] = fragment_shaders.try_emplace(config, instance);
+    auto& shader = it->second;
+
+    if (new_shader) {
+        const bool use_spirv = Settings::values.spirv_shader_gen.GetValue();
+        if (use_spirv && !config.state.shadow_rendering.Value()) {
+            const std::vector<u32> code = GenerateFragmentShaderSPV(config);
+            shader.module = CompileSPV(code, instance.GetDevice());
+            shader.MarkDone();
+        } else {
+            workers.QueueWork([config, device = instance.GetDevice(), &shader]() {
+                const std::string code = GenerateFragmentShader(config);
+                shader.module = Compile(code, vk::ShaderStageFlagBits::eFragment, device);
+                shader.MarkDone();
+            });
+        }
+    }
+
+    current_shaders[ProgramType::FS] = &shader;
+    shader_hashes[ProgramType::FS] = config.Hash();
+}
+
+void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view, vk::Sampler sampler) {
+    auto& info = update_data[1][binding].image_info;
+    if (info.imageView == image_view && info.sampler == sampler) {
+        return;
+    }
+    set_dirty[1] = true;
+    info = vk::DescriptorImageInfo{
+        .sampler = sampler,
+        .imageView = image_view,
+        .imageLayout = vk::ImageLayout::eGeneral,
+    };
+}
+
+void PipelineCache::BindStorageImage(u32 binding, vk::ImageView image_view) {
+    auto& info = update_data[2][binding].image_info;
+    if (info.imageView == image_view) {
+        return;
+    }
+    set_dirty[2] = true;
+    info = vk::DescriptorImageInfo{
+        .imageView = image_view,
+        .imageLayout = vk::ImageLayout::eGeneral,
+    };
+}
+
+void PipelineCache::BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size) {
+    auto& info = update_data[0][binding].buffer_info;
+    if (info.buffer == buffer && info.offset == offset && info.range == size) {
+        return;
+    }
+    set_dirty[0] = true;
+    info = vk::DescriptorBufferInfo{
+        .buffer = buffer,
+        .offset = offset,
+        .range = size,
+    };
+}
+
+void PipelineCache::BindTexelBuffer(u32 binding, vk::BufferView buffer_view) {
+    auto& view = update_data[0][binding].buffer_view;
+    if (view != buffer_view) {
+        set_dirty[0] = true;
+        view = buffer_view;
+    }
+}
+
+void PipelineCache::SetBufferOffset(u32 binding, size_t offset) {
+    offsets[binding] = static_cast<u32>(offset);
+}
+
+bool PipelineCache::IsCacheValid(std::span<const u8> data) const {
+    if (data.size() < sizeof(vk::PipelineCacheHeaderVersionOne)) {
+        LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header");
+        return false;
+    }
+
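+    // VkPipelineCacheHeaderVersionOne layout per the Vulkan spec: headerSize and
+    // headerVersion (u32 each), then vendorID, deviceID and pipelineCacheUUID[VK_UUID_SIZE];
+    // each field is checked against the current device below.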
+    vk::PipelineCacheHeaderVersionOne header;
+    std::memcpy(&header, data.data(), sizeof(header));
+    if (header.headerSize < sizeof(header)) {
+        LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header length");
+        return false;
+    }
+
+    if (header.headerVersion != vk::PipelineCacheHeaderVersion::eOne) {
+        LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header version");
+        return false;
+    }
+
+    if (u32 vendor_id = instance.GetVendorID(); header.vendorID != vendor_id) {
+        LOG_ERROR(
+            Render_Vulkan,
+            "Pipeline cache failed validation: Incorrect vendor ID (file: {:#X}, device: {:#X})",
+            header.vendorID, vendor_id);
+        return false;
+    }
+
+    if (u32 device_id = instance.GetDeviceID(); header.deviceID != device_id) {
+        LOG_ERROR(
+            Render_Vulkan,
+            "Pipeline cache failed validation: Incorrect device ID (file: {:#X}, device: {:#X})",
+            header.deviceID, device_id);
+        return false;
+    }
+
+    if (header.pipelineCacheUUID != instance.GetPipelineCacheUUID()) {
+        LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Incorrect UUID");
+        return false;
+    }
+
+    return true;
+}
+
+bool PipelineCache::EnsureDirectories() const {
+    const auto create_dir = [](const std::string& dir) {
+        if (!FileUtil::CreateDir(dir)) {
+            LOG_ERROR(Render_Vulkan, "Failed to create directory={}", dir);
+            return false;
+        }
+
+        return true;
+    };
+
+    return create_dir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) &&
+           create_dir(GetPipelineCacheDir());
+}
+
+std::string PipelineCache::GetPipelineCacheDir() const {
+    return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + "vulkan" + DIR_SEP;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
new file mode 100644
index 000000000..954f4b3e9
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -0,0 +1,123 @@
+// Copyright 2023 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <bitset>
+#include <tsl/robin_map.h>
+
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
+
+namespace Pica {
+struct Regs;
+}
+
+namespace Vulkan {
+
+class Instance;
+class Scheduler;
+class RenderpassCache;
+class DescriptorPool;
+
+constexpr u32 NUM_RASTERIZER_SETS = 3;
+constexpr u32 NUM_DYNAMIC_OFFSETS = 2;
+
+/**
+ * Stores a collection of rasterizer pipelines used during rendering.
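+ *
+ * A minimal usage sketch (illustrative only; assumes a filled-in VertexLayout
+ * `layout` and PipelineInfo `info`, and mirrors the call order in RasterizerVulkan):
+ * @code
+ * pipeline_cache.LoadDiskCache();
+ * pipeline_cache.UseProgrammableVertexShader(regs, Pica::g_state.vs, layout);
+ * pipeline_cache.UseFixedGeometryShader(regs);
+ * pipeline_cache.UseFragmentShader(regs);
+ * if (pipeline_cache.BindPipeline(info, false)) {
+ *     // record the draw call
+ * }
+ * @endcode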
+ */
+class PipelineCache {
+public:
+    explicit PipelineCache(const Instance& instance, Scheduler& scheduler,
+                           RenderpassCache& renderpass_cache, DescriptorPool& pool);
+    ~PipelineCache();
+
+    [[nodiscard]] DescriptorSetProvider& TextureProvider() noexcept {
+        return descriptor_set_providers[1];
+    }
+
+    /// Loads the pipeline cache stored to disk
+    void LoadDiskCache();
+
+    /// Stores the generated pipeline cache to disk
+    void SaveDiskCache();
+
+    /// Binds a pipeline using the provided information
+    bool BindPipeline(const PipelineInfo& info, bool wait_built = false);
+
+    /// Binds a PICA decompiled vertex shader
+    bool UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup,
+                                     const VertexLayout& layout);
+
+    /// Binds a passthrough vertex shader
+    void UseTrivialVertexShader();
+
+    /// Binds a PICA decompiled geometry shader
+    bool UseFixedGeometryShader(const Pica::Regs& regs);
+
+    /// Binds a passthrough geometry shader
+    void UseTrivialGeometryShader();
+
+    /// Binds a fragment shader generated from PICA state
+    void UseFragmentShader(const Pica::Regs& regs);
+
+    /// Binds a texture to the specified binding
+    void BindTexture(u32 binding, vk::ImageView image_view, vk::Sampler sampler);
+
+    /// Binds a storage image to the specified binding
+    void BindStorageImage(u32 binding, vk::ImageView image_view);
+
+    /// Binds a buffer to the specified binding
+    void BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size);
+
+    /// Binds a texel buffer to the specified binding
+    void BindTexelBuffer(u32 binding, vk::BufferView buffer_view);
+
+    /// Sets the dynamic offset for the uniform buffer at binding
+    void SetBufferOffset(u32 binding, size_t offset);
+
+private:
+    /// Builds the rasterizer pipeline layout
+    void BuildLayout();
+
+    /// Returns true when the disk data can be used by the current driver
+    bool IsCacheValid(std::span<const u8> cache_data) const;
+
+    /// Create shader disk cache directories. Returns true on success.
+    bool EnsureDirectories() const;
+
+    /// Returns the pipeline cache storage dir
+    std::string GetPipelineCacheDir() const;
+
+private:
+    const Instance& instance;
+    Scheduler& scheduler;
+    RenderpassCache& renderpass_cache;
+    DescriptorPool& pool;
+
+    vk::UniquePipelineCache pipeline_cache;
+    vk::UniquePipelineLayout pipeline_layout;
+    std::size_t num_worker_threads;
+    Common::ThreadWorker workers;
+    PipelineInfo current_info{};
+    GraphicsPipeline* current_pipeline{};
+    tsl::robin_map<u64, std::unique_ptr<GraphicsPipeline>, Common::IdentityHash<u64>>
+        graphics_pipelines;
+
+    std::array<DescriptorSetProvider, NUM_RASTERIZER_SETS> descriptor_set_providers;
+    std::array<DescriptorSetData, NUM_RASTERIZER_SETS> update_data{};
+    std::array<vk::DescriptorSet, NUM_RASTERIZER_SETS> bound_descriptor_sets{};
+    std::array<u32, NUM_DYNAMIC_OFFSETS> offsets{};
+    std::bitset<NUM_RASTERIZER_SETS> set_dirty{};
+
+    std::array<u64, MAX_SHADER_STAGES> shader_hashes;
+    std::array<Shader*, MAX_SHADER_STAGES> current_shaders;
+    std::unordered_map<PicaVSConfig, Shader*> programmable_vertex_map;
+    std::unordered_map<std::string, Shader> programmable_vertex_cache;
+    std::unordered_map<PicaFixedGSConfig, Shader> fixed_geometry_shaders;
+    std::unordered_map<PicaFSConfig, Shader> fragment_shaders;
+    Shader trivial_vertex_shader;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_present_window.cpp b/src/video_core/renderer_vulkan/vk_present_window.cpp
new file mode 100644
index 000000000..785600d6e
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_present_window.cpp
@@ -0,0 +1,514 @@
+// Copyright 2023 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
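+//
+// PresentWindow decouples rendering from presentation: the renderer draws into one
+// of SWAP_CHAIN_SIZE off-screen frames while, when async presentation is enabled, a
+// dedicated thread blits finished frames to the swapchain, so a slow or blocked
+// vsync never stalls command recording.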
+ +#include "common/microprofile.h" +#include "common/settings.h" +#include "common/thread.h" +#include "core/frontend/emu_window.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_platform.h" +#include "video_core/renderer_vulkan/vk_present_window.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" + +#include + +MICROPROFILE_DEFINE(Vulkan_WaitPresent, "Vulkan", "Wait For Present", MP_RGB(128, 128, 128)); + +namespace Vulkan { + +namespace { + +bool CanBlitToSwapchain(const vk::PhysicalDevice& physical_device, vk::Format format) { + const vk::FormatProperties props{physical_device.getFormatProperties(format)}; + return static_cast(props.optimalTilingFeatures & vk::FormatFeatureFlagBits::eBlitDst); +} + +[[nodiscard]] vk::ImageSubresourceLayers MakeImageSubresourceLayers() { + return vk::ImageSubresourceLayers{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }; +} + +[[nodiscard]] vk::ImageBlit MakeImageBlit(s32 frame_width, s32 frame_height, s32 swapchain_width, + s32 swapchain_height) { + return vk::ImageBlit{ + .srcSubresource = MakeImageSubresourceLayers(), + .srcOffsets = + std::array{ + vk::Offset3D{ + .x = 0, + .y = 0, + .z = 0, + }, + vk::Offset3D{ + .x = frame_width, + .y = frame_height, + .z = 1, + }, + }, + .dstSubresource = MakeImageSubresourceLayers(), + .dstOffsets = + std::array{ + vk::Offset3D{ + .x = 0, + .y = 0, + .z = 0, + }, + vk::Offset3D{ + .x = swapchain_width, + .y = swapchain_height, + .z = 1, + }, + }, + }; +} + +[[nodiscard]] vk::ImageCopy MakeImageCopy(u32 frame_width, u32 frame_height, u32 swapchain_width, + u32 swapchain_height) { + return vk::ImageCopy{ + .srcSubresource = MakeImageSubresourceLayers(), + .srcOffset = + vk::Offset3D{ + .x = 0, + .y = 0, + .z = 0, + }, + .dstSubresource = MakeImageSubresourceLayers(), + .dstOffset = + vk::Offset3D{ + .x = 0, + .y = 0, + .z = 0, + }, + .extent = + vk::Extent3D{ + .width = std::min(frame_width, swapchain_width), + .height = std::min(frame_height, swapchain_height), + .depth = 1, + }, + }; +} + +} // Anonymous namespace + +PresentWindow::PresentWindow(Frontend::EmuWindow& emu_window_, const Instance& instance_, + Scheduler& scheduler_) + : emu_window{emu_window_}, instance{instance_}, scheduler{scheduler_}, + surface{CreateSurface(instance.GetInstance(), emu_window)}, + swapchain{instance, emu_window.GetFramebufferLayout().width, + emu_window.GetFramebufferLayout().height, surface}, + graphics_queue{instance.GetGraphicsQueue()}, present_renderpass{CreateRenderpass()}, + vsync_enabled{Settings::values.use_vsync_new.GetValue()}, + blit_supported{ + CanBlitToSwapchain(instance.GetPhysicalDevice(), swapchain.GetSurfaceFormat().format)}, + use_present_thread{Settings::values.async_presentation.GetValue()}, + last_render_surface{emu_window.GetWindowInfo().render_surface} { + + const vk::Device device = instance.GetDevice(); + const vk::CommandPoolCreateInfo pool_info = { + .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer | + vk::CommandPoolCreateFlagBits::eTransient, + .queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex(), + }; + command_pool = device.createCommandPool(pool_info); + + const vk::CommandBufferAllocateInfo alloc_info = { + .commandPool = command_pool, + .level = vk::CommandBufferLevel::ePrimary, + .commandBufferCount = SWAP_CHAIN_SIZE, + }; + const std::vector command_buffers = device.allocateCommandBuffers(alloc_info); + + 
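+    // Give each of the SWAP_CHAIN_SIZE frames its own command buffer, render-ready
+    // semaphore and present-done fence; the fence starts out signaled so the very
+    // first GetRenderFrame() call does not block.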
+    for (u32 i = 0; i < SWAP_CHAIN_SIZE; i++) {
+        Frame& frame = swap_chain[i];
+        frame.cmdbuf = command_buffers[i];
+        frame.render_ready = device.createSemaphore({});
+        frame.present_done = device.createFence({.flags = vk::FenceCreateFlagBits::eSignaled});
+        free_queue.push(&frame);
+    }
+
+    if (use_present_thread) {
+        present_thread = std::jthread([this](std::stop_token token) { PresentThread(token); });
+    }
+}
+
+PresentWindow::~PresentWindow() {
+    scheduler.Finish();
+    const vk::Device device = instance.GetDevice();
+    device.destroyCommandPool(command_pool);
+    device.destroyRenderPass(present_renderpass);
+    for (auto& frame : swap_chain) {
+        device.destroyImageView(frame.image_view);
+        device.destroyFramebuffer(frame.framebuffer);
+        device.destroySemaphore(frame.render_ready);
+        device.destroyFence(frame.present_done);
+        vmaDestroyImage(instance.GetAllocator(), frame.image, frame.allocation);
+    }
+}
+
+void PresentWindow::RecreateFrame(Frame* frame, u32 width, u32 height) {
+    vk::Device device = instance.GetDevice();
+    if (frame->framebuffer) {
+        device.destroyFramebuffer(frame->framebuffer);
+    }
+    if (frame->image_view) {
+        device.destroyImageView(frame->image_view);
+    }
+    if (frame->image) {
+        vmaDestroyImage(instance.GetAllocator(), frame->image, frame->allocation);
+    }
+
+    const vk::Format format = swapchain.GetSurfaceFormat().format;
+    const vk::ImageCreateInfo image_info = {
+        .imageType = vk::ImageType::e2D,
+        .format = format,
+        .extent = {width, height, 1},
+        .mipLevels = 1,
+        .arrayLayers = 1,
+        .samples = vk::SampleCountFlagBits::e1,
+        .usage = vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eTransferSrc,
+    };
+
+    const VmaAllocationCreateInfo alloc_info = {
+        .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT,
+        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
+        .requiredFlags = 0,
+        .preferredFlags = 0,
+        .pool = VK_NULL_HANDLE,
+        .pUserData = nullptr,
+    };
+
+    VkImage unsafe_image{};
+    VkImageCreateInfo unsafe_image_info = static_cast<VkImageCreateInfo>(image_info);
+
+    VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info,
+                                     &unsafe_image, &frame->allocation, nullptr);
+    if (result != VK_SUCCESS) [[unlikely]] {
+        LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result);
+        UNREACHABLE();
+    }
+    frame->image = vk::Image{unsafe_image};
+
+    const vk::ImageViewCreateInfo view_info = {
+        .image = frame->image,
+        .viewType = vk::ImageViewType::e2D,
+        .format = format,
+        .subresourceRange{
+            .aspectMask = vk::ImageAspectFlagBits::eColor,
+            .baseMipLevel = 0,
+            .levelCount = 1,
+            .baseArrayLayer = 0,
+            .layerCount = 1,
+        },
+    };
+    frame->image_view = device.createImageView(view_info);
+
+    const vk::FramebufferCreateInfo framebuffer_info = {
+        .renderPass = present_renderpass,
+        .attachmentCount = 1,
+        .pAttachments = &frame->image_view,
+        .width = width,
+        .height = height,
+        .layers = 1,
+    };
+    frame->framebuffer = instance.GetDevice().createFramebuffer(framebuffer_info);
+
+    frame->width = width;
+    frame->height = height;
+}
+
+Frame* PresentWindow::GetRenderFrame() {
+    MICROPROFILE_SCOPE(Vulkan_WaitPresent);
+
+    // Wait for free presentation frames
+    std::unique_lock lock{free_mutex};
+    free_cv.wait(lock, [this] { return !free_queue.empty(); });
+
+    // Take the frame from the queue
+    Frame* frame = free_queue.front();
+    free_queue.pop();
+
+    vk::Device device = instance.GetDevice();
+    vk::Result result{};
+
+    const auto wait = [&]() {
+        result = device.waitForFences(frame->present_done, false, std::numeric_limits<u64>::max());
+        return
result; + }; + + // Wait for the presentation to be finished so all frame resources are free + while (wait() != vk::Result::eSuccess) { + // Retry if the waiting times out + if (result == vk::Result::eTimeout) { + continue; + } + + // eErrorInitializationFailed occurs on Mali GPU drivers due to them + // using the ppoll() syscall which isn't correctly restarted after a signal, + // we need to manually retry waiting in that case + if (result == vk::Result::eErrorInitializationFailed) { + continue; + } + } + + device.resetFences(frame->present_done); + return frame; +} + +void PresentWindow::Present(Frame* frame) { + if (!use_present_thread) { + scheduler.WaitWorker(); + CopyToSwapchain(frame); + free_queue.push(frame); + return; + } + + scheduler.Record([this, frame](vk::CommandBuffer) { + std::unique_lock lock{queue_mutex}; + present_queue.push(frame); + frame_cv.notify_one(); + }); +} + +void PresentWindow::WaitPresent() { + if (!use_present_thread) { + return; + } + + // Wait for the present queue to be empty + { + std::unique_lock queue_lock{queue_mutex}; + frame_cv.wait(queue_lock, [this] { return present_queue.empty(); }); + } + + // The above condition will be satisfied when the last frame is taken from the queue. + // To ensure that frame has been presented as well take hold of the swapchain + // mutex. + std::scoped_lock swapchain_lock{swapchain_mutex}; +} + +void PresentWindow::PresentThread(std::stop_token token) { + Common::SetCurrentThreadName("VulkanPresent"); + while (!token.stop_requested()) { + std::unique_lock lock{queue_mutex}; + + // Wait for presentation frames + Common::CondvarWait(frame_cv, lock, token, [this] { return !present_queue.empty(); }); + if (token.stop_requested()) { + return; + } + + // Take the frame and notify anyone waiting + Frame* frame = present_queue.front(); + present_queue.pop(); + frame_cv.notify_one(); + + // By exchanging the lock ownership we take the swapchain lock + // before the queue lock goes out of scope. This way the swapchain + // lock in WaitPresent is guaranteed to occur after here. + std::exchange(lock, std::unique_lock{swapchain_mutex}); + + CopyToSwapchain(frame); + + // Free the frame for reuse + std::scoped_lock fl{free_mutex}; + free_queue.push(frame); + free_cv.notify_one(); + } +} + +void PresentWindow::NotifySurfaceChanged() { +#ifdef ANDROID + std::scoped_lock lock{recreate_surface_mutex}; + recreate_surface_cv.notify_one(); +#endif +} + +void PresentWindow::CopyToSwapchain(Frame* frame) { + const auto recreate_swapchain = [&] { swapchain.Create(frame->width, frame->height, surface); }; + +#ifdef ANDROID + std::unique_lock lock{recreate_surface_mutex}; + + recreate_surface_cv.wait_for(lock, std::chrono::milliseconds(400), [&]() { + return last_render_surface == emu_window.GetWindowInfo().render_surface; + }); + + // If the frontend recreated the surface, recreate the renderer surface and swapchain. 
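+    // (On Android the SurfaceView may be destroyed and rebuilt behind our back, e.g.
+    // on rotation; the old vk::SurfaceKHR would then be stale and presenting to it
+    // would fail, so a fresh surface and swapchain are created here.)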
+ void* const render_surface = emu_window.GetWindowInfo().render_surface; + if (last_render_surface != render_surface) { + last_render_surface = render_surface; + surface = CreateSurface(instance.GetInstance(), emu_window); + recreate_swapchain(); + } +#else + const bool use_vsync = Settings::values.use_vsync_new.GetValue(); + const bool size_changed = + swapchain.GetWidth() != frame->width || swapchain.GetHeight() != frame->height; + const bool vsync_changed = vsync_enabled != use_vsync; + if (vsync_changed || size_changed) [[unlikely]] { + vsync_enabled = use_vsync; + recreate_swapchain(); + } +#endif + + while (!swapchain.AcquireNextImage()) { + recreate_swapchain(); + } + + const vk::Image swapchain_image = swapchain.Image(); + + const vk::CommandBufferBeginInfo begin_info = { + .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit, + }; + const vk::CommandBuffer cmdbuf = frame->cmdbuf; + cmdbuf.begin(begin_info); + + const vk::Extent2D extent = swapchain.GetExtent(); + const std::array pre_barriers{ + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eUndefined, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = swapchain_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = frame->image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + }; + const vk::ImageMemoryBarrier post_barrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::ePresentSrcKHR, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = swapchain_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput, + vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, + {}, {}, pre_barriers); + + if (blit_supported) { + cmdbuf.blitImage(frame->image, vk::ImageLayout::eTransferSrcOptimal, swapchain_image, + vk::ImageLayout::eTransferDstOptimal, + MakeImageBlit(frame->width, frame->height, extent.width, extent.height), + vk::Filter::eLinear); + } else { + cmdbuf.copyImage(frame->image, vk::ImageLayout::eTransferSrcOptimal, swapchain_image, + vk::ImageLayout::eTransferDstOptimal, + MakeImageCopy(frame->width, frame->height, extent.width, extent.height)); + } + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); + + cmdbuf.end(); + + static constexpr 
std::array wait_stage_masks = { + vk::PipelineStageFlagBits::eColorAttachmentOutput, + vk::PipelineStageFlagBits::eAllGraphics, + }; + + const vk::Semaphore present_ready = swapchain.GetPresentReadySemaphore(); + const vk::Semaphore image_acquired = swapchain.GetImageAcquiredSemaphore(); + const std::array wait_semaphores = {image_acquired, frame->render_ready}; + + vk::SubmitInfo submit_info = { + .waitSemaphoreCount = static_cast(wait_semaphores.size()), + .pWaitSemaphores = wait_semaphores.data(), + .pWaitDstStageMask = wait_stage_masks.data(), + .commandBufferCount = 1u, + .pCommandBuffers = &cmdbuf, + .signalSemaphoreCount = 1, + .pSignalSemaphores = &present_ready, + }; + + std::scoped_lock submit_lock{scheduler.submit_mutex}; + + try { + graphics_queue.submit(submit_info, frame->present_done); + } catch (vk::DeviceLostError& err) { + LOG_CRITICAL(Render_Vulkan, "Device lost during present submit: {}", err.what()); + UNREACHABLE(); + } + + swapchain.Present(); +} + +vk::RenderPass PresentWindow::CreateRenderpass() { + const vk::AttachmentReference color_ref = { + .attachment = 0, + .layout = vk::ImageLayout::eGeneral, + }; + + const vk::SubpassDescription subpass = { + .pipelineBindPoint = vk::PipelineBindPoint::eGraphics, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = 1u, + .pColorAttachments = &color_ref, + .pResolveAttachments = 0, + .pDepthStencilAttachment = nullptr, + }; + + const vk::AttachmentDescription color_attachment = { + .format = swapchain.GetSurfaceFormat().format, + .loadOp = vk::AttachmentLoadOp::eClear, + .storeOp = vk::AttachmentStoreOp::eStore, + .stencilLoadOp = vk::AttachmentLoadOp::eDontCare, + .stencilStoreOp = vk::AttachmentStoreOp::eDontCare, + .initialLayout = vk::ImageLayout::eUndefined, + .finalLayout = vk::ImageLayout::eTransferSrcOptimal, + }; + + const vk::RenderPassCreateInfo renderpass_info = { + .attachmentCount = 1, + .pAttachments = &color_attachment, + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 0, + .pDependencies = nullptr, + }; + + return instance.GetDevice().createRenderPass(renderpass_info); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_present_window.h b/src/video_core/renderer_vulkan/vk_present_window.h new file mode 100644 index 000000000..d559d1eab --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_present_window.h @@ -0,0 +1,101 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include +#include "common/polyfill_thread.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" + +VK_DEFINE_HANDLE(VmaAllocation) + +namespace Frontend { +class EmuWindow; +} + +namespace Vulkan { + +class Instance; +class Swapchain; +class Scheduler; +class RenderpassCache; + +struct Frame { + u32 width; + u32 height; + VmaAllocation allocation; + vk::Framebuffer framebuffer; + vk::Image image; + vk::ImageView image_view; + vk::Semaphore render_ready; + vk::Fence present_done; + vk::CommandBuffer cmdbuf; +}; + +class PresentWindow final { + static constexpr std::size_t SWAP_CHAIN_SIZE = 6; + +public: + explicit PresentWindow(Frontend::EmuWindow& emu_window, const Instance& instance, + Scheduler& scheduler); + ~PresentWindow(); + + /// Waits for all queued frames to finish presenting. + void WaitPresent(); + + /// Returns the last used render frame. + Frame* GetRenderFrame(); + + /// Recreates the render frame to match provided parameters. 
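+    /// Any image, view and framebuffer previously owned by the frame are destroyed
+    /// first, so callers must ensure the frame is no longer in flight (see
+    /// GetRenderFrame, which waits on present_done before reuse).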
+ void RecreateFrame(Frame* frame, u32 width, u32 height); + + /// Queues the provided frame for presentation. + void Present(Frame* frame); + + /// This is called to notify the rendering backend of a surface change + void NotifySurfaceChanged(); + + [[nodiscard]] vk::RenderPass Renderpass() const noexcept { + return present_renderpass; + } + + u32 ImageCount() const noexcept { + return swapchain.GetImageCount(); + } + +private: + void PresentThread(std::stop_token token); + + void CopyToSwapchain(Frame* frame); + + vk::RenderPass CreateRenderpass(); + +private: + Frontend::EmuWindow& emu_window; + const Instance& instance; + Scheduler& scheduler; + vk::SurfaceKHR surface; + Swapchain swapchain; + vk::CommandPool command_pool; + vk::Queue graphics_queue; + vk::RenderPass present_renderpass; + std::array swap_chain{}; + std::queue free_queue; + std::queue present_queue; + std::condition_variable free_cv; + std::condition_variable recreate_surface_cv; + std::condition_variable_any frame_cv; + std::mutex swapchain_mutex; + std::mutex recreate_surface_mutex; + std::mutex queue_mutex; + std::mutex free_mutex; + std::jthread present_thread; + bool vsync_enabled{}; + bool blit_supported; + bool use_present_thread{true}; + void* last_render_surface{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp new file mode 100644 index 000000000..5d83448c7 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -0,0 +1,1138 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/alignment.h" +#include "common/logging/log.h" +#include "common/math_util.h" +#include "common/microprofile.h" +#include "common/settings.h" +#include "video_core/pica_state.h" +#include "video_core/regs_framebuffer.h" +#include "video_core/regs_pipeline.h" +#include "video_core/regs_rasterizer.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/texture/texture_decode.h" + +namespace Vulkan { + +namespace { + +MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_GS, "Vulkan", "Geometry Shader Setup", MP_RGB(128, 192, 128)); +MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Drawing", MP_RGB(128, 128, 192)); + +using TriangleTopology = Pica::PipelineRegs::TriangleTopology; +using VideoCore::SurfaceType; + +constexpr u64 STREAM_BUFFER_SIZE = 64 * 1024 * 1024; +constexpr u64 UNIFORM_BUFFER_SIZE = 4 * 1024 * 1024; +constexpr u64 TEXTURE_BUFFER_SIZE = 2 * 1024 * 1024; + +constexpr vk::BufferUsageFlags BUFFER_USAGE = + vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer; + +struct DrawParams { + u32 vertex_count; + s32 vertex_offset; + u32 binding_count; + std::array bindings; + bool is_indexed; +}; + +[[nodiscard]] u64 TextureBufferSize(const Instance& instance) { + // Use the smallest texel size from the texel views + // which corresponds to eR32G32Sfloat + const u64 max_size = instance.MaxTexelBufferElements() * 8; + return std::min(max_size, TEXTURE_BUFFER_SIZE); +} + +} // Anonymous namespace + +RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory, + VideoCore::CustomTexManager& custom_tex_manager, + VideoCore::RendererBase& renderer, + 
Frontend::EmuWindow& emu_window, const Instance& instance,
+                                   Scheduler& scheduler, DescriptorPool& pool,
+                                   RenderpassCache& renderpass_cache, u32 image_count)
+    : RasterizerAccelerated{memory}, instance{instance}, scheduler{scheduler},
+      renderpass_cache{renderpass_cache}, pipeline_cache{instance, scheduler, renderpass_cache,
+                                                         pool},
+      runtime{instance, scheduler, renderpass_cache, pool, pipeline_cache.TextureProvider(),
+              image_count},
+      res_cache{memory, custom_tex_manager, runtime, regs, renderer},
+      stream_buffer{instance, scheduler, BUFFER_USAGE, STREAM_BUFFER_SIZE},
+      uniform_buffer{instance, scheduler, vk::BufferUsageFlagBits::eUniformBuffer,
+                     UNIFORM_BUFFER_SIZE},
+      texture_buffer{instance, scheduler, vk::BufferUsageFlagBits::eUniformTexelBuffer,
+                     TextureBufferSize(instance)},
+      texture_lf_buffer{instance, scheduler, vk::BufferUsageFlagBits::eUniformTexelBuffer,
+                        TextureBufferSize(instance)},
+      async_shaders{Settings::values.async_shader_compilation.GetValue()} {
+
+    vertex_buffers.fill(stream_buffer.Handle());
+
+    uniform_buffer_alignment = instance.UniformMinAlignment();
+    uniform_size_aligned_vs =
+        Common::AlignUp(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment);
+    uniform_size_aligned_fs =
+        Common::AlignUp(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment);
+
+    // Define vertex layout for software shaders
+    MakeSoftwareVertexLayout();
+    pipeline_info.vertex_layout = software_layout;
+
+    const vk::Device device = instance.GetDevice();
+    texture_lf_view = device.createBufferViewUnique({
+        .buffer = texture_lf_buffer.Handle(),
+        .format = vk::Format::eR32G32Sfloat,
+        .offset = 0,
+        .range = VK_WHOLE_SIZE,
+    });
+    texture_rg_view = device.createBufferViewUnique({
+        .buffer = texture_buffer.Handle(),
+        .format = vk::Format::eR32G32Sfloat,
+        .offset = 0,
+        .range = VK_WHOLE_SIZE,
+    });
+    texture_rgba_view = device.createBufferViewUnique({
+        .buffer = texture_buffer.Handle(),
+        .format = vk::Format::eR32G32B32A32Sfloat,
+        .offset = 0,
+        .range = VK_WHOLE_SIZE,
+    });
+
+    // Since we don't have access to VK_EXT_descriptor_indexing we need to initialize
+    // all descriptor sets, even the ones we don't use.
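+    // Binding a dummy-but-valid resource in every slot keeps all three descriptor
+    // set layouts satisfied even for draws that never sample some of the slots.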
+    pipeline_cache.BindBuffer(0, uniform_buffer.Handle(), 0, sizeof(Pica::Shader::VSUniformData));
+    pipeline_cache.BindBuffer(1, uniform_buffer.Handle(), 0, sizeof(Pica::Shader::UniformData));
+    pipeline_cache.BindTexelBuffer(2, *texture_lf_view);
+    pipeline_cache.BindTexelBuffer(3, *texture_rg_view);
+    pipeline_cache.BindTexelBuffer(4, *texture_rgba_view);
+
+    Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID);
+    Sampler& null_sampler = res_cache.GetSampler(VideoCore::NULL_SAMPLER_ID);
+    for (u32 i = 0; i < 4; i++) {
+        pipeline_cache.BindTexture(i, null_surface.ImageView(), null_sampler.Handle());
+    }
+
+    for (u32 i = 0; i < 7; i++) {
+        pipeline_cache.BindStorageImage(i, null_surface.StorageView());
+    }
+
+    SyncEntireState();
+}
+
+RasterizerVulkan::~RasterizerVulkan() = default;
+
+void RasterizerVulkan::TickFrame() {
+    res_cache.TickFrame();
+}
+
+void RasterizerVulkan::LoadDiskResources(const std::atomic_bool& stop_loading,
+                                         const VideoCore::DiskResourceLoadCallback& callback) {
+    pipeline_cache.LoadDiskCache();
+}
+
+void RasterizerVulkan::SyncFixedState() {
+    SyncClipEnabled();
+    SyncCullMode();
+    SyncBlendEnabled();
+    SyncBlendFuncs();
+    SyncBlendColor();
+    SyncLogicOp();
+    SyncStencilTest();
+    SyncDepthTest();
+    SyncColorWriteMask();
+    SyncStencilWriteMask();
+    SyncDepthWriteMask();
+}
+
+void RasterizerVulkan::SetupVertexArray() {
+    const auto [vs_input_index_min, vs_input_index_max, vs_input_size] = vertex_info;
+    auto [array_ptr, array_offset, invalidate] = stream_buffer.Map(vs_input_size, 16);
+
+    /**
+     * The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU
+     * how to interpret vertex data. The program first sets GPUREG_ATTR_BUF_BASE to the base
+     * address containing the vertex array data. The data for each attribute loader (i) can be
+     * found by adding GPUREG_ATTR_BUFi_OFFSET to the base address. Attribute loaders can be
+     * thought of as something analogous to Vulkan bindings. The user can store attributes in
+     * separate loaders or interleave them in the same loader.
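+     *
+     * For example (illustrative numbers, not taken from a real title): a loader whose
+     * byte_count is 20 and which interleaves a 3-float position at offset 0 with a
+     * 4-ubyte color at offset 12 becomes one Vulkan binding with stride 20 and two
+     * vertex attributes.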
+ **/ + const auto& vertex_attributes = regs.pipeline.vertex_attributes; + PAddr base_address = vertex_attributes.GetPhysicalBaseAddress(); // GPUREG_ATTR_BUF_BASE + + const u32 stride_alignment = instance.GetMinVertexStrideAlignment(); + + VertexLayout& layout = pipeline_info.vertex_layout; + layout.attribute_count = 0; + layout.binding_count = 0; + enable_attributes.fill(false); + + u32 buffer_offset = 0; + for (const auto& loader : vertex_attributes.attribute_loaders) { + if (loader.component_count == 0 || loader.byte_count == 0) { + continue; + } + + // Analyze the attribute loader by checking which attributes it provides + u32 offset = 0; + for (u32 comp = 0; comp < loader.component_count && comp < 12; comp++) { + u32 attribute_index = loader.GetComponent(comp); + if (attribute_index < 12) { + if (u32 size = vertex_attributes.GetNumElements(attribute_index); size != 0) { + offset = Common::AlignUp( + offset, vertex_attributes.GetElementSizeInBytes(attribute_index)); + + const u32 input_reg = regs.vs.GetRegisterForAttribute(attribute_index); + const Pica::PipelineRegs::VertexAttributeFormat format = + vertex_attributes.GetFormat(attribute_index); + + VertexAttribute& attribute = layout.attributes[layout.attribute_count++]; + attribute.binding.Assign(layout.binding_count); + attribute.location.Assign(input_reg); + attribute.offset.Assign(offset); + attribute.type.Assign(format); + attribute.size.Assign(size); + + enable_attributes[input_reg] = true; + offset += vertex_attributes.GetStride(attribute_index); + } + } else { + // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings + // respectively + offset = Common::AlignUp(offset, 4); + offset += (attribute_index - 11) * 4; + } + } + + const PAddr data_addr = + base_address + loader.data_offset + (vs_input_index_min * loader.byte_count); + const u32 vertex_num = vs_input_index_max - vs_input_index_min + 1; + u32 data_size = loader.byte_count * vertex_num; + res_cache.FlushRegion(data_addr, data_size); + + const MemoryRef src_ref = memory.GetPhysicalRef(data_addr); + if (src_ref.GetSize() < data_size) { + LOG_ERROR(Render_Vulkan, + "Vertex buffer size {} exceeds available space {} at address {:#016X}", + data_size, src_ref.GetSize(), data_addr); + } + + const u8* src_ptr = src_ref.GetPtr(); + u8* dst_ptr = array_ptr + buffer_offset; + + // Align stride up if required by Vulkan implementation. 
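+        // e.g. under VK_KHR_portability_subset a minVertexInputBindingStrideAlignment
+        // of 4 would widen a 6-byte loader stride to 8 (illustrative values).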
+ const u32 aligned_stride = + Common::AlignUp(static_cast(loader.byte_count), stride_alignment); + if (aligned_stride == loader.byte_count) { + std::memcpy(dst_ptr, src_ptr, data_size); + } else { + for (size_t vertex = 0; vertex < vertex_num; vertex++) { + std::memcpy(dst_ptr + vertex * aligned_stride, src_ptr + vertex * loader.byte_count, + loader.byte_count); + } + } + + // Create the binding associated with this loader + VertexBinding& binding = layout.bindings[layout.binding_count]; + binding.binding.Assign(layout.binding_count); + binding.fixed.Assign(0); + binding.stride.Assign(aligned_stride); + + // Keep track of the binding offsets so we can bind the vertex buffer later + binding_offsets[layout.binding_count++] = static_cast(array_offset + buffer_offset); + buffer_offset += Common::AlignUp(aligned_stride * vertex_num, 4); + } + + stream_buffer.Commit(buffer_offset); + + // Assign the rest of the attributes to the last binding + SetupFixedAttribs(); +} + +void RasterizerVulkan::SetupFixedAttribs() { + const auto& vertex_attributes = regs.pipeline.vertex_attributes; + VertexLayout& layout = pipeline_info.vertex_layout; + + auto [fixed_ptr, fixed_offset, _] = stream_buffer.Map(16 * sizeof(Common::Vec4f), 0); + binding_offsets[layout.binding_count] = static_cast(fixed_offset); + + // Reserve the last binding for fixed and default attributes + // Place the default attrib at offset zero for easy access + static const Common::Vec4f default_attrib{0.f, 0.f, 0.f, 1.f}; + std::memcpy(fixed_ptr, default_attrib.AsArray(), sizeof(Common::Vec4f)); + + // Find all fixed attributes and assign them to the last binding + u32 offset = sizeof(Common::Vec4f); + for (std::size_t i = 0; i < 16; i++) { + if (vertex_attributes.IsDefaultAttribute(i)) { + const u32 reg = regs.vs.GetRegisterForAttribute(i); + if (!enable_attributes[reg]) { + const auto& attr = Pica::g_state.input_default_attributes.attr[i]; + const std::array data = {attr.x.ToFloat32(), attr.y.ToFloat32(), attr.z.ToFloat32(), + attr.w.ToFloat32()}; + + const u32 data_size = sizeof(float) * static_cast(data.size()); + std::memcpy(fixed_ptr + offset, data.data(), data_size); + + VertexAttribute& attribute = layout.attributes[layout.attribute_count++]; + attribute.binding.Assign(layout.binding_count); + attribute.location.Assign(reg); + attribute.offset.Assign(offset); + attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT); + attribute.size.Assign(4); + + offset += data_size; + enable_attributes[reg] = true; + } + } + } + + // Loop one more time to find unused attributes and assign them to the default one + // If the attribute is just disabled, shove the default attribute to avoid + // errors if the shader ever decides to use it. 
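+    // A disabled-but-referenced input then reads offset zero of this binding, i.e.
+    // the benign default_attrib value (0, 0, 0, 1) written above, instead of garbage.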
+ for (u32 i = 0; i < 16; i++) { + if (!enable_attributes[i]) { + VertexAttribute& attribute = layout.attributes[layout.attribute_count++]; + attribute.binding.Assign(layout.binding_count); + attribute.location.Assign(i); + attribute.offset.Assign(0); + attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT); + attribute.size.Assign(4); + } + } + + // Define the fixed+default binding + VertexBinding& binding = layout.bindings[layout.binding_count]; + binding.binding.Assign(layout.binding_count++); + binding.fixed.Assign(1); + binding.stride.Assign(offset); + + stream_buffer.Commit(offset); +} + +bool RasterizerVulkan::SetupVertexShader() { + MICROPROFILE_SCOPE(Vulkan_VS); + return pipeline_cache.UseProgrammableVertexShader(regs, Pica::g_state.vs, + pipeline_info.vertex_layout); +} + +bool RasterizerVulkan::SetupGeometryShader() { + MICROPROFILE_SCOPE(Vulkan_GS); + + if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) { + LOG_ERROR(Render_Vulkan, "Accelerate draw doesn't support geometry shader"); + return false; + } + + return pipeline_cache.UseFixedGeometryShader(regs); +} + +bool RasterizerVulkan::AccelerateDrawBatch(bool is_indexed) { + if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) { + if (regs.pipeline.gs_config.mode != Pica::PipelineRegs::GSMode::Point) { + return false; + } + if (regs.pipeline.triangle_topology != Pica::PipelineRegs::TriangleTopology::Shader) { + return false; + } + } + + pipeline_info.rasterization.topology.Assign(regs.pipeline.triangle_topology); + if (regs.pipeline.triangle_topology == TriangleTopology::Fan && + !instance.IsTriangleFanSupported()) { + LOG_DEBUG(Render_Vulkan, + "Skipping accelerated draw with unsupported triangle fan topology"); + return false; + } + + // Vertex data setup might involve scheduler flushes so perform it + // early to avoid invalidating our state in the middle of the draw. + vertex_info = AnalyzeVertexArray(is_indexed, instance.GetMinVertexStrideAlignment()); + SetupVertexArray(); + + if (!SetupVertexShader()) { + return false; + } + if (!SetupGeometryShader()) { + return false; + } + + return Draw(true, is_indexed); +} + +bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) { + if (is_indexed) { + SetupIndexArray(); + } + + const bool wait_built = !async_shaders || regs.pipeline.num_vertices <= 4; + if (!pipeline_cache.BindPipeline(pipeline_info, wait_built)) { + return true; + } + + const DrawParams params = { + .vertex_count = regs.pipeline.num_vertices, + .vertex_offset = -static_cast(vertex_info.vs_input_index_min), + .binding_count = pipeline_info.vertex_layout.binding_count, + .bindings = binding_offsets, + .is_indexed = is_indexed, + }; + + scheduler.Record([this, params](vk::CommandBuffer cmdbuf) { + std::array offsets; + std::transform(params.bindings.begin(), params.bindings.end(), offsets.begin(), + [](u32 offset) { return static_cast(offset); }); + cmdbuf.bindVertexBuffers(0, params.binding_count, vertex_buffers.data(), offsets.data()); + if (params.is_indexed) { + cmdbuf.drawIndexed(params.vertex_count, 1, 0, params.vertex_offset, 0); + } else { + cmdbuf.draw(params.vertex_count, 1, 0, 0); + } + }); + + return true; +} + +void RasterizerVulkan::SetupIndexArray() { + const bool index_u8 = regs.pipeline.index_array.format == 0; + const bool native_u8 = index_u8 && instance.IsIndexTypeUint8Supported(); + const u32 index_buffer_size = regs.pipeline.num_vertices * (native_u8 ? 1 : 2); + const vk::IndexType index_type = native_u8 ? 
vk::IndexType::eUint8EXT : vk::IndexType::eUint16;
+
+    const u8* index_data =
+        memory.GetPhysicalPointer(regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() +
+                                  regs.pipeline.index_array.offset);
+
+    auto [index_ptr, index_offset, _] = stream_buffer.Map(index_buffer_size, 2);
+
+    if (index_u8 && !native_u8) {
+        u16* index_ptr_u16 = reinterpret_cast<u16*>(index_ptr);
+        for (u32 i = 0; i < regs.pipeline.num_vertices; i++) {
+            index_ptr_u16[i] = index_data[i];
+        }
+    } else {
+        std::memcpy(index_ptr, index_data, index_buffer_size);
+    }
+
+    stream_buffer.Commit(index_buffer_size);
+
+    scheduler.Record(
+        [this, index_offset = index_offset, index_type = index_type](vk::CommandBuffer cmdbuf) {
+            cmdbuf.bindIndexBuffer(stream_buffer.Handle(), index_offset, index_type);
+        });
+}
+
+void RasterizerVulkan::DrawTriangles() {
+    if (vertex_batch.empty()) {
+        return;
+    }
+
+    pipeline_info.rasterization.topology.Assign(Pica::PipelineRegs::TriangleTopology::List);
+    pipeline_info.vertex_layout = software_layout;
+
+    pipeline_cache.UseTrivialVertexShader();
+    pipeline_cache.UseTrivialGeometryShader();
+
+    Draw(false, false);
+}
+
+bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
+    MICROPROFILE_SCOPE(Vulkan_Drawing);
+
+    const bool shadow_rendering = regs.framebuffer.IsShadowRendering();
+    const bool has_stencil = regs.framebuffer.HasStencil();
+
+    const bool write_color_fb = shadow_rendering || pipeline_info.blending.color_write_mask;
+    const bool write_depth_fb = pipeline_info.IsDepthWriteEnabled();
+    const bool using_color_fb =
+        regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && write_color_fb;
+    const bool using_depth_fb =
+        !shadow_rendering && regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 &&
+        (write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 ||
+         (has_stencil && pipeline_info.depth_stencil.stencil_test_enable));
+
+    const auto fb_helper = res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb);
+    const Framebuffer* framebuffer = fb_helper.Framebuffer();
+    if (!framebuffer->Handle()) {
+        return true;
+    }
+
+    pipeline_info.attachments.color = framebuffer->Format(SurfaceType::Color);
+    pipeline_info.attachments.depth = framebuffer->Format(SurfaceType::Depth);
+
+    if (shadow_rendering) {
+        pipeline_cache.BindStorageImage(6, framebuffer->ImageView(SurfaceType::Color));
+    }
+
+    // Update scissor uniforms
+    const auto [scissor_x1, scissor_y2, scissor_x2, scissor_y1] = fb_helper.Scissor();
+    if (uniform_block_data.data.scissor_x1 != scissor_x1 ||
+        uniform_block_data.data.scissor_x2 != scissor_x2 ||
+        uniform_block_data.data.scissor_y1 != scissor_y1 ||
+        uniform_block_data.data.scissor_y2 != scissor_y2) {
+
+        uniform_block_data.data.scissor_x1 = scissor_x1;
+        uniform_block_data.data.scissor_x2 = scissor_x2;
+        uniform_block_data.data.scissor_y1 = scissor_y1;
+        uniform_block_data.data.scissor_y2 = scissor_y2;
+        uniform_block_data.dirty = true;
+    }
+
+    // Sync and bind the texture surfaces
+    SyncTextureUnits(framebuffer);
+
+    // Sync and bind the shader
+    if (shader_dirty) {
+        pipeline_cache.UseFragmentShader(regs);
+        shader_dirty = false;
+    }
+
+    // Sync the LUTs within the texture buffer
+    SyncAndUploadLUTs();
+    SyncAndUploadLUTsLF();
+    UploadUniforms(accelerate);
+
+    // Configure viewport and scissor
+    const auto draw_rect = fb_helper.DrawRect();
+    const auto viewport = fb_helper.Viewport();
+    scheduler.Record([viewport, draw_rect](vk::CommandBuffer cmdbuf) {
+        const vk::Viewport vk_viewport = {
+            .x = static_cast<f32>(viewport.x),
+            .y = static_cast<f32>(viewport.y),
+            .width = static_cast<f32>(viewport.width),
+            .height = static_cast<f32>(viewport.height),
+            .minDepth = 0.f,
+            .maxDepth = 1.f,
+        };
+
+        const vk::Rect2D scissor = {
+            .offset{
+                .x = static_cast<s32>(draw_rect.left),
+                .y = static_cast<s32>(draw_rect.bottom),
+            },
+            .extent{
+                .width = draw_rect.GetWidth(),
+                .height = draw_rect.GetHeight(),
+            },
+        };
+
+        cmdbuf.setViewport(0, vk_viewport);
+        cmdbuf.setScissor(0, scissor);
+    });
+
+    // Begin rendering
+    renderpass_cache.BeginRendering(framebuffer, draw_rect);
+
+    // Draw the vertex batch
+    bool succeeded = true;
+    if (accelerate) {
+        succeeded = AccelerateDrawBatchInternal(is_indexed);
+    } else {
+        pipeline_cache.BindPipeline(pipeline_info, true);
+
+        const u64 vertex_size = vertex_batch.size() * sizeof(HardwareVertex);
+        const u32 vertex_count = static_cast<u32>(vertex_batch.size());
+        const auto [buffer, offset, _] = stream_buffer.Map(vertex_size, sizeof(HardwareVertex));
+
+        std::memcpy(buffer, vertex_batch.data(), vertex_size);
+        stream_buffer.Commit(vertex_size);
+
+        scheduler.Record([this, offset = offset, vertex_count](vk::CommandBuffer cmdbuf) {
+            cmdbuf.bindVertexBuffers(0, stream_buffer.Handle(), offset);
+            cmdbuf.draw(vertex_count, 1, 0, 0);
+        });
+    }
+
+    vertex_batch.clear();
+    return succeeded;
+}
+
+void RasterizerVulkan::SyncTextureUnits(const Framebuffer* framebuffer) {
+    using TextureType = Pica::TexturingRegs::TextureConfig::TextureType;
+
+    const auto pica_textures = regs.texturing.GetTextures();
+    for (u32 texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
+        const auto& texture = pica_textures[texture_index];
+
+        // If the texture unit is disabled bind a null surface to it
+        if (!texture.enabled) {
+            const Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID);
+            const Sampler& null_sampler = res_cache.GetSampler(VideoCore::NULL_SAMPLER_ID);
+            pipeline_cache.BindTexture(texture_index, null_surface.ImageView(),
+                                       null_sampler.Handle());
+            continue;
+        }
+
+        // Handle special tex0 configurations
+        if (texture_index == 0) {
+            switch (texture.config.type.Value()) {
+            case TextureType::Shadow2D: {
+                Surface& surface = res_cache.GetTextureSurface(texture);
+                surface.flags |= VideoCore::SurfaceFlagBits::ShadowMap;
+                pipeline_cache.BindStorageImage(0, surface.StorageView());
+                continue;
+            }
+            case TextureType::ShadowCube: {
+                BindShadowCube(texture);
+                continue;
+            }
+            case TextureType::TextureCube: {
+                BindTextureCube(texture);
+                continue;
+            }
+            default:
+                UnbindSpecial();
+                break;
+            }
+        }
+
+        // Bind the texture provided by the rasterizer cache
+        Surface& surface = res_cache.GetTextureSurface(texture);
+        Sampler& sampler = res_cache.GetSampler(texture.config);
+        if (!IsFeedbackLoop(texture_index, framebuffer, surface, sampler)) {
+            pipeline_cache.BindTexture(texture_index, surface.ImageView(), sampler.Handle());
+        }
+    }
+}
+
+void RasterizerVulkan::BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture) {
+    using CubeFace = Pica::TexturingRegs::CubeFace;
+    auto info = Pica::Texture::TextureInfo::FromPicaRegister(texture.config, texture.format);
+    constexpr std::array faces = {
+        CubeFace::PositiveX, CubeFace::NegativeX, CubeFace::PositiveY,
+        CubeFace::NegativeY, CubeFace::PositiveZ, CubeFace::NegativeZ,
+    };
+
+    for (CubeFace face : faces) {
+        const u32 binding = static_cast<u32>(face);
+        info.physical_address = regs.texturing.GetCubePhysicalAddress(face);
+
+        const VideoCore::SurfaceId surface_id = res_cache.GetTextureSurface(info);
+
Surface& surface = res_cache.GetSurface(surface_id); + surface.flags |= VideoCore::SurfaceFlagBits::ShadowMap; + pipeline_cache.BindStorageImage(binding, surface.StorageView()); + } +} + +void RasterizerVulkan::BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture) { + using CubeFace = Pica::TexturingRegs::CubeFace; + const VideoCore::TextureCubeConfig config = { + .px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX), + .nx = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX), + .py = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY), + .ny = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY), + .pz = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ), + .nz = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ), + .width = texture.config.width, + .levels = texture.config.lod.max_level + 1, + .format = texture.format, + }; + + Surface& surface = res_cache.GetTextureCube(config); + Sampler& sampler = res_cache.GetSampler(texture.config); + pipeline_cache.BindTexture(3, surface.ImageView(), sampler.Handle()); +} + +bool RasterizerVulkan::IsFeedbackLoop(u32 texture_index, const Framebuffer* framebuffer, + Surface& surface, Sampler& sampler) { + const vk::ImageView color_view = framebuffer->ImageView(SurfaceType::Color); + const bool is_feedback_loop = color_view == surface.ImageView(); + if (!is_feedback_loop) { + return false; + } + + // Make a temporary copy of the framebuffer to sample from + pipeline_cache.BindTexture(texture_index, surface.CopyImageView(), sampler.Handle()); + return true; +} + +void RasterizerVulkan::UnbindSpecial() { + const Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID); + const Sampler& null_sampler = res_cache.GetSampler(VideoCore::NULL_SAMPLER_ID); + pipeline_cache.BindTexture(3, null_surface.ImageView(), null_sampler.Handle()); + for (u32 i = 0; i < 7; i++) { + pipeline_cache.BindStorageImage(i, null_surface.ImageView()); + } +} + +void RasterizerVulkan::NotifyFixedFunctionPicaRegisterChanged(u32 id) { + switch (id) { + // Clipping plane + case PICA_REG_INDEX(rasterizer.clip_enable): + SyncClipEnabled(); + break; + + // Culling + case PICA_REG_INDEX(rasterizer.cull_mode): + SyncCullMode(); + break; + + // Blending + case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable): + SyncBlendEnabled(); + // Update since logic op emulation depends on alpha blend enable. 
+ SyncLogicOp(); + SyncColorWriteMask(); + break; + case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending): + SyncBlendFuncs(); + break; + case PICA_REG_INDEX(framebuffer.output_merger.blend_const): + SyncBlendColor(); + break; + + // Sync VK stencil test + stencil write mask + // (Pica stencil test function register also contains a stencil write mask) + case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_func): + SyncStencilTest(); + SyncStencilWriteMask(); + break; + case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_op): + case PICA_REG_INDEX(framebuffer.framebuffer.depth_format): + SyncStencilTest(); + break; + + // Sync VK depth test + depth and color write mask + // (Pica depth test function register also contains a depth and color write mask) + case PICA_REG_INDEX(framebuffer.output_merger.depth_test_enable): + SyncDepthTest(); + SyncDepthWriteMask(); + SyncColorWriteMask(); + break; + + // Sync VK depth and stencil write mask + // (This is a dedicated combined depth / stencil write-enable register) + case PICA_REG_INDEX(framebuffer.framebuffer.allow_depth_stencil_write): + SyncDepthWriteMask(); + SyncStencilWriteMask(); + break; + + // Sync VK color write mask + // (This is a dedicated color write-enable register) + case PICA_REG_INDEX(framebuffer.framebuffer.allow_color_write): + SyncColorWriteMask(); + break; + + // Logic op + case PICA_REG_INDEX(framebuffer.output_merger.logic_op): + SyncLogicOp(); + // Update since color write mask is used to emulate no-op. + SyncColorWriteMask(); + break; + } +} + +void RasterizerVulkan::FlushAll() { + res_cache.FlushAll(); +} + +void RasterizerVulkan::FlushRegion(PAddr addr, u32 size) { + res_cache.FlushRegion(addr, size); +} + +void RasterizerVulkan::InvalidateRegion(PAddr addr, u32 size) { + res_cache.InvalidateRegion(addr, size); +} + +void RasterizerVulkan::FlushAndInvalidateRegion(PAddr addr, u32 size) { + res_cache.FlushRegion(addr, size); + res_cache.InvalidateRegion(addr, size); +} + +void RasterizerVulkan::ClearAll(bool flush) { + res_cache.ClearAll(flush); +} + +bool RasterizerVulkan::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { + return res_cache.AccelerateDisplayTransfer(config); +} + +bool RasterizerVulkan::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { + return res_cache.AccelerateTextureCopy(config); +} + +bool RasterizerVulkan::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { + return res_cache.AccelerateFill(config); +} + +bool RasterizerVulkan::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, + PAddr framebuffer_addr, u32 pixel_stride, + ScreenInfo& screen_info) { + if (framebuffer_addr == 0) [[unlikely]] { + return false; + } + + VideoCore::SurfaceParams src_params; + src_params.addr = framebuffer_addr; + src_params.width = std::min(config.width.Value(), pixel_stride); + src_params.height = config.height; + src_params.stride = pixel_stride; + src_params.is_tiled = false; + src_params.pixel_format = VideoCore::PixelFormatFromGPUPixelFormat(config.color_format); + src_params.UpdateParams(); + + const auto [src_surface_id, src_rect] = + res_cache.GetSurfaceSubRect(src_params, VideoCore::ScaleMatch::Ignore, true); + + if (!src_surface_id) { + return false; + } + + const Surface& src_surface = res_cache.GetSurface(src_surface_id); + const u32 scaled_width = src_surface.GetScaledWidth(); + const u32 scaled_height = src_surface.GetScaledHeight(); + + screen_info.texcoords = Common::Rectangle<float>( + (float)src_rect.bottom / 
(float)scaled_height, (float)src_rect.left / (float)scaled_width, + (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width); + + screen_info.image_view = src_surface.ImageView(); + + return true; +} + +void RasterizerVulkan::MakeSoftwareVertexLayout() { + constexpr std::array sizes = {4, 4, 2, 2, 2, 1, 4, 3}; + + software_layout = VertexLayout{ + .binding_count = 1, + .attribute_count = 8, + }; + + for (u32 i = 0; i < software_layout.binding_count; i++) { + VertexBinding& binding = software_layout.bindings[i]; + binding.binding.Assign(i); + binding.fixed.Assign(0); + binding.stride.Assign(sizeof(HardwareVertex)); + } + + u32 offset = 0; + for (u32 i = 0; i < 8; i++) { + VertexAttribute& attribute = software_layout.attributes[i]; + attribute.binding.Assign(0); + attribute.location.Assign(i); + attribute.offset.Assign(offset); + attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT); + attribute.size.Assign(sizes[i]); + offset += sizes[i] * sizeof(float); + } +} + +void RasterizerVulkan::SyncClipEnabled() { + bool clip_enabled = regs.rasterizer.clip_enable != 0; + if (clip_enabled != uniform_block_data.data.enable_clip1) { + uniform_block_data.data.enable_clip1 = clip_enabled; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncCullMode() { + pipeline_info.rasterization.cull_mode.Assign(regs.rasterizer.cull_mode); +} + +void RasterizerVulkan::SyncBlendEnabled() { + pipeline_info.blending.blend_enable = regs.framebuffer.output_merger.alphablend_enable; +} + +void RasterizerVulkan::SyncBlendFuncs() { + pipeline_info.blending.color_blend_eq.Assign( + regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb); + pipeline_info.blending.alpha_blend_eq.Assign( + regs.framebuffer.output_merger.alpha_blending.blend_equation_a); + pipeline_info.blending.src_color_blend_factor.Assign( + regs.framebuffer.output_merger.alpha_blending.factor_source_rgb); + pipeline_info.blending.dst_color_blend_factor.Assign( + regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb); + pipeline_info.blending.src_alpha_blend_factor.Assign( + regs.framebuffer.output_merger.alpha_blending.factor_source_a); + pipeline_info.blending.dst_alpha_blend_factor.Assign( + regs.framebuffer.output_merger.alpha_blending.factor_dest_a); +} + +void RasterizerVulkan::SyncBlendColor() { + pipeline_info.dynamic.blend_color = regs.framebuffer.output_merger.blend_const.raw; +} + +void RasterizerVulkan::SyncLogicOp() { + if (instance.NeedsLogicOpEmulation()) { + // We need this in the fragment shader to emulate logic operations + shader_dirty = true; + } + + pipeline_info.blending.logic_op = regs.framebuffer.output_merger.logic_op; + + const bool is_logic_op_emulated = + instance.NeedsLogicOpEmulation() && !regs.framebuffer.output_merger.alphablend_enable; + const bool is_logic_op_noop = + regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp; + if (is_logic_op_emulated && is_logic_op_noop) { + // Color output is disabled by logic operation. We use color write mask to skip + // color but allow depth write. + pipeline_info.blending.color_write_mask = 0; + } +} + +void RasterizerVulkan::SyncColorWriteMask() { + const u32 color_mask = regs.framebuffer.framebuffer.allow_color_write != 0 + ? 
(regs.framebuffer.output_merger.depth_color_mask >> 8) & 0xF + : 0; + + const bool is_logic_op_emulated = + instance.NeedsLogicOpEmulation() && !regs.framebuffer.output_merger.alphablend_enable; + const bool is_logic_op_noop = + regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp; + if (is_logic_op_emulated && is_logic_op_noop) { + // Color output is disabled by logic operation. We use color write mask to skip + // color but allow depth write. Return early to avoid overwriting this. + return; + } + + pipeline_info.blending.color_write_mask = color_mask; +} + +void RasterizerVulkan::SyncStencilWriteMask() { + pipeline_info.dynamic.stencil_write_mask = + (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0) + ? static_cast<u32>(regs.framebuffer.output_merger.stencil_test.write_mask) + : 0; +} + +void RasterizerVulkan::SyncDepthWriteMask() { + const bool write_enable = (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 && + regs.framebuffer.output_merger.depth_write_enable); + pipeline_info.depth_stencil.depth_write_enable.Assign(write_enable); +} + +void RasterizerVulkan::SyncStencilTest() { + const auto& stencil_test = regs.framebuffer.output_merger.stencil_test; + const bool test_enable = stencil_test.enable && regs.framebuffer.framebuffer.depth_format == + Pica::FramebufferRegs::DepthFormat::D24S8; + + pipeline_info.depth_stencil.stencil_test_enable.Assign(test_enable); + pipeline_info.depth_stencil.stencil_fail_op.Assign(stencil_test.action_stencil_fail); + pipeline_info.depth_stencil.stencil_pass_op.Assign(stencil_test.action_depth_pass); + pipeline_info.depth_stencil.stencil_depth_fail_op.Assign(stencil_test.action_depth_fail); + pipeline_info.depth_stencil.stencil_compare_op.Assign(stencil_test.func); + pipeline_info.dynamic.stencil_reference = stencil_test.reference_value; + pipeline_info.dynamic.stencil_compare_mask = stencil_test.input_mask; +} + +void RasterizerVulkan::SyncDepthTest() { + const bool test_enabled = regs.framebuffer.output_merger.depth_test_enable == 1 || + regs.framebuffer.output_merger.depth_write_enable == 1; + const auto compare_op = regs.framebuffer.output_merger.depth_test_enable == 1 + ? 
regs.framebuffer.output_merger.depth_test_func.Value() + : Pica::FramebufferRegs::CompareFunc::Always; + + pipeline_info.depth_stencil.depth_test_enable.Assign(test_enabled); + pipeline_info.depth_stencil.depth_compare_op.Assign(compare_op); +} + +void RasterizerVulkan::SyncAndUploadLUTsLF() { + constexpr std::size_t max_size = + sizeof(Common::Vec2f) * 256 * Pica::LightingRegs::NumLightingSampler + + sizeof(Common::Vec2f) * 128; // fog + + if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty) { + return; + } + + std::size_t bytes_used = 0; + auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size, sizeof(Common::Vec4f)); + + // Sync the lighting luts + if (uniform_block_data.lighting_lut_dirty_any || invalidate) { + for (unsigned index = 0; index < uniform_block_data.lighting_lut_dirty.size(); index++) { + if (uniform_block_data.lighting_lut_dirty[index] || invalidate) { + std::array<Common::Vec2f, 256> new_data; + const auto& source_lut = Pica::g_state.lighting.luts[index]; + std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), + [](const auto& entry) { + return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; + }); + + if (new_data != lighting_lut_data[index] || invalidate) { + lighting_lut_data[index] = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(Common::Vec2f)); + uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] = + static_cast<int>((offset + bytes_used) / sizeof(Common::Vec2f)); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(Common::Vec2f); + } + uniform_block_data.lighting_lut_dirty[index] = false; + } + } + uniform_block_data.lighting_lut_dirty_any = false; + } + + // Sync the fog lut + if (uniform_block_data.fog_lut_dirty || invalidate) { + std::array<Common::Vec2f, 128> new_data; + + std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), + [](const auto& entry) { + return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; + }); + + if (new_data != fog_lut_data || invalidate) { + fog_lut_data = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(Common::Vec2f)); + uniform_block_data.data.fog_lut_offset = + static_cast<int>((offset + bytes_used) / sizeof(Common::Vec2f)); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(Common::Vec2f); + } + uniform_block_data.fog_lut_dirty = false; + } + + texture_lf_buffer.Commit(static_cast<u32>(bytes_used)); +} + +void RasterizerVulkan::SyncAndUploadLUTs() { + const auto& proctex = Pica::g_state.proctex; + constexpr std::size_t max_size = + sizeof(Common::Vec2f) * 128 * 3 + // proctex: noise + color + alpha + sizeof(Common::Vec4f) * 256 + // proctex + sizeof(Common::Vec4f) * 256; // proctex diff + + if (!uniform_block_data.proctex_noise_lut_dirty && + !uniform_block_data.proctex_color_map_dirty && + !uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty && + !uniform_block_data.proctex_diff_lut_dirty) { + return; + } + + std::size_t bytes_used = 0; + auto [buffer, offset, invalidate] = texture_buffer.Map(max_size, sizeof(Common::Vec4f)); + + // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap + auto sync_proctex_value_lut = + [this, buffer = buffer, offset = offset, invalidate = invalidate, + &bytes_used](const std::array<Pica::State::ProcTex::ValueEntry, 128>& lut, + std::array<Common::Vec2f, 128>& lut_data, int& lut_offset) { + std::array<Common::Vec2f, 128> new_data; + std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) { + return 
Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; + }); + + if (new_data != lut_data || invalidate) { + lut_data = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(Common::Vec2f)); + lut_offset = static_cast<int>((offset + bytes_used) / sizeof(Common::Vec2f)); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(Common::Vec2f); + } + }; + + // Sync the proctex noise lut + if (uniform_block_data.proctex_noise_lut_dirty || invalidate) { + sync_proctex_value_lut(proctex.noise_table, proctex_noise_lut_data, + uniform_block_data.data.proctex_noise_lut_offset); + uniform_block_data.proctex_noise_lut_dirty = false; + } + + // Sync the proctex color map + if (uniform_block_data.proctex_color_map_dirty || invalidate) { + sync_proctex_value_lut(proctex.color_map_table, proctex_color_map_data, + uniform_block_data.data.proctex_color_map_offset); + uniform_block_data.proctex_color_map_dirty = false; + } + + // Sync the proctex alpha map + if (uniform_block_data.proctex_alpha_map_dirty || invalidate) { + sync_proctex_value_lut(proctex.alpha_map_table, proctex_alpha_map_data, + uniform_block_data.data.proctex_alpha_map_offset); + uniform_block_data.proctex_alpha_map_dirty = false; + } + + // Sync the proctex lut + if (uniform_block_data.proctex_lut_dirty || invalidate) { + std::array<Common::Vec4f, 256> new_data; + + std::transform(proctex.color_table.begin(), proctex.color_table.end(), new_data.begin(), + [](const auto& entry) { + auto rgba = entry.ToVector() / 255.0f; + return Common::Vec4f{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; + }); + + if (new_data != proctex_lut_data || invalidate) { + proctex_lut_data = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(Common::Vec4f)); + uniform_block_data.data.proctex_lut_offset = + static_cast<int>((offset + bytes_used) / sizeof(Common::Vec4f)); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(Common::Vec4f); + } + uniform_block_data.proctex_lut_dirty = false; + } + + // Sync the proctex difference lut + if (uniform_block_data.proctex_diff_lut_dirty || invalidate) { + std::array<Common::Vec4f, 256> new_data; + + std::transform(proctex.color_diff_table.begin(), proctex.color_diff_table.end(), + new_data.begin(), [](const auto& entry) { + auto rgba = entry.ToVector() / 255.0f; + return Common::Vec4f{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; + }); + + if (new_data != proctex_diff_lut_data || invalidate) { + proctex_diff_lut_data = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(Common::Vec4f)); + uniform_block_data.data.proctex_diff_lut_offset = + static_cast<int>((offset + bytes_used) / sizeof(Common::Vec4f)); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(Common::Vec4f); + } + uniform_block_data.proctex_diff_lut_dirty = false; + } + + texture_buffer.Commit(static_cast<u32>(bytes_used)); +} + +void RasterizerVulkan::UploadUniforms(bool accelerate_draw) { + const bool sync_vs = accelerate_draw; + const bool sync_fs = uniform_block_data.dirty; + + if (!sync_vs && !sync_fs) { + return; + } + + const u64 uniform_size = uniform_size_aligned_vs + uniform_size_aligned_fs; + auto [uniforms, offset, invalidate] = + uniform_buffer.Map(uniform_size, uniform_buffer_alignment); + + u32 used_bytes = 0; + if (sync_vs) { + Pica::Shader::VSUniformData vs_uniforms; + vs_uniforms.uniforms.SetFromRegs(regs.vs, Pica::g_state.vs); + std::memcpy(uniforms, &vs_uniforms, sizeof(vs_uniforms)); + + pipeline_cache.SetBufferOffset(0, offset); + 
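// Step past the aligned VS block so the FS uniform data lands at its own aligned offset. + 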
used_bytes += static_cast<u32>(uniform_size_aligned_vs); + } + + if (sync_fs || invalidate) { + std::memcpy(uniforms + used_bytes, &uniform_block_data.data, + sizeof(Pica::Shader::UniformData)); + + pipeline_cache.SetBufferOffset(1, offset + used_bytes); + uniform_block_data.dirty = false; + used_bytes += static_cast<u32>(uniform_size_aligned_fs); + } + + uniform_buffer.Commit(used_bytes); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h new file mode 100644 index 000000000..8034412c0 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -0,0 +1,171 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "core/hw/gpu.h" +#include "video_core/rasterizer_accelerated.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/renderer_vulkan/vk_texture_runtime.h" + +namespace Frontend { +class EmuWindow; +} + +namespace VideoCore { +class CustomTexManager; +class RendererBase; +} // namespace VideoCore + +namespace Vulkan { + +struct ScreenInfo; + +class Instance; +class Scheduler; +class RenderpassCache; +class DescriptorPool; + +class RasterizerVulkan : public VideoCore::RasterizerAccelerated { +public: + explicit RasterizerVulkan(Memory::MemorySystem& memory, + VideoCore::CustomTexManager& custom_tex_manager, + VideoCore::RendererBase& renderer, Frontend::EmuWindow& emu_window, + const Instance& instance, Scheduler& scheduler, DescriptorPool& pool, + RenderpassCache& renderpass_cache, u32 image_count); + ~RasterizerVulkan() override; + + void TickFrame(); + void LoadDiskResources(const std::atomic_bool& stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) override; + + void DrawTriangles() override; + void FlushAll() override; + void FlushRegion(PAddr addr, u32 size) override; + void InvalidateRegion(PAddr addr, u32 size) override; + void FlushAndInvalidateRegion(PAddr addr, u32 size) override; + void ClearAll(bool flush) override; + bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; + bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override; + bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; + bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, + u32 pixel_stride, ScreenInfo& screen_info); + bool AccelerateDrawBatch(bool is_indexed) override; + + void SyncFixedState() override; + +private: + void NotifyFixedFunctionPicaRegisterChanged(u32 id) override; + + /// Syncs the clip enabled status to match the PICA register + void SyncClipEnabled(); + + /// Syncs the cull mode to match the PICA register + void SyncCullMode(); + + /// Syncs the blend enabled status to match the PICA register + void SyncBlendEnabled(); + + /// Syncs the blend functions to match the PICA register + void SyncBlendFuncs(); + + /// Syncs the blend color to match the PICA register + void SyncBlendColor(); + + /// Syncs the logic op states to match the PICA register + void SyncLogicOp(); + + /// Syncs the color write mask to match the PICA register state + void SyncColorWriteMask(); + + /// Syncs the stencil write mask to match the PICA register state + void SyncStencilWriteMask(); + + /// Syncs the depth write mask to match the PICA register state
+ void SyncDepthWriteMask(); + + /// Syncs the stencil test states to match the PICA register + void SyncStencilTest(); + + /// Syncs the depth test states to match the PICA register + void SyncDepthTest(); + + /// Syncs and uploads the lighting, fog and proctex LUTs + void SyncAndUploadLUTs(); + void SyncAndUploadLUTsLF(); + + /// Syncs all enabled PICA texture units + void SyncTextureUnits(const Framebuffer* framebuffer); + + /// Binds the PICA shadow cube required for shadow mapping + void BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture); + + /// Binds a texture cube to texture unit 0 + void BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture); + + /// Makes a temporary copy of the framebuffer if a feedback loop is detected + bool IsFeedbackLoop(u32 texture_index, const Framebuffer* framebuffer, Surface& surface, + Sampler& sampler); + + /// Unbinds all special texture unit 0 texture configurations + void UnbindSpecial(); + + /// Uploads the uniform blocks to the uniform buffer object + void UploadUniforms(bool accelerate_draw); + + /// Generic draw function for DrawTriangles and AccelerateDrawBatch + bool Draw(bool accelerate, bool is_indexed); + + /// Internal implementation for AccelerateDrawBatch + bool AccelerateDrawBatchInternal(bool is_indexed); + + /// Sets up the index array for AccelerateDrawBatch + void SetupIndexArray(); + + /// Sets up the vertex array for AccelerateDrawBatch + void SetupVertexArray(); + + /// Sets up the fixed attribute emulation in Vulkan + void SetupFixedAttribs(); + + /// Sets up the vertex shader for AccelerateDrawBatch + bool SetupVertexShader(); + + /// Sets up the geometry shader for AccelerateDrawBatch + bool SetupGeometryShader(); + + /// Creates the vertex layout struct used for software shader pipelines + void MakeSoftwareVertexLayout(); + +private: + const Instance& instance; + Scheduler& scheduler; + RenderpassCache& renderpass_cache; + PipelineCache pipeline_cache; + TextureRuntime runtime; + RasterizerCache res_cache; + + VertexLayout software_layout; + std::array<u64, 16> binding_offsets{}; + std::array<bool, 16> enable_attributes{}; + std::array<vk::Buffer, 16> vertex_buffers; + VertexArrayInfo vertex_info; + PipelineInfo pipeline_info; + + StreamBuffer stream_buffer; ///< Vertex+Index buffer + StreamBuffer uniform_buffer; ///< Uniform buffer + StreamBuffer texture_buffer; ///< Texture buffer + StreamBuffer texture_lf_buffer; ///< Texture Light-Fog buffer + vk::UniqueBufferView texture_lf_view; + vk::UniqueBufferView texture_rg_view; + vk::UniqueBufferView texture_rgba_view; + u64 uniform_buffer_alignment; + u64 uniform_size_aligned_vs; + u64 uniform_size_aligned_fs; + bool async_shaders{false}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp b/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp new file mode 100644 index 000000000..21eb7ce6b --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp @@ -0,0 +1,10 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
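+ +// Explicit instantiation of the rasterizer cache template for the Vulkan texture runtime.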
+ +#include "video_core/rasterizer_cache/rasterizer_cache.h" +#include "video_core/renderer_vulkan/vk_texture_runtime.h" + +namespace VideoCore { +template class RasterizerCache<Vulkan::TextureRuntime>; +} // namespace VideoCore diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp new file mode 100644 index 000000000..845f1f279 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -0,0 +1,211 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <mutex> +#include "common/assert.h" +#include "video_core/rasterizer_cache/pixel_format.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_texture_runtime.h" + +namespace Vulkan { + +using VideoCore::PixelFormat; +using VideoCore::SurfaceType; + +RenderpassCache::RenderpassCache(const Instance& instance, Scheduler& scheduler) + : instance{instance}, scheduler{scheduler} {} + +RenderpassCache::~RenderpassCache() = default; + +void RenderpassCache::BeginRendering(const Framebuffer* framebuffer, + Common::Rectangle<u32> draw_rect) { + const vk::Rect2D render_area = { + .offset{ + .x = static_cast<s32>(draw_rect.left), + .y = static_cast<s32>(draw_rect.bottom), + }, + .extent{ + .width = draw_rect.GetWidth(), + .height = draw_rect.GetHeight(), + }, + }; + const RenderPass new_pass = { + .framebuffer = framebuffer->Handle(), + .render_pass = framebuffer->RenderPass(), + .render_area = render_area, + .clear = {}, + .do_clear = false, + }; + images = framebuffer->Images(); + aspects = framebuffer->Aspects(); + BeginRendering(new_pass); +} + +void RenderpassCache::BeginRendering(const RenderPass& new_pass) { + if (pass == new_pass) [[likely]] { + return; + } + + EndRendering(); + scheduler.Record([info = new_pass](vk::CommandBuffer cmdbuf) { + const vk::RenderPassBeginInfo renderpass_begin_info = { + .renderPass = info.render_pass, + .framebuffer = info.framebuffer, + .renderArea = info.render_area, + .clearValueCount = info.do_clear ? 1u : 0u, + .pClearValues = &info.clear, + }; + cmdbuf.beginRenderPass(renderpass_begin_info, vk::SubpassContents::eInline); + }); + + pass = new_pass; +} + +void RenderpassCache::EndRendering() { + if (!pass.render_pass) { + return; + } + + pass.render_pass = vk::RenderPass{}; + scheduler.Record([images = images, aspects = aspects](vk::CommandBuffer cmdbuf) { + u32 num_barriers = 0; + vk::PipelineStageFlags pipeline_flags{}; + std::array<vk::ImageMemoryBarrier, 2> barriers; + for (u32 i = 0; i < images.size(); i++) { + if (!images[i]) { + continue; + } + const bool is_color = static_cast<bool>(aspects[i] & vk::ImageAspectFlagBits::eColor); + if (is_color) { + pipeline_flags |= vk::PipelineStageFlagBits::eColorAttachmentOutput; + } else { + pipeline_flags |= vk::PipelineStageFlagBits::eEarlyFragmentTests | + vk::PipelineStageFlagBits::eLateFragmentTests; + } + barriers[num_barriers++] = vk::ImageMemoryBarrier{ + .srcAccessMask = is_color ? 
vk::AccessFlagBits::eColorAttachmentWrite + : vk::AccessFlagBits::eDepthStencilAttachmentWrite, + .dstAccessMask = + vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = images[i], + .subresourceRange{ + .aspectMask = aspects[i], + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + } + cmdbuf.endRenderPass(); + cmdbuf.pipelineBarrier(pipeline_flags, + vk::PipelineStageFlagBits::eFragmentShader | + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, 0, nullptr, 0, nullptr, + num_barriers, barriers.data()); + }); +} + +vk::RenderPass RenderpassCache::GetRenderpass(VideoCore::PixelFormat color, + VideoCore::PixelFormat depth, bool is_clear) { + std::scoped_lock lock{cache_mutex}; + + const u32 color_index = + color == VideoCore::PixelFormat::Invalid ? MAX_COLOR_FORMATS : static_cast<u32>(color); + const u32 depth_index = depth == VideoCore::PixelFormat::Invalid + ? MAX_DEPTH_FORMATS + : (static_cast<u32>(depth) - 14); + + ASSERT_MSG(color_index <= MAX_COLOR_FORMATS && depth_index <= MAX_DEPTH_FORMATS, + "Invalid color index {} and/or depth_index {}", color_index, depth_index); + + vk::UniqueRenderPass& renderpass = cached_renderpasses[color_index][depth_index][is_clear]; + if (!renderpass) { + const vk::Format color_format = instance.GetTraits(color).native; + const vk::Format depth_format = instance.GetTraits(depth).native; + const vk::AttachmentLoadOp load_op = + is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad; + renderpass = CreateRenderPass(color_format, depth_format, load_op); + } + + return *renderpass; +} + +vk::UniqueRenderPass RenderpassCache::CreateRenderPass(vk::Format color, vk::Format depth, + vk::AttachmentLoadOp load_op) const { + u32 attachment_count = 0; + std::array<vk::AttachmentDescription, 2> attachments; + + bool use_color = false; + vk::AttachmentReference color_attachment_ref{}; + bool use_depth = false; + vk::AttachmentReference depth_attachment_ref{}; + + if (color != vk::Format::eUndefined) { + attachments[attachment_count] = vk::AttachmentDescription{ + .format = color, + .loadOp = load_op, + .storeOp = vk::AttachmentStoreOp::eStore, + .stencilLoadOp = vk::AttachmentLoadOp::eDontCare, + .stencilStoreOp = vk::AttachmentStoreOp::eDontCare, + .initialLayout = vk::ImageLayout::eGeneral, + .finalLayout = vk::ImageLayout::eGeneral, + }; + + color_attachment_ref = vk::AttachmentReference{ + .attachment = attachment_count++, + .layout = vk::ImageLayout::eGeneral, + }; + + use_color = true; + } + + if (depth != vk::Format::eUndefined) { + attachments[attachment_count] = vk::AttachmentDescription{ + .format = depth, + .loadOp = load_op, + .storeOp = vk::AttachmentStoreOp::eStore, + .stencilLoadOp = load_op, + .stencilStoreOp = vk::AttachmentStoreOp::eStore, + .initialLayout = vk::ImageLayout::eGeneral, + .finalLayout = vk::ImageLayout::eGeneral, + }; + + depth_attachment_ref = vk::AttachmentReference{ + .attachment = attachment_count++, + .layout = vk::ImageLayout::eGeneral, + }; + + use_depth = true; + } + + const vk::SubpassDescription subpass = { + .pipelineBindPoint = vk::PipelineBindPoint::eGraphics, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = use_color ? 
1u : 0u, + .pColorAttachments = &color_attachment_ref, + .pResolveAttachments = nullptr, + .pDepthStencilAttachment = use_depth ? &depth_attachment_ref : nullptr, + }; + + const vk::RenderPassCreateInfo renderpass_info = { + .attachmentCount = attachment_count, + .pAttachments = attachments.data(), + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 0, + .pDependencies = nullptr, + }; + + return instance.GetDevice().createRenderPassUnique(renderpass_info); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h new file mode 100644 index 000000000..820738cc4 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h @@ -0,0 +1,73 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <mutex> + +#include "common/math_util.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace VideoCore { +enum class PixelFormat : u32; +} + +namespace Vulkan { + +class Instance; +class Scheduler; +class Framebuffer; + +struct RenderPass { + vk::Framebuffer framebuffer; + vk::RenderPass render_pass; + vk::Rect2D render_area; + vk::ClearValue clear; + bool do_clear; + + bool operator==(const RenderPass& other) const noexcept { + return std::tie(framebuffer, render_pass, render_area, do_clear) == + std::tie(other.framebuffer, other.render_pass, other.render_area, + other.do_clear) && + std::memcmp(&clear, &other.clear, sizeof(vk::ClearValue)) == 0; + } +}; + +class RenderpassCache { + static constexpr size_t MAX_COLOR_FORMATS = 5; + static constexpr size_t MAX_DEPTH_FORMATS = 4; + +public: + explicit RenderpassCache(const Instance& instance, Scheduler& scheduler); + ~RenderpassCache(); + + /// Begins a new renderpass with the provided framebuffer as render target. + void BeginRendering(const Framebuffer* framebuffer, Common::Rectangle<u32> draw_rect); + + /// Begins a new renderpass with the provided render state. + void BeginRendering(const RenderPass& new_pass); + + /// Exits from any currently active renderpass instance + void EndRendering(); + + /// Returns the renderpass associated with the color-depth format pair + vk::RenderPass GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth, + bool is_clear); + +private: + /// Creates a renderpass configured appropriately and stores it in cached_renderpasses + vk::UniqueRenderPass CreateRenderPass(vk::Format color, vk::Format depth, + vk::AttachmentLoadOp load_op) const; + +private: + const Instance& instance; + Scheduler& scheduler; + vk::UniqueRenderPass cached_renderpasses[MAX_COLOR_FORMATS + 1][MAX_DEPTH_FORMATS + 1][2]; + std::mutex cache_mutex; + std::array<vk::Image, 2> images; + std::array<vk::ImageAspectFlags, 2> aspects; + RenderPass pass{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp new file mode 100644 index 000000000..02a5e22b7 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -0,0 +1,113 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
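+ +// Pooled resources are recycled once the master semaphore reports their tick as complete.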
+ +#include <cstddef> +#include <optional> +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_master_semaphore.h" +#include "video_core/renderer_vulkan/vk_resource_pool.h" + +namespace Vulkan { + +ResourcePool::ResourcePool(MasterSemaphore* master_semaphore_, size_t grow_step_) + : master_semaphore{master_semaphore_}, grow_step{grow_step_} {} + +std::size_t ResourcePool::CommitResource() { + // Refresh semaphore to query updated results + master_semaphore->Refresh(); + const u64 gpu_tick = master_semaphore->KnownGpuTick(); + const auto search = [this, gpu_tick](std::size_t begin, + std::size_t end) -> std::optional<std::size_t> { + for (std::size_t iterator = begin; iterator < end; ++iterator) { + if (gpu_tick >= ticks[iterator]) { + ticks[iterator] = master_semaphore->CurrentTick(); + return iterator; + } + } + return std::nullopt; + }; + + // Try to find a free resource from the hinted position to the end. + std::optional<std::size_t> found = search(hint_iterator, ticks.size()); + if (!found) { + // Search from beginning to the hinted position. + found = search(0, hint_iterator); + if (!found) { + // Both searches failed, the pool is full; handle it. + const std::size_t free_resource = ManageOverflow(); + + ticks[free_resource] = master_semaphore->CurrentTick(); + found = free_resource; + } + } + + // Free iterator is hinted to the resource after the one that's been committed. + hint_iterator = (*found + 1) % ticks.size(); + return *found; +} + +std::size_t ResourcePool::ManageOverflow() { + const std::size_t old_capacity = ticks.size(); + Grow(); + + // The last entry is guaranteed to be free, since it's the first element of the freshly + // allocated resources. + return old_capacity; +} + +void ResourcePool::Grow() { + const size_t old_capacity = ticks.size(); + ticks.resize(old_capacity + grow_step); + Allocate(old_capacity, old_capacity + grow_step); +} + +constexpr size_t COMMAND_BUFFER_POOL_SIZE = 4; + +struct CommandPool::Pool { + vk::CommandPool handle; + std::array<vk::CommandBuffer, COMMAND_BUFFER_POOL_SIZE> cmdbufs; +}; + +CommandPool::CommandPool(const Instance& instance, MasterSemaphore* master_semaphore) + : ResourcePool{master_semaphore, COMMAND_BUFFER_POOL_SIZE}, instance{instance} {} + +CommandPool::~CommandPool() { + vk::Device device = instance.GetDevice(); + for (Pool& pool : pools) { + device.destroyCommandPool(pool.handle); + } +} + +void CommandPool::Allocate(std::size_t begin, std::size_t end) { + // Command buffers are going to be committed, recorded, and executed every single usage cycle. + // They are also going to be reset when committed. 
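+ // The eTransient and eResetCommandBuffer flags below match this usage pattern.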
+ Pool& pool = pools.emplace_back(); + + const vk::CommandPoolCreateInfo pool_create_info = { + .flags = vk::CommandPoolCreateFlagBits::eTransient | + vk::CommandPoolCreateFlagBits::eResetCommandBuffer, + .queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex(), + }; + + vk::Device device = instance.GetDevice(); + pool.handle = device.createCommandPool(pool_create_info); + + const vk::CommandBufferAllocateInfo buffer_alloc_info = { + .commandPool = pool.handle, + .level = vk::CommandBufferLevel::ePrimary, + .commandBufferCount = COMMAND_BUFFER_POOL_SIZE, + }; + + auto buffers = device.allocateCommandBuffers(buffer_alloc_info); + std::copy(buffers.begin(), buffers.end(), pool.cmdbufs.begin()); +} + +vk::CommandBuffer CommandPool::Commit() { + const std::size_t index = CommitResource(); + const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE; + const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE; + return pools[pool_index].cmdbufs[sub_index]; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h new file mode 100644 index 000000000..81fc549e7 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h @@ -0,0 +1,67 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> +#include "common/common_types.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class Instance; +class MasterSemaphore; + +/** + * Handles a pool of resources protected by fences. Manages resource overflow by allocating more + * resources. + */ +class ResourcePool { +public: + explicit ResourcePool() = default; + explicit ResourcePool(MasterSemaphore* master_semaphore, std::size_t grow_step); + virtual ~ResourcePool() = default; + + ResourcePool& operator=(ResourcePool&&) noexcept = default; + ResourcePool(ResourcePool&&) noexcept = default; + + ResourcePool& operator=(const ResourcePool&) = default; + ResourcePool(const ResourcePool&) = default; + +protected: + std::size_t CommitResource(); + + /// Called when a chunk of resources has to be allocated. + virtual void Allocate(std::size_t begin, std::size_t end) = 0; + +private: + /// Manages pool overflow by allocating new resources. + std::size_t ManageOverflow(); + + /// Allocates a new page of resources. + void Grow(); + +protected: + MasterSemaphore* master_semaphore{nullptr}; + std::size_t grow_step = 0; ///< Number of new resources created after an overflow + std::size_t hint_iterator = 0; ///< Hint to where the next free resource is likely to be found + std::vector<u64> ticks; ///< Ticks for each resource +}; + +class CommandPool final : public ResourcePool { +public: + explicit CommandPool(const Instance& instance, MasterSemaphore* master_semaphore); + ~CommandPool() override; + + void Allocate(std::size_t begin, std::size_t end) override; + + vk::CommandBuffer Commit(); + +private: + struct Pool; + const Instance& instance; + std::vector<Pool> pools; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp new file mode 100644 index 000000000..6a8f85735 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -0,0 +1,203 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
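+ +// Commands are recorded into chunks and replayed on a worker thread, keeping driver work off the emulation thread.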
+ +#include <mutex> +#include <utility> +#include "common/microprofile.h" +#include "common/settings.h" +#include "common/thread.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" + +MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192)); +MICROPROFILE_DEFINE(Vulkan_Submit, "Vulkan", "Submit Execution", MP_RGB(255, 192, 255)); + +namespace Vulkan { + +namespace { + +std::unique_ptr<MasterSemaphore> MakeMasterSemaphore(const Instance& instance) { + if (instance.IsTimelineSemaphoreSupported()) { + return std::make_unique<MasterSemaphoreTimeline>(instance); + } else { + return std::make_unique<MasterSemaphoreFence>(instance); + } +} + +} // Anonymous namespace + +void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { + auto command = first; + while (command != nullptr) { + auto next = command->GetNext(); + command->Execute(cmdbuf); + command->~Command(); + command = next; + } + submit = false; + command_offset = 0; + first = nullptr; + last = nullptr; +} + +Scheduler::Scheduler(const Instance& instance, RenderpassCache& renderpass_cache) + : renderpass_cache{renderpass_cache}, master_semaphore{MakeMasterSemaphore(instance)}, + command_pool{instance, master_semaphore.get()}, use_worker_thread{ + !Settings::values.renderer_debug} { + AllocateWorkerCommandBuffers(); + if (use_worker_thread) { + AcquireNewChunk(); + worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); }); + } +} + +Scheduler::~Scheduler() = default; + +void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) { + // When flushing, we only send data to the worker thread; no waiting is necessary. + SubmitExecution(signal, wait); +} + +void Scheduler::Finish(vk::Semaphore signal, vk::Semaphore wait) { + // When finishing, we need to wait for the submission to have executed on the device. + const u64 presubmit_tick = CurrentTick(); + SubmitExecution(signal, wait); + Wait(presubmit_tick); +} + +void Scheduler::WaitWorker() { + if (!use_worker_thread) { + return; + } + + MICROPROFILE_SCOPE(Vulkan_WaitForWorker); + DispatchWork(); + + // Ensure the queue is drained. + { + std::unique_lock ql{queue_mutex}; + event_cv.wait(ql, [this] { return work_queue.empty(); }); + } + + // Now wait for execution to finish. + // This needs to be done in the same order as WorkerThread. + std::scoped_lock el{execution_mutex}; +} + +void Scheduler::Wait(u64 tick) { + if (tick >= master_semaphore->CurrentTick()) { + // Make sure we are not waiting for the current tick without signalling + Flush(); + } + master_semaphore->Wait(tick); +} + +void Scheduler::DispatchWork() { + if (!use_worker_thread || chunk->Empty()) { + return; + } + + { + std::scoped_lock ql{queue_mutex}; + work_queue.push(std::move(chunk)); + } + + event_cv.notify_all(); + AcquireNewChunk(); +} + +void Scheduler::WorkerThread(std::stop_token stop_token) { + Common::SetCurrentThreadName("VulkanWorker"); + + const auto TryPopQueue{[this](auto& work) -> bool { + if (work_queue.empty()) { + return false; + } + + work = std::move(work_queue.front()); + work_queue.pop(); + event_cv.notify_all(); + return true; + }}; + + while (!stop_token.stop_requested()) { + std::unique_ptr<CommandChunk> work; + + { + std::unique_lock lk{queue_mutex}; + + // Wait for work. + Common::CondvarWait(event_cv, lk, stop_token, [&] { return TryPopQueue(work); }); + + // If we've been asked to stop, we're done. 
+ if (stop_token.stop_requested()) { + return; + } + + // Exchange lock ownership so that we take the execution lock before + // the queue lock goes out of scope. This allows us to force execution + // to complete in the next step. + std::exchange(lk, std::unique_lock{execution_mutex}); + + // Perform the work, tracking whether the chunk was a submission + // before executing. + const bool has_submit = work->HasSubmit(); + work->ExecuteAll(current_cmdbuf); + + // If the chunk was a submission, reallocate the command buffer. + if (has_submit) { + AllocateWorkerCommandBuffers(); + } + } + + { + std::scoped_lock rl{reserve_mutex}; + + // Recycle the chunk back to the reserve. + chunk_reserve.emplace_back(std::move(work)); + } + } +} + +void Scheduler::AllocateWorkerCommandBuffers() { + const vk::CommandBufferBeginInfo begin_info = { + .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit, + }; + + current_cmdbuf = command_pool.Commit(); + current_cmdbuf.begin(begin_info); +} + +void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) { + state = StateFlags::AllDirty; + const u64 signal_value = master_semaphore->NextTick(); + + renderpass_cache.EndRendering(); + Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { + MICROPROFILE_SCOPE(Vulkan_Submit); + std::scoped_lock lock{submit_mutex}; + master_semaphore->SubmitWork(cmdbuf, wait_semaphore, signal_semaphore, signal_value); + }); + + if (!use_worker_thread) { + AllocateWorkerCommandBuffers(); + } else { + chunk->MarkSubmit(); + DispatchWork(); + } +} + +void Scheduler::AcquireNewChunk() { + std::scoped_lock lock{reserve_mutex}; + if (chunk_reserve.empty()) { + chunk = std::make_unique<CommandChunk>(); + return; + } + + chunk = std::move(chunk_reserve.back()); + chunk_reserve.pop_back(); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h new file mode 100644 index 000000000..faffd22e8 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -0,0 +1,210 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <queue> +#include "common/alignment.h" +#include "common/common_funcs.h" +#include "common/logging/log.h" +#include "common/polyfill_thread.h" +#include "video_core/renderer_vulkan/vk_master_semaphore.h" +#include "video_core/renderer_vulkan/vk_resource_pool.h" + +namespace Vulkan { + +enum class StateFlags { + AllDirty = 0, + Renderpass = 1 << 0, + Pipeline = 1 << 1, + DescriptorSets = 1 << 2 +}; + +DECLARE_ENUM_FLAG_OPERATORS(StateFlags) + +class Instance; +class RenderpassCache; + +/// The scheduler abstracts command buffer and fence management with an interface that's able to do +/// OpenGL-like operations on Vulkan command buffers. +class Scheduler { +public: + explicit Scheduler(const Instance& instance, RenderpassCache& renderpass_cache); + ~Scheduler(); + + /// Sends the current execution context to the GPU. + void Flush(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr); + + /// Sends the current execution context to the GPU and waits for it to complete. + void Finish(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr); + + /// Waits for the worker thread to finish executing everything. After this function returns it's + /// safe to touch worker resources. + void WaitWorker(); + + /// Waits for the given tick to trigger on the GPU. 
+ void Wait(u64 tick); + + /// Sends currently recorded work to the worker thread. + void DispatchWork(); + + /// Records the command to the current chunk. + template <typename T> + void Record(T&& command) { + if (!use_worker_thread) { + command(current_cmdbuf); + return; + } + + if (chunk->Record(command)) { + return; + } + DispatchWork(); + (void)chunk->Record(command); + } + + /// Marks the provided state as non-dirty + void MarkStateNonDirty(StateFlags flag) noexcept { + state |= flag; + } + + /// Marks the provided state as dirty + void MakeDirty(StateFlags flag) noexcept { + state &= ~flag; + } + + /// Returns true if the state is dirty + [[nodiscard]] bool IsStateDirty(StateFlags flag) const noexcept { + return False(state & flag); + } + + /// Returns the current command buffer tick. + [[nodiscard]] u64 CurrentTick() const noexcept { + return master_semaphore->CurrentTick(); + } + + /// Returns true when a tick has been triggered by the GPU. + [[nodiscard]] bool IsFree(u64 tick) const noexcept { + return master_semaphore->IsFree(tick); + } + + /// Returns the master timeline semaphore. + [[nodiscard]] MasterSemaphore* GetMasterSemaphore() noexcept { + return master_semaphore.get(); + } + + std::mutex submit_mutex; + +private: + class Command { + public: + virtual ~Command() = default; + + virtual void Execute(vk::CommandBuffer cmdbuf) const = 0; + + Command* GetNext() const { + return next; + } + + void SetNext(Command* next_) { + next = next_; + } + + private: + Command* next = nullptr; + }; + + template <typename T> + class TypedCommand final : public Command { + public: + explicit TypedCommand(T&& command_) : command{std::move(command_)} {} + ~TypedCommand() override = default; + + TypedCommand(TypedCommand&&) = delete; + TypedCommand& operator=(TypedCommand&&) = delete; + + void Execute(vk::CommandBuffer cmdbuf) const override { + command(cmdbuf); + } + + private: + T command; + }; + + class CommandChunk final { + public: + void ExecuteAll(vk::CommandBuffer cmdbuf); + + template <typename T> + bool Record(T& command) { + using FuncType = TypedCommand<T>; + static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large"); + + recorded_counts++; + command_offset = Common::AlignUp(command_offset, alignof(FuncType)); + if (command_offset > sizeof(data) - sizeof(FuncType)) { + return false; + } + Command* const current_last = last; + last = new (data.data() + command_offset) FuncType(std::move(command)); + + if (current_last) { + current_last->SetNext(last); + } else { + first = last; + } + command_offset += sizeof(FuncType); + return true; + } + + void MarkSubmit() { + submit = true; + } + + bool Empty() const { + return recorded_counts == 0; + } + + bool HasSubmit() const { + return submit; + } + + private: + Command* first = nullptr; + Command* last = nullptr; + + std::size_t recorded_counts = 0; + std::size_t command_offset = 0; + bool submit = false; + alignas(std::max_align_t) std::array<u8, 0x8000> data{}; + }; + +private: + void WorkerThread(std::stop_token stop_token); + + void AllocateWorkerCommandBuffers(); + + void SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore); + + void AcquireNewChunk(); + +private: + RenderpassCache& renderpass_cache; + std::unique_ptr<MasterSemaphore> master_semaphore; + CommandPool command_pool; + std::unique_ptr<CommandChunk> chunk; + std::queue<std::unique_ptr<CommandChunk>> work_queue; + std::vector<std::unique_ptr<CommandChunk>> chunk_reserve; + vk::CommandBuffer current_cmdbuf; + StateFlags state{}; + std::mutex execution_mutex; + std::mutex reserve_mutex; + std::mutex queue_mutex; + std::condition_variable_any event_cv; + std::jthread worker_thread; 
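+ // False in renderer-debug mode, where commands are recorded synchronously on the caller's thread.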
+ bool use_worker_thread; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/renderer_vulkan/vk_shader_gen.cpp index 049366b59..ca69efa1d 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp @@ -1511,10 +1511,11 @@ vec4 secondary_fragment_color = vec4(0.0); "gl_FragCoord.y < float(scissor_y2))) discard;\n"; } - // After perspective divide, OpenGL transform z_over_w from [-1, 1] to [near, far]. Here we use - // default near = 0 and far = 1, and undo the transformation to get the original z_over_w, then - // do our own transformation according to PICA specification. - out += "float z_over_w = 2.0 * gl_FragCoord.z - 1.0;\n" + // The PICA depth range is [-1, 0] while in Vulkan that range is [0, 1]. + // Thus in the vertex shader we flip the sign of the z component to place + // it in the correct range. Here we undo the transformation to get the original z_over_w, + // then do our own transformation according to PICA specification. + out += "float z_over_w = -gl_FragCoord.z;\n" "float depth = z_over_w * depth_scale + depth_offset;\n"; if (state.depthmap_enable == RasterizerRegs::DepthBuffering::WBuffering) { out += "depth /= gl_FragCoord.w;\n"; @@ -1661,8 +1662,7 @@ void main() { texcoord0_w = vert_texcoord0_w; normquat = vert_normquat; view = vert_view; - gl_Position = vert_position; - gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0; + gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w); )"; if (use_clip_planes) { out += R"( @@ -1780,8 +1780,7 @@ layout (set = 0, binding = 0, std140) uniform vs_config { semantic(VSOutputAttributes::POSITION_Y) + ", " + semantic(VSOutputAttributes::POSITION_Z) + ", " + semantic(VSOutputAttributes::POSITION_W) + ");\n"; - out += " gl_Position = vtx_pos;\n"; - out += " gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;\n"; + out += " gl_Position = vec4(vtx_pos.x, vtx_pos.y, -vtx_pos.z, vtx_pos.w);\n"; if (config.use_clip_planes) { out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0 out += " if (enable_clip1) {\n"; @@ -1867,8 +1866,7 @@ struct Vertex { semantic(VSOutputAttributes::POSITION_Y) + ", " + semantic(VSOutputAttributes::POSITION_Z) + ", " + semantic(VSOutputAttributes::POSITION_W) + ");\n"; - out += " gl_Position = vtx_pos;\n"; - out += " gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;\n"; + out += " gl_Position = vec4(vtx_pos.x, vtx_pos.y, -vtx_pos.z, vtx_pos.w);\n"; if (use_clip_planes) { out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0 out += " if (enable_clip1) {\n"; diff --git a/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp b/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp index 332021944..9656a2db3 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp @@ -115,7 +115,7 @@ void FragmentModule::WriteDepth() { const Id input_pointer_id{TypePointer(spv::StorageClass::Input, f32_id)}; const Id gl_frag_coord_z{ OpLoad(f32_id, OpAccessChain(input_pointer_id, gl_frag_coord_id, ConstU32(2u)))}; - const Id z_over_w{OpFma(f32_id, ConstF32(2.f), gl_frag_coord_z, ConstF32(-1.f))}; + const Id z_over_w{OpFNegate(f32_id, gl_frag_coord_z)}; const Id depth_scale{GetShaderDataMember(f32_id, ConstS32(2))}; const Id depth_offset{GetShaderDataMember(f32_id, ConstS32(3))}; depth = OpFma(f32_id, z_over_w, depth_scale, depth_offset); diff --git 
a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index 2d84a76b9..ba5c5f867 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -160,7 +160,7 @@ bool InitializeCompiler() { vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, vk::Device device) { if (!InitializeCompiler()) { - return VK_NULL_HANDLE; + return {}; } EProfile profile = ECoreProfile; @@ -182,7 +182,7 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v includer)) [[unlikely]] { LOG_INFO(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(), shader->getInfoDebugLog()); - return VK_NULL_HANDLE; + return {}; } // Even though there's only a single shader, we still need to link it to generate SPV @@ -191,7 +191,7 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v if (!program->link(messages)) { LOG_INFO(Render_Vulkan, "Program Info Log:\n{}\n{}", program->getInfoLog(), program->getInfoDebugLog()); - return VK_NULL_HANDLE; + return {}; } glslang::TIntermediate* intermediate = program->getIntermediate(lang); @@ -227,7 +227,7 @@ vk::ShaderModule CompileSPV(std::span<const u32> code, vk::Device device) { UNREACHABLE_MSG("{}", err.what()); } - return VK_NULL_HANDLE; + return {}; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp new file mode 100644 index 000000000..7fb464fb6 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -0,0 +1,201 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <optional> +#include "common/alignment.h" +#include "common/assert.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" + +namespace Vulkan { + +namespace { + +std::string_view BufferTypeName(BufferType type) { + switch (type) { + case BufferType::Upload: + return "Upload"; + case BufferType::Download: + return "Download"; + case BufferType::Stream: + return "Stream"; + default: + return "Invalid"; + } +} + +vk::MemoryPropertyFlags MakePropertyFlags(BufferType type) { + switch (type) { + case BufferType::Upload: + return vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent; + case BufferType::Download: + return vk::MemoryPropertyFlagBits::eHostVisible | + vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached; + case BufferType::Stream: + return vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | + vk::MemoryPropertyFlagBits::eHostCoherent; + default: + UNREACHABLE_MSG("Unknown buffer type {}", type); + return vk::MemoryPropertyFlagBits::eHostVisible; + } +} + +/// Find a memory type with the passed requirements +std::optional<u32> FindMemoryType(const vk::PhysicalDeviceMemoryProperties& properties, + vk::MemoryPropertyFlags wanted) { + for (u32 i = 0; i < properties.memoryTypeCount; ++i) { + const auto flags = properties.memoryTypes[i].propertyFlags; + if ((flags & wanted) == wanted) { + return i; + } + } + return std::nullopt; +} + +/// Get the preferred host-visible memory type. 
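+/// Falls back to progressively weaker property flags when no exact match exists.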
+u32 GetMemoryType(const vk::PhysicalDeviceMemoryProperties& properties, BufferType type) {
+    vk::MemoryPropertyFlags flags = MakePropertyFlags(type);
+    std::optional<u32> preferred_type = FindMemoryType(properties, flags);
+
+    constexpr std::array remove_flags = {
+        vk::MemoryPropertyFlagBits::eHostCached,
+        vk::MemoryPropertyFlagBits::eHostCoherent,
+    };
+
+    for (u32 i = 0; i < remove_flags.size() && !preferred_type; i++) {
+        flags &= ~remove_flags[i];
+        preferred_type = FindMemoryType(properties, flags);
+    }
+    ASSERT_MSG(preferred_type, "No suitable memory type found");
+    return preferred_type.value();
+}
+
+constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
+constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
+
+} // Anonymous namespace
+
+StreamBuffer::StreamBuffer(const Instance& instance_, Scheduler& scheduler_,
+                           vk::BufferUsageFlags usage_, u64 size, BufferType type_)
+    : instance{instance_}, scheduler{scheduler_}, device{instance.GetDevice()},
+      stream_buffer_size{size}, usage{usage_}, type{type_} {
+    CreateBuffers(size);
+    ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
+    ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
+}
+
+StreamBuffer::~StreamBuffer() {
+    device.unmapMemory(memory);
+    device.destroyBuffer(buffer);
+    device.freeMemory(memory);
+}
+
+std::tuple<u8*, u64, bool> StreamBuffer::Map(u64 size, u64 alignment) {
+    ASSERT(size <= stream_buffer_size);
+    mapped_size = size;
+
+    if (alignment > 0) {
+        offset = Common::AlignUp(offset, alignment);
+    }
+
+    bool invalidate{false};
+    if (offset + size > stream_buffer_size) {
+        // The buffer would overflow, save the amount of used watches and reset the state.
+        invalidate = true;
+        invalidation_mark = current_watch_cursor;
+        current_watch_cursor = 0;
+        offset = 0;
+
+        // Swap watches and reset waiting cursors.
+        std::swap(previous_watches, current_watches);
+        wait_cursor = 0;
+        wait_bound = 0;
+    }
+
+    const u64 mapped_upper_bound = offset + size;
+    WaitPendingOperations(mapped_upper_bound);
+
+    return std::make_tuple(mapped + offset, offset, invalidate);
+}
+
+void StreamBuffer::Commit(u64 size) {
+    ASSERT_MSG(size <= mapped_size, "Reserved size {} is too small compared to {}", mapped_size,
+               size);
+
+    const vk::MappedMemoryRange range = {
+        .memory = memory,
+        .offset = offset,
+        .size = size,
+    };
+
+    if (!is_coherent && type == BufferType::Download) {
+        device.invalidateMappedMemoryRanges(range);
+    } else if (!is_coherent) {
+        device.flushMappedMemoryRanges(range);
+    }
+
+    offset += size;
+
+    if (current_watch_cursor + 1 >= current_watches.size()) {
+        // Ensure that there are enough watches.
+        ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
+    }
+    auto& watch = current_watches[current_watch_cursor++];
+    watch.upper_bound = offset;
+    watch.tick = scheduler.CurrentTick();
+}
+
+void StreamBuffer::CreateBuffers(u64 preferred_size) {
+    const vk::Device device = instance.GetDevice();
+    const auto memory_properties = instance.GetPhysicalDevice().getMemoryProperties();
+    const u32 preferred_type = GetMemoryType(memory_properties, type);
+    const vk::MemoryType mem_type = memory_properties.memoryTypes[preferred_type];
+    const u32 preferred_heap = mem_type.heapIndex;
+    is_coherent =
+        static_cast<bool>(mem_type.propertyFlags & vk::MemoryPropertyFlagBits::eHostCoherent);
+
+    // Subtract from the preferred heap size some bytes to avoid getting out of memory.
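+    // For example, assuming an 8 GiB device heap, allocable_size comes out to 4 GiB, so
+    // the std::min below only caps requests that exceed half the heap.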
+    const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
+    // As per DXVK's example, using `heap_size / 2`
+    const VkDeviceSize allocable_size = heap_size / 2;
+    buffer = device.createBuffer({
+        .size = std::min(preferred_size, allocable_size),
+        .usage = usage,
+    });
+
+    const auto requirements = device.getBufferMemoryRequirements(buffer);
+    stream_buffer_size = static_cast<u64>(requirements.size);
+
+    LOG_INFO(Render_Vulkan, "Creating {} buffer with size {} KB with flags {}",
+             BufferTypeName(type), stream_buffer_size / 1024,
+             vk::to_string(mem_type.propertyFlags));
+
+    memory = device.allocateMemory({
+        .allocationSize = requirements.size,
+        .memoryTypeIndex = preferred_type,
+    });
+
+    device.bindBufferMemory(buffer, memory, 0);
+    mapped = reinterpret_cast<u8*>(device.mapMemory(memory, 0, VK_WHOLE_SIZE));
+}
+
+void StreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
+    watches.resize(watches.size() + grow_size);
+}
+
+void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
+    if (!invalidation_mark) {
+        return;
+    }
+    while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
+        auto& watch = previous_watches[wait_cursor];
+        wait_bound = watch.upper_bound;
+        scheduler.Wait(watch.tick);
+        ++wait_cursor;
+    }
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
new file mode 100644
index 000000000..2b14c78a7
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -0,0 +1,86 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include "video_core/renderer_vulkan/vk_common.h"
+
+namespace Vulkan {
+
+enum class BufferType : u32 {
+    Upload = 0,
+    Download = 1,
+    Stream = 2,
+};
+
+class Instance;
+class Scheduler;
+
+class StreamBuffer final {
+    static constexpr std::size_t MAX_BUFFER_VIEWS = 3;
+
+public:
+    explicit StreamBuffer(const Instance& instance, Scheduler& scheduler,
+                          vk::BufferUsageFlags usage, u64 size,
+                          BufferType type = BufferType::Stream);
+    ~StreamBuffer();
+
+    /**
+     * Reserves a region of memory from the stream buffer.
+     * @param size Size to reserve.
+     * @returns A tuple of a raw memory pointer (with offset added), the buffer offset and a
+     * flag that is true when the buffer was invalidated.
+     */
+    std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
+
+    /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
+    void Commit(u64 size);
+
+    vk::Buffer Handle() const noexcept {
+        return buffer;
+    }
+
+private:
+    struct Watch {
+        u64 tick{};
+        u64 upper_bound{};
+    };
+
+    /// Creates Vulkan buffer handles committing the required memory.
+    void CreateBuffers(u64 preferred_size);
+
+    /// Increases the amount of watches available.
+    void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
+
+    void WaitPendingOperations(u64 requested_upper_bound);
+
+private:
+    const Instance& instance; ///< Vulkan instance.
+    Scheduler& scheduler;     ///< Command scheduler.
+
+    vk::Device device;
+    vk::Buffer buffer;        ///< Mapped buffer.
+    vk::DeviceMemory memory;  ///< Memory allocation.
+    u8* mapped{};             ///< Pointer to the mapped memory.
+    u64 stream_buffer_size{}; ///< Stream buffer size.
+    vk::BufferUsageFlags usage{};
+    BufferType type;
+
+    u64 offset{};      ///< Buffer iterator.
+    u64 mapped_size{}; ///< Size reserved for the current copy.
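+
+    // Fence-watch bookkeeping (members below): Commit() records the offset it reached
+    // together with the current scheduler tick, and after a wrap-around Map() waits on
+    // the previous cycle's ticks before handing those regions back to the caller.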
+ bool is_coherent{}; ///< True if the buffer is coherent + + std::vector current_watches; ///< Watches recorded in the current iteration. + std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation. + std::optional invalidation_mark; ///< Number of watches used in the previous cycle. + + std::vector previous_watches; ///< Watches used in the previous iteration. + std::size_t wait_cursor{}; ///< Last watch being waited for completion. + u64 wait_bound{}; ///< Highest offset being watched for completion. +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp new file mode 100644 index 000000000..84dd1dcaf --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -0,0 +1,236 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include "common/logging/log.h" +#include "common/microprofile.h" +#include "common/settings.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" + +MICROPROFILE_DEFINE(Vulkan_Acquire, "Vulkan", "Swapchain Acquire", MP_RGB(185, 66, 245)); +MICROPROFILE_DEFINE(Vulkan_Present, "Vulkan", "Swapchain Present", MP_RGB(66, 185, 245)); + +namespace Vulkan { + +Swapchain::Swapchain(const Instance& instance_, u32 width, u32 height, vk::SurfaceKHR surface_) + : instance{instance_}, surface{surface_} { + FindPresentFormat(); + SetPresentMode(); + Create(width, height, surface); +} + +Swapchain::~Swapchain() { + Destroy(); + instance.GetInstance().destroySurfaceKHR(surface); +} + +void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) { + width = width_; + height = height_; + surface = surface_; + needs_recreation = false; + + Destroy(); + + SetPresentMode(); + SetSurfaceProperties(); + + const std::array queue_family_indices = { + instance.GetGraphicsQueueFamilyIndex(), + instance.GetPresentQueueFamilyIndex(), + }; + + const bool exclusive = queue_family_indices[0] == queue_family_indices[1]; + const u32 queue_family_indices_count = exclusive ? 1u : 2u; + const vk::SharingMode sharing_mode = + exclusive ? 
vk::SharingMode::eExclusive : vk::SharingMode::eConcurrent; + const vk::SwapchainCreateInfoKHR swapchain_info = { + .surface = surface, + .minImageCount = image_count, + .imageFormat = surface_format.format, + .imageColorSpace = surface_format.colorSpace, + .imageExtent = extent, + .imageArrayLayers = 1, + .imageUsage = vk::ImageUsageFlagBits::eColorAttachment | + vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst, + .imageSharingMode = sharing_mode, + .queueFamilyIndexCount = queue_family_indices_count, + .pQueueFamilyIndices = queue_family_indices.data(), + .preTransform = transform, + .compositeAlpha = composite_alpha, + .presentMode = present_mode, + .clipped = true, + .oldSwapchain = nullptr, + }; + + try { + swapchain = instance.GetDevice().createSwapchainKHR(swapchain_info); + } catch (vk::SystemError& err) { + LOG_CRITICAL(Render_Vulkan, "{}", err.what()); + UNREACHABLE(); + } + + SetupImages(); + RefreshSemaphores(); +} + +bool Swapchain::AcquireNextImage() { + MICROPROFILE_SCOPE(Vulkan_Acquire); + vk::Device device = instance.GetDevice(); + vk::Result result = + device.acquireNextImageKHR(swapchain, std::numeric_limits::max(), + image_acquired[frame_index], VK_NULL_HANDLE, &image_index); + + switch (result) { + case vk::Result::eSuccess: + break; + case vk::Result::eSuboptimalKHR: + case vk::Result::eErrorOutOfDateKHR: + needs_recreation = true; + break; + default: + LOG_CRITICAL(Render_Vulkan, "Swapchain acquire returned unknown result {}", result); + UNREACHABLE(); + break; + } + + return !needs_recreation; +} + +void Swapchain::Present() { + if (needs_recreation) { + return; + } + + const vk::PresentInfoKHR present_info = { + .waitSemaphoreCount = 1, + .pWaitSemaphores = &present_ready[image_index], + .swapchainCount = 1, + .pSwapchains = &swapchain, + .pImageIndices = &image_index, + }; + + MICROPROFILE_SCOPE(Vulkan_Present); + try { + [[maybe_unused]] vk::Result result = instance.GetPresentQueue().presentKHR(present_info); + } catch (vk::OutOfDateKHRError&) { + needs_recreation = true; + } catch (const vk::SystemError& err) { + LOG_CRITICAL(Render_Vulkan, "Swapchain presentation failed {}", err.what()); + UNREACHABLE(); + } + + frame_index = (frame_index + 1) % image_count; +} + +void Swapchain::FindPresentFormat() { + const auto formats = instance.GetPhysicalDevice().getSurfaceFormatsKHR(surface); + + // If there is a single undefined surface format, the device doesn't care, so we'll just use + // RGBA. + if (formats[0].format == vk::Format::eUndefined) { + surface_format.format = vk::Format::eR8G8B8A8Unorm; + surface_format.colorSpace = vk::ColorSpaceKHR::eSrgbNonlinear; + return; + } + + // Try to find a suitable format. 
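+    // Only RGBA8/BGRA8 UNORM candidates are accepted here; the color space is taken from
+    // the matching entry rather than assumed to be sRGB nonlinear.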
+ for (const vk::SurfaceFormatKHR& sformat : formats) { + vk::Format format = sformat.format; + if (format != vk::Format::eR8G8B8A8Unorm && format != vk::Format::eB8G8R8A8Unorm) { + continue; + } + + surface_format.format = format; + surface_format.colorSpace = sformat.colorSpace; + return; + } + + LOG_CRITICAL(Render_Vulkan, "Unable to find required swapchain format!"); + UNREACHABLE(); +} + +void Swapchain::SetPresentMode() { + present_mode = vk::PresentModeKHR::eFifo; + if (!Settings::values.use_vsync_new) { + const auto modes = instance.GetPhysicalDevice().getSurfacePresentModesKHR(surface); + const auto find_mode = [&modes](vk::PresentModeKHR requested) { + auto it = + std::find_if(modes.begin(), modes.end(), + [&requested](vk::PresentModeKHR mode) { return mode == requested; }); + + return it != modes.end(); + }; + + const bool has_mailbox = find_mode(vk::PresentModeKHR::eMailbox); + present_mode = has_mailbox ? vk::PresentModeKHR::eMailbox : vk::PresentModeKHR::eImmediate; + } +} + +void Swapchain::SetSurfaceProperties() { + const vk::SurfaceCapabilitiesKHR capabilities = + instance.GetPhysicalDevice().getSurfaceCapabilitiesKHR(surface); + + extent = capabilities.currentExtent; + if (capabilities.currentExtent.width == std::numeric_limits::max()) { + extent.width = std::max(capabilities.minImageExtent.width, + std::min(capabilities.maxImageExtent.width, width)); + extent.height = std::max(capabilities.minImageExtent.height, + std::min(capabilities.maxImageExtent.height, height)); + } + + // Select number of images in swap chain, we prefer one buffer in the background to work on + image_count = capabilities.minImageCount + 1; + if (capabilities.maxImageCount > 0) { + image_count = std::min(image_count, capabilities.maxImageCount); + } + + // Prefer identity transform if possible + transform = vk::SurfaceTransformFlagBitsKHR::eIdentity; + if (!(capabilities.supportedTransforms & transform)) { + transform = capabilities.currentTransform; + } + + // Opaque is not supported everywhere. + composite_alpha = vk::CompositeAlphaFlagBitsKHR::eOpaque; + if (!(capabilities.supportedCompositeAlpha & vk::CompositeAlphaFlagBitsKHR::eOpaque)) { + composite_alpha = vk::CompositeAlphaFlagBitsKHR::eInherit; + } +} + +void Swapchain::Destroy() { + vk::Device device = instance.GetDevice(); + if (swapchain) { + device.destroySwapchainKHR(swapchain); + } + for (u32 i = 0; i < image_count; i++) { + device.destroySemaphore(image_acquired[i]); + device.destroySemaphore(present_ready[i]); + } + image_acquired.clear(); + present_ready.clear(); +} + +void Swapchain::RefreshSemaphores() { + const vk::Device device = instance.GetDevice(); + image_acquired.resize(image_count); + present_ready.resize(image_count); + + for (vk::Semaphore& semaphore : image_acquired) { + semaphore = device.createSemaphore({}); + } + for (vk::Semaphore& semaphore : present_ready) { + semaphore = device.createSemaphore({}); + } +} + +void Swapchain::SetupImages() { + vk::Device device = instance.GetDevice(); + images = device.getSwapchainImagesKHR(swapchain); + image_count = static_cast(images.size()); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h new file mode 100644 index 000000000..c3f6c17d0 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -0,0 +1,110 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
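+
+// Thin wrapper around vk::SwapchainKHR: owns the swapchain images and their
+// acquire/present semaphores, and flags needs_recreation whenever acquire or
+// present reports the surface as suboptimal or out of date.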
+
+#pragma once
+
+#include
+#include
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/vk_common.h"
+
+namespace Vulkan {
+
+class Instance;
+class Scheduler;
+
+class Swapchain {
+public:
+    explicit Swapchain(const Instance& instance, u32 width, u32 height, vk::SurfaceKHR surface);
+    ~Swapchain();
+
+    /// Creates (or recreates) the swapchain with a given size.
+    void Create(u32 width, u32 height, vk::SurfaceKHR surface);
+
+    /// Acquires the next image in the swapchain.
+    bool AcquireNextImage();
+
+    /// Presents the current image and moves to the next one.
+    void Present();
+
+    vk::SurfaceKHR GetSurface() const {
+        return surface;
+    }
+
+    vk::Image Image() const {
+        return images[image_index];
+    }
+
+    vk::SurfaceFormatKHR GetSurfaceFormat() const {
+        return surface_format;
+    }
+
+    vk::SwapchainKHR GetHandle() const {
+        return swapchain;
+    }
+
+    u32 GetWidth() const {
+        return width;
+    }
+
+    u32 GetHeight() const {
+        return height;
+    }
+
+    u32 GetImageCount() const {
+        return image_count;
+    }
+
+    vk::Extent2D GetExtent() const {
+        return extent;
+    }
+
+    [[nodiscard]] vk::Semaphore GetImageAcquiredSemaphore() const {
+        return image_acquired[frame_index];
+    }
+
+    [[nodiscard]] vk::Semaphore GetPresentReadySemaphore() const {
+        return present_ready[image_index];
+    }
+
+private:
+    /// Selects the best available swapchain image format
+    void FindPresentFormat();
+
+    /// Sets the best available present mode
+    void SetPresentMode();
+
+    /// Sets the surface properties according to device capabilities
+    void SetSurfaceProperties();
+
+    /// Destroys current swapchain resources
+    void Destroy();
+
+    /// Performs creation of image views and framebuffers from the swapchain images
+    void SetupImages();
+
+    /// Creates the image acquired and present ready semaphores
+    void RefreshSemaphores();
+
+private:
+    const Instance& instance;
+    vk::SwapchainKHR swapchain{};
+    vk::SurfaceKHR surface{};
+    vk::SurfaceFormatKHR surface_format;
+    vk::PresentModeKHR present_mode;
+    vk::Extent2D extent;
+    vk::SurfaceTransformFlagBitsKHR transform;
+    vk::CompositeAlphaFlagBitsKHR composite_alpha;
+    std::vector<vk::Image> images;
+    std::vector<vk::Semaphore> image_acquired;
+    std::vector<vk::Semaphore> present_ready;
+    u32 width = 0;
+    u32 height = 0;
+    u32 image_count = 0;
+    u32 image_index = 0;
+    u32 frame_index = 0;
+    bool needs_recreation = true;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp
new file mode 100644
index 000000000..abc6aec87
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp
@@ -0,0 +1,1570 @@
+// Copyright 2023 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include
+
+#include "common/microprofile.h"
+#include "common/scope_exit.h"
+#include "video_core/custom_textures/material.h"
+#include "video_core/rasterizer_cache/texture_codec.h"
+#include "video_core/rasterizer_cache/utils.h"
+#include "video_core/renderer_vulkan/pica_to_vk.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_instance.h"
+#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_texture_runtime.h"
+
+#include
+#include
+
+// Ignore the -Wclass-memaccess warning on memcpy for non-trivially default constructible objects.
+#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif + +MICROPROFILE_DEFINE(Vulkan_ImageAlloc, "Vulkan", "Texture Allocation", MP_RGB(192, 52, 235)); + +namespace Vulkan { + +namespace { + +using VideoCore::MapType; +using VideoCore::PixelFormat; +using VideoCore::SurfaceType; +using VideoCore::TextureType; + +struct RecordParams { + vk::ImageAspectFlags aspect; + vk::Filter filter; + vk::PipelineStageFlags pipeline_flags; + vk::AccessFlags src_access; + vk::AccessFlags dst_access; + vk::Image src_image; + vk::Image dst_image; +}; + +vk::Filter MakeFilter(VideoCore::PixelFormat pixel_format) { + switch (pixel_format) { + case VideoCore::PixelFormat::D16: + case VideoCore::PixelFormat::D24: + case VideoCore::PixelFormat::D24S8: + return vk::Filter::eNearest; + default: + return vk::Filter::eLinear; + } +} + +[[nodiscard]] vk::ClearValue MakeClearValue(VideoCore::ClearValue clear) { + static_assert(sizeof(VideoCore::ClearValue) == sizeof(vk::ClearValue)); + + vk::ClearValue value{}; + std::memcpy(&value, &clear, sizeof(vk::ClearValue)); + return value; +} + +[[nodiscard]] vk::ClearColorValue MakeClearColorValue(Common::Vec4f color) { + return vk::ClearColorValue{ + .float32 = std::array{color[0], color[1], color[2], color[3]}, + }; +} + +[[nodiscard]] vk::ClearDepthStencilValue MakeClearDepthStencilValue(VideoCore::ClearValue clear) { + return vk::ClearDepthStencilValue{ + .depth = clear.depth, + .stencil = clear.stencil, + }; +} + +u32 UnpackDepthStencil(const VideoCore::StagingData& data, vk::Format dest) { + u32 depth_offset = 0; + u32 stencil_offset = 4 * data.size / 5; + const auto& mapped = data.mapped; + + switch (dest) { + case vk::Format::eD24UnormS8Uint: { + for (; stencil_offset < data.size; depth_offset += 4) { + u8* ptr = mapped.data() + depth_offset; + const u32 d24s8 = VideoCore::MakeInt(ptr); + const u32 d24 = d24s8 >> 8; + mapped[stencil_offset] = d24s8 & 0xFF; + std::memcpy(ptr, &d24, 4); + stencil_offset++; + } + break; + } + case vk::Format::eD32SfloatS8Uint: { + for (; stencil_offset < data.size; depth_offset += 4) { + u8* ptr = mapped.data() + depth_offset; + const u32 d24s8 = VideoCore::MakeInt(ptr); + const float d32 = (d24s8 >> 8) / 16777215.f; + mapped[stencil_offset] = d24s8 & 0xFF; + std::memcpy(ptr, &d32, 4); + stencil_offset++; + } + break; + } + default: + LOG_ERROR(Render_Vulkan, "Unimplemented convertion for depth format {}", + vk::to_string(dest)); + UNREACHABLE(); + } + + ASSERT(depth_offset == 4 * data.size / 5); + return depth_offset; +} + +boost::container::small_vector MakeInitBarriers( + vk::ImageAspectFlags aspect, std::span images, size_t num_images) { + boost::container::small_vector barriers; + for (size_t i = 0; i < num_images; i++) { + barriers.push_back(vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eNone, + .oldLayout = vk::ImageLayout::eUndefined, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = images[i], + .subresourceRange{ + .aspectMask = aspect, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }); + } + return barriers; +} + +Handle MakeHandle(const Instance* instance, u32 width, u32 height, u32 levels, TextureType type, + vk::Format format, vk::ImageUsageFlags usage, 
vk::ImageCreateFlags flags, + vk::ImageAspectFlags aspect, bool need_format_list, + std::string_view debug_name = {}) { + const u32 layers = type == TextureType::CubeMap ? 6 : 1; + + const std::array format_list = { + vk::Format::eR8G8B8A8Unorm, + vk::Format::eR32Uint, + }; + const vk::ImageFormatListCreateInfo image_format_list = { + .viewFormatCount = static_cast(format_list.size()), + .pViewFormats = format_list.data(), + }; + + const vk::ImageCreateInfo image_info = { + .pNext = need_format_list ? &image_format_list : nullptr, + .flags = flags, + .imageType = vk::ImageType::e2D, + .format = format, + .extent = {width, height, 1}, + .mipLevels = levels, + .arrayLayers = layers, + .samples = vk::SampleCountFlagBits::e1, + .usage = usage, + }; + + const VmaAllocationCreateInfo alloc_info = { + .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT, + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + .requiredFlags = 0, + .preferredFlags = 0, + .pool = VK_NULL_HANDLE, + .pUserData = nullptr, + }; + + VkImage unsafe_image{}; + VkImageCreateInfo unsafe_image_info = static_cast(image_info); + VmaAllocation allocation{}; + + VkResult result = vmaCreateImage(instance->GetAllocator(), &unsafe_image_info, &alloc_info, + &unsafe_image, &allocation, nullptr); + if (result != VK_SUCCESS) [[unlikely]] { + LOG_CRITICAL(Render_Vulkan, "Failed allocating image with error {}", result); + UNREACHABLE(); + } + + if (!debug_name.empty() && instance->HasDebuggingToolAttached()) { + const vk::DebugUtilsObjectNameInfoEXT name_info = { + .objectType = vk::ObjectType::eImage, + .objectHandle = reinterpret_cast(unsafe_image), + .pObjectName = debug_name.data(), + }; + instance->GetDevice().setDebugUtilsObjectNameEXT(name_info); + } + + const vk::Image image{unsafe_image}; + const vk::ImageViewCreateInfo view_info = { + .image = image, + .viewType = + type == TextureType::CubeMap ? 
vk::ImageViewType::eCube : vk::ImageViewType::e2D, + .format = format, + .subresourceRange{ + .aspectMask = aspect, + .baseMipLevel = 0, + .levelCount = levels, + .baseArrayLayer = 0, + .layerCount = layers, + }, + }; + vk::UniqueImageView image_view = instance->GetDevice().createImageViewUnique(view_info); + + return Handle{ + .alloc = allocation, + .image = image, + .image_view = std::move(image_view), + }; +} + +vk::UniqueFramebuffer MakeFramebuffer(vk::Device device, vk::RenderPass render_pass, u32 width, + u32 height, std::span attachments, + u32 num_attachments) { + const vk::FramebufferCreateInfo framebuffer_info = { + .renderPass = render_pass, + .attachmentCount = num_attachments, + .pAttachments = attachments.data(), + .width = width, + .height = height, + .layers = 1, + }; + return device.createFramebufferUnique(framebuffer_info); +} + +vk::ImageSubresourceRange MakeSubresourceRange(vk::ImageAspectFlags aspect, u32 level = 0, + u32 levels = 1, u32 layer = 0) { + return vk::ImageSubresourceRange{ + .aspectMask = aspect, + .baseMipLevel = level, + .levelCount = levels, + .baseArrayLayer = layer, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }; +} + +constexpr u64 UPLOAD_BUFFER_SIZE = 512 * 1024 * 1024; +constexpr u64 DOWNLOAD_BUFFER_SIZE = 16 * 1024 * 1024; + +} // Anonymous namespace + +TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler, + RenderpassCache& renderpass_cache, DescriptorPool& pool, + DescriptorSetProvider& texture_provider_, u32 num_swapchain_images_) + : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, + texture_provider{texture_provider_}, blit_helper{instance, scheduler, pool, renderpass_cache}, + upload_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, UPLOAD_BUFFER_SIZE, + BufferType::Upload}, + download_buffer{instance, scheduler, + vk::BufferUsageFlagBits::eTransferDst | + vk::BufferUsageFlagBits::eStorageBuffer, + DOWNLOAD_BUFFER_SIZE, BufferType::Download}, + num_swapchain_images{num_swapchain_images_} {} + +TextureRuntime::~TextureRuntime() = default; + +VideoCore::StagingData TextureRuntime::FindStaging(u32 size, bool upload) { + StreamBuffer& buffer = upload ? 
upload_buffer : download_buffer; + const auto [data, offset, invalidate] = buffer.Map(size, 16); + return VideoCore::StagingData{ + .size = size, + .offset = static_cast(offset), + .mapped = std::span{data, size}, + }; +} + +u32 TextureRuntime::RemoveThreshold() { + return num_swapchain_images + 2; +} + +bool TextureRuntime::Reinterpret(Surface& source, Surface& dest, + const VideoCore::TextureBlit& blit) { + const PixelFormat src_format = source.pixel_format; + const PixelFormat dst_format = dest.pixel_format; + ASSERT_MSG(src_format != dst_format, "Reinterpretation with the same format is invalid"); + if (src_format == PixelFormat::D24S8 && dst_format == PixelFormat::RGBA8) { + blit_helper.ConvertDS24S8ToRGBA8(source, dest, blit); + } else { + LOG_WARNING(Render_Vulkan, "Unimplemented reinterpretation {} -> {}", + VideoCore::PixelFormatAsString(src_format), + VideoCore::PixelFormatAsString(dst_format)); + return false; + } + return true; +} + +bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear) { + renderpass_cache.EndRendering(); + + const RecordParams params = { + .aspect = surface.Aspect(), + .pipeline_flags = surface.PipelineStageFlags(), + .src_access = surface.AccessFlags(), + .src_image = surface.Image(), + }; + + if (clear.texture_rect == surface.GetScaledRect()) { + scheduler.Record([params, clear](vk::CommandBuffer cmdbuf) { + const vk::ImageSubresourceRange range = { + .aspectMask = params.aspect, + .baseMipLevel = clear.texture_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }; + + const vk::ImageMemoryBarrier pre_barrier = { + .srcAccessMask = params.src_access, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = range, + }; + + const vk::ImageMemoryBarrier post_barrier = { + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = params.src_access, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = range, + }; + + cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier); + + const bool is_color = + static_cast(params.aspect & vk::ImageAspectFlagBits::eColor); + if (is_color) { + cmdbuf.clearColorImage(params.src_image, vk::ImageLayout::eTransferDstOptimal, + MakeClearColorValue(clear.value.color), range); + } else { + cmdbuf.clearDepthStencilImage(params.src_image, + vk::ImageLayout::eTransferDstOptimal, + MakeClearDepthStencilValue(clear.value), range); + } + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); + }); + return true; + } + + ClearTextureWithRenderpass(surface, clear); + return true; +} + +void TextureRuntime::ClearTextureWithRenderpass(Surface& surface, + const VideoCore::TextureClear& clear) { + const bool is_color = surface.type != VideoCore::SurfaceType::Depth && + surface.type != VideoCore::SurfaceType::DepthStencil; + + const auto color_format = is_color ? surface.pixel_format : PixelFormat::Invalid; + const auto depth_format = is_color ? 
PixelFormat::Invalid : surface.pixel_format; + const auto render_pass = renderpass_cache.GetRenderpass(color_format, depth_format, true); + + const RecordParams params = { + .aspect = surface.Aspect(), + .pipeline_flags = surface.PipelineStageFlags(), + .src_access = surface.AccessFlags(), + .src_image = surface.Image(), + }; + + scheduler.Record([params, is_color, clear, render_pass, + framebuffer = surface.Framebuffer()](vk::CommandBuffer cmdbuf) { + const vk::AccessFlags access_flag = + is_color ? vk::AccessFlagBits::eColorAttachmentRead | + vk::AccessFlagBits::eColorAttachmentWrite + : vk::AccessFlagBits::eDepthStencilAttachmentRead | + vk::AccessFlagBits::eDepthStencilAttachmentWrite; + + const vk::PipelineStageFlags pipeline_flags = + is_color ? vk::PipelineStageFlagBits::eColorAttachmentOutput + : vk::PipelineStageFlagBits::eEarlyFragmentTests; + + const vk::ImageMemoryBarrier pre_barrier = { + .srcAccessMask = params.src_access, + .dstAccessMask = access_flag, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, clear.texture_level), + }; + + const vk::ImageMemoryBarrier post_barrier = { + .srcAccessMask = access_flag, + .dstAccessMask = params.src_access, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, clear.texture_level), + }; + + const vk::Rect2D render_area = { + .offset{ + .x = static_cast(clear.texture_rect.left), + .y = static_cast(clear.texture_rect.bottom), + }, + .extent{ + .width = clear.texture_rect.GetWidth(), + .height = clear.texture_rect.GetHeight(), + }, + }; + + const auto clear_value = MakeClearValue(clear.value); + + const vk::RenderPassBeginInfo renderpass_begin_info = { + .renderPass = render_pass, + .framebuffer = framebuffer, + .renderArea = render_area, + .clearValueCount = 1, + .pClearValues = &clear_value, + }; + + cmdbuf.pipelineBarrier(params.pipeline_flags, pipeline_flags, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier); + + cmdbuf.beginRenderPass(renderpass_begin_info, vk::SubpassContents::eInline); + cmdbuf.endRenderPass(); + + cmdbuf.pipelineBarrier(pipeline_flags, params.pipeline_flags, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); + }); +} + +bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, + const VideoCore::TextureCopy& copy) { + renderpass_cache.EndRendering(); + + const RecordParams params = { + .aspect = source.Aspect(), + .filter = MakeFilter(source.pixel_format), + .pipeline_flags = source.PipelineStageFlags() | dest.PipelineStageFlags(), + .src_access = source.AccessFlags(), + .dst_access = dest.AccessFlags(), + .src_image = source.Image(), + .dst_image = dest.Image(), + }; + + scheduler.Record([params, copy](vk::CommandBuffer cmdbuf) { + const vk::ImageCopy image_copy = { + .srcSubresource{ + .aspectMask = params.aspect, + .mipLevel = copy.src_level, + .baseArrayLayer = copy.src_layer, + .layerCount = 1, + }, + .srcOffset = {static_cast(copy.src_offset.x), static_cast(copy.src_offset.y), + 0}, + .dstSubresource{ + .aspectMask = params.aspect, + .mipLevel = copy.dst_level, + .baseArrayLayer = copy.dst_layer, + .layerCount = 1, + }, + .dstOffset = 
{static_cast(copy.dst_offset.x), static_cast(copy.dst_offset.y), + 0}, + .extent = {copy.extent.width, copy.extent.height, 1}, + }; + + const bool self_copy = params.src_image == params.dst_image; + const vk::ImageLayout new_src_layout = + self_copy ? vk::ImageLayout::eGeneral : vk::ImageLayout::eTransferSrcOptimal; + const vk::ImageLayout new_dst_layout = + self_copy ? vk::ImageLayout::eGeneral : vk::ImageLayout::eTransferDstOptimal; + + const std::array pre_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = params.src_access, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = new_src_layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, copy.src_level), + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = params.dst_access, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = new_dst_layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.dst_image, + .subresourceRange = MakeSubresourceRange(params.aspect, copy.dst_level), + }, + }; + const std::array post_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eNone, + .oldLayout = new_src_layout, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, copy.src_level), + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = params.dst_access, + .oldLayout = new_dst_layout, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.dst_image, + .subresourceRange = MakeSubresourceRange(params.aspect, copy.dst_level), + }, + }; + + cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers); + + cmdbuf.copyImage(params.src_image, new_src_layout, params.dst_image, new_dst_layout, + image_copy); + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers); + }); + + return true; +} + +bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, + const VideoCore::TextureBlit& blit) { + const bool is_depth_stencil = source.type == VideoCore::SurfaceType::DepthStencil; + const auto& depth_traits = instance.GetTraits(source.pixel_format); + if (is_depth_stencil && !depth_traits.blit_support) { + return blit_helper.BlitDepthStencil(source, dest, blit); + } + + renderpass_cache.EndRendering(); + + const RecordParams params = { + .aspect = source.Aspect(), + .filter = MakeFilter(source.pixel_format), + .pipeline_flags = source.PipelineStageFlags() | dest.PipelineStageFlags(), + .src_access = source.AccessFlags(), + .dst_access = dest.AccessFlags(), + .src_image = source.Image(), + .dst_image = dest.Image(), + }; + + scheduler.Record([params, blit](vk::CommandBuffer cmdbuf) { + const std::array source_offsets = { + vk::Offset3D{static_cast(blit.src_rect.left), + static_cast(blit.src_rect.bottom), 0}, + vk::Offset3D{static_cast(blit.src_rect.right), 
static_cast(blit.src_rect.top), + 1}, + }; + + const std::array dest_offsets = { + vk::Offset3D{static_cast(blit.dst_rect.left), + static_cast(blit.dst_rect.bottom), 0}, + vk::Offset3D{static_cast(blit.dst_rect.right), static_cast(blit.dst_rect.top), + 1}, + }; + + const vk::ImageBlit blit_area = { + .srcSubresource{ + .aspectMask = params.aspect, + .mipLevel = blit.src_level, + .baseArrayLayer = blit.src_layer, + .layerCount = 1, + }, + .srcOffsets = source_offsets, + .dstSubresource{ + .aspectMask = params.aspect, + .mipLevel = blit.dst_level, + .baseArrayLayer = blit.dst_layer, + .layerCount = 1, + }, + .dstOffsets = dest_offsets, + }; + + const std::array read_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = params.src_access, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, blit.src_level), + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = params.dst_access, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.dst_image, + .subresourceRange = MakeSubresourceRange(params.aspect, blit.dst_level), + }, + }; + const std::array write_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferRead, + .dstAccessMask = params.src_access, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, blit.src_level), + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = params.dst_access, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.dst_image, + .subresourceRange = MakeSubresourceRange(params.aspect, blit.dst_level), + }, + }; + + cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, read_barriers); + + cmdbuf.blitImage(params.src_image, vk::ImageLayout::eTransferSrcOptimal, params.dst_image, + vk::ImageLayout::eTransferDstOptimal, blit_area, params.filter); + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags, + vk::DependencyFlagBits::eByRegion, {}, {}, write_barriers); + }); + + return true; +} + +void TextureRuntime::GenerateMipmaps(Surface& surface) { + if (VideoCore::IsCustomFormatCompressed(surface.custom_format)) { + LOG_ERROR(Render_Vulkan, "Generating mipmaps for compressed formats unsupported!"); + return; + } + + renderpass_cache.EndRendering(); + + auto [width, height] = surface.RealExtent(); + const u32 levels = surface.levels; + for (u32 i = 1; i < levels; i++) { + const Common::Rectangle src_rect{0, height, width, 0}; + width = width > 1 ? width >> 1 : 1; + height = height > 1 ? 
height >> 1 : 1; + const Common::Rectangle dst_rect{0, height, width, 0}; + + const VideoCore::TextureBlit blit = { + .src_level = i - 1, + .dst_level = i, + .src_rect = src_rect, + .dst_rect = dst_rect, + }; + BlitTextures(surface, surface, blit); + } +} + +bool TextureRuntime::NeedsConversion(VideoCore::PixelFormat format) const { + const FormatTraits traits = instance.GetTraits(format); + return traits.needs_conversion && + // DepthStencil formats are handled elsewhere due to de-interleaving. + traits.aspect != (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil); +} + +void TextureRuntime::FreeDescriptorSetsWithImage(vk::ImageView image_view) { + texture_provider.FreeWithImage(image_view); + blit_helper.compute_provider.FreeWithImage(image_view); + blit_helper.compute_buffer_provider.FreeWithImage(image_view); + blit_helper.two_textures_provider.FreeWithImage(image_view); +} + +Surface::Surface(TextureRuntime& runtime_, const VideoCore::SurfaceParams& params) + : SurfaceBase{params}, runtime{&runtime_}, instance{&runtime_.GetInstance()}, + scheduler{&runtime_.GetScheduler()}, traits{instance->GetTraits(pixel_format)} { + + if (pixel_format == VideoCore::PixelFormat::Invalid) { + return; + } + + const bool is_mutable = pixel_format == VideoCore::PixelFormat::RGBA8; + const vk::Format format = traits.native; + + ASSERT_MSG(format != vk::Format::eUndefined && levels >= 1, + "Image allocation parameters are invalid"); + + u32 num_images = 0; + std::array raw_images; + + vk::ImageCreateFlags flags{}; + if (texture_type == VideoCore::TextureType::CubeMap) { + flags |= vk::ImageCreateFlagBits::eCubeCompatible; + } + if (is_mutable) { + flags |= vk::ImageCreateFlagBits::eMutableFormat; + } + + const bool need_format_list = is_mutable && instance->IsImageFormatListSupported(); + handles[0] = MakeHandle(instance, width, height, levels, texture_type, format, traits.usage, + flags, traits.aspect, need_format_list, DebugName(false)); + raw_images[num_images++] = handles[0].image; + + if (res_scale != 1) { + handles[1] = + MakeHandle(instance, GetScaledWidth(), GetScaledHeight(), levels, texture_type, format, + traits.usage, flags, traits.aspect, need_format_list, DebugName(true)); + raw_images[num_images++] = handles[1].image; + } + + runtime->renderpass_cache.EndRendering(); + scheduler->Record([raw_images, num_images, aspect = traits.aspect](vk::CommandBuffer cmdbuf) { + const auto barriers = MakeInitBarriers(aspect, raw_images, num_images); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, + vk::PipelineStageFlagBits::eTopOfPipe, + vk::DependencyFlagBits::eByRegion, {}, {}, barriers); + }); +} + +Surface::Surface(TextureRuntime& runtime_, const VideoCore::SurfaceBase& surface, + const VideoCore::Material* mat) + : SurfaceBase{surface}, runtime{&runtime_}, instance{&runtime_.GetInstance()}, + scheduler{&runtime_.GetScheduler()}, traits{instance->GetTraits(mat->format)} { + if (!traits.transfer_support) { + return; + } + + const bool has_normal = mat && mat->Map(MapType::Normal); + const vk::Format format = traits.native; + + u32 num_images = 0; + std::array raw_images; + + vk::ImageCreateFlags flags{}; + if (texture_type == VideoCore::TextureType::CubeMap) { + flags |= vk::ImageCreateFlagBits::eCubeCompatible; + } + + const std::string debug_name = DebugName(false, true); + handles[0] = MakeHandle(instance, mat->width, mat->height, levels, texture_type, format, + traits.usage, flags, traits.aspect, false, debug_name); + raw_images[num_images++] = handles[0].image; + 
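+    // Handle slot layout mirrors the branches below: slot 0 holds the base material
+    // image, slot 1 the RGBA8 copy used when res_scale != 1, and slot 2 the normal map.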
+ if (res_scale != 1) { + handles[1] = MakeHandle(instance, mat->width, mat->height, levels, texture_type, + vk::Format::eR8G8B8A8Unorm, traits.usage, flags, traits.aspect, + false, debug_name); + raw_images[num_images++] = handles[1].image; + } + if (has_normal) { + handles[2] = MakeHandle(instance, mat->width, mat->height, levels, texture_type, format, + traits.usage, flags, traits.aspect, false, debug_name); + raw_images[num_images++] = handles[2].image; + } + + runtime->renderpass_cache.EndRendering(); + scheduler->Record([raw_images, num_images, aspect = traits.aspect](vk::CommandBuffer cmdbuf) { + const auto barriers = MakeInitBarriers(aspect, raw_images, num_images); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, + vk::PipelineStageFlagBits::eTopOfPipe, + vk::DependencyFlagBits::eByRegion, {}, {}, barriers); + }); + + custom_format = mat->format; + material = mat; +} + +Surface::~Surface() { + if (!handles[0].image_view) { + return; + } + for (const auto& [alloc, image, image_view] : handles) { + if (image_view) { + runtime->FreeDescriptorSetsWithImage(*image_view); + } + if (image) { + vmaDestroyImage(instance->GetAllocator(), image, alloc); + } + } + if (copy_handle.image_view) { + vmaDestroyImage(instance->GetAllocator(), copy_handle.image, copy_handle.alloc); + } +} + +void Surface::Upload(const VideoCore::BufferTextureCopy& upload, + const VideoCore::StagingData& staging) { + runtime->renderpass_cache.EndRendering(); + + const RecordParams params = { + .aspect = Aspect(), + .pipeline_flags = PipelineStageFlags(), + .src_access = AccessFlags(), + .src_image = Image(0), + }; + + scheduler->Record([buffer = runtime->upload_buffer.Handle(), format = traits.native, params, + staging, upload](vk::CommandBuffer cmdbuf) { + u32 num_copies = 1; + std::array buffer_image_copies; + + const auto rect = upload.texture_rect; + buffer_image_copies[0] = vk::BufferImageCopy{ + .bufferOffset = upload.buffer_offset, + .bufferRowLength = rect.GetWidth(), + .bufferImageHeight = rect.GetHeight(), + .imageSubresource{ + .aspectMask = params.aspect, + .mipLevel = upload.texture_level, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, + .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}, + }; + + if (params.aspect & vk::ImageAspectFlagBits::eStencil) { + buffer_image_copies[0].imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth; + vk::BufferImageCopy& stencil_copy = buffer_image_copies[1]; + stencil_copy = buffer_image_copies[0]; + stencil_copy.bufferOffset += UnpackDepthStencil(staging, format); + stencil_copy.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil; + num_copies++; + } + + const vk::ImageMemoryBarrier read_barrier = { + .srcAccessMask = params.src_access, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, upload.texture_level), + }; + const vk::ImageMemoryBarrier write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = params.src_access, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + 
.subresourceRange = MakeSubresourceRange(params.aspect, upload.texture_level), + }; + + cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, read_barrier); + + cmdbuf.copyBufferToImage(buffer, params.src_image, vk::ImageLayout::eTransferDstOptimal, + num_copies, buffer_image_copies.data()); + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags, + vk::DependencyFlagBits::eByRegion, {}, {}, write_barrier); + }); + + runtime->upload_buffer.Commit(staging.size); + + if (res_scale != 1) { + const VideoCore::TextureBlit blit = { + .src_level = upload.texture_level, + .dst_level = upload.texture_level, + .src_rect = upload.texture_rect, + .dst_rect = upload.texture_rect * res_scale, + }; + + BlitScale(blit, true); + } +} + +void Surface::UploadCustom(const VideoCore::Material* material, u32 level) { + const u32 width = material->width; + const u32 height = material->height; + const auto color = material->textures[0]; + const Common::Rectangle rect{0U, height, width, 0U}; + + const auto upload = [&](u32 index, VideoCore::CustomTexture* texture) { + const u64 custom_size = texture->data.size(); + const RecordParams params = { + .aspect = vk::ImageAspectFlagBits::eColor, + .pipeline_flags = PipelineStageFlags(), + .src_access = AccessFlags(), + .src_image = Image(index), + }; + + const auto [data, offset, invalidate] = runtime->upload_buffer.Map(custom_size, 0); + std::memcpy(data, texture->data.data(), custom_size); + runtime->upload_buffer.Commit(custom_size); + + scheduler->Record([buffer = runtime->upload_buffer.Handle(), level, params, rect, + offset = offset](vk::CommandBuffer cmdbuf) { + const vk::BufferImageCopy buffer_image_copy = { + .bufferOffset = offset, + .bufferRowLength = 0, + .bufferImageHeight = rect.GetHeight(), + .imageSubresource{ + .aspectMask = params.aspect, + .mipLevel = level, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, + .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}, + }; + + const vk::ImageMemoryBarrier read_barrier = { + .srcAccessMask = params.src_access, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, level), + }; + const vk::ImageMemoryBarrier write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = params.src_access, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, level), + }; + + cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, read_barrier); + + cmdbuf.copyBufferToImage(buffer, params.src_image, vk::ImageLayout::eTransferDstOptimal, + buffer_image_copy); + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags, + vk::DependencyFlagBits::eByRegion, {}, {}, write_barrier); + }); + }; + + upload(0, color); + + for (u32 i = 1; i < VideoCore::MAX_MAPS; i++) { + const auto texture = material->textures[i]; + if (!texture) { + 
continue; + } + upload(i + 1, texture); + } +} + +void Surface::Download(const VideoCore::BufferTextureCopy& download, + const VideoCore::StagingData& staging) { + SCOPE_EXIT({ + scheduler->Finish(); + runtime->download_buffer.Commit(staging.size); + }); + + runtime->renderpass_cache.EndRendering(); + + if (pixel_format == PixelFormat::D24S8) { + runtime->blit_helper.DepthToBuffer(*this, runtime->download_buffer.Handle(), download); + return; + } + + if (res_scale != 1) { + const VideoCore::TextureBlit blit = { + .src_level = download.texture_level, + .dst_level = download.texture_level, + .src_rect = download.texture_rect * res_scale, + .dst_rect = download.texture_rect, + }; + + BlitScale(blit, false); + } + + const RecordParams params = { + .aspect = Aspect(), + .pipeline_flags = PipelineStageFlags(), + .src_access = AccessFlags(), + .src_image = Image(0), + }; + + scheduler->Record( + [buffer = runtime->download_buffer.Handle(), params, download](vk::CommandBuffer cmdbuf) { + const auto rect = download.texture_rect; + const vk::BufferImageCopy buffer_image_copy = { + .bufferOffset = download.buffer_offset, + .bufferRowLength = rect.GetWidth(), + .bufferImageHeight = rect.GetHeight(), + .imageSubresource{ + .aspectMask = params.aspect, + .mipLevel = download.texture_level, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, + .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}, + }; + + const vk::ImageMemoryBarrier read_barrier = { + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, download.texture_level), + }; + const vk::ImageMemoryBarrier image_write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eMemoryWrite, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, download.texture_level), + }; + const vk::MemoryBarrier memory_write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + }; + + cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, read_barrier); + + cmdbuf.copyImageToBuffer(params.src_image, vk::ImageLayout::eTransferSrcOptimal, buffer, + buffer_image_copy); + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags, + vk::DependencyFlagBits::eByRegion, memory_write_barrier, {}, + image_write_barrier); + }); +} + +void Surface::ScaleUp(u32 new_scale) { + if (res_scale == new_scale || new_scale == 1) { + return; + } + + res_scale = new_scale; + + const bool is_mutable = pixel_format == VideoCore::PixelFormat::RGBA8; + + vk::ImageCreateFlags flags{}; + if (texture_type == VideoCore::TextureType::CubeMap) { + flags |= vk::ImageCreateFlagBits::eCubeCompatible; + } + if (is_mutable) { + flags |= vk::ImageCreateFlagBits::eMutableFormat; + } + + handles[1] = + MakeHandle(instance, GetScaledWidth(), 
GetScaledHeight(), levels, texture_type, + traits.native, traits.usage, flags, traits.aspect, false, DebugName(true)); + + runtime->renderpass_cache.EndRendering(); + scheduler->Record( + [raw_images = std::array{Image()}, aspect = traits.aspect](vk::CommandBuffer cmdbuf) { + const auto barriers = MakeInitBarriers(aspect, raw_images, raw_images.size()); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, + vk::PipelineStageFlagBits::eTopOfPipe, + vk::DependencyFlagBits::eByRegion, {}, {}, barriers); + }); + LOG_INFO(HW_GPU, "Surface scale up!"); + for (u32 level = 0; level < levels; level++) { + const VideoCore::TextureBlit blit = { + .src_level = level, + .dst_level = level, + .src_rect = GetRect(level), + .dst_rect = GetScaledRect(level), + }; + BlitScale(blit, true); + } +} + +u32 Surface::GetInternalBytesPerPixel() const { + // Request 5 bytes for D24S8 as well because we can use the + // extra space when deinterleaving the data during upload + if (traits.native == vk::Format::eD24UnormS8Uint) { + return 5; + } + + return vk::blockSize(traits.native); +} + +vk::AccessFlags Surface::AccessFlags() const noexcept { + const bool is_color = static_cast(Aspect() & vk::ImageAspectFlagBits::eColor); + const vk::AccessFlags attachment_flags = + is_color + ? vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite + : vk::AccessFlagBits::eDepthStencilAttachmentRead | + vk::AccessFlagBits::eDepthStencilAttachmentWrite; + + return vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead | + vk::AccessFlagBits::eTransferWrite | + (is_framebuffer ? attachment_flags : vk::AccessFlagBits::eNone) | + (is_storage ? vk::AccessFlagBits::eShaderWrite : vk::AccessFlagBits::eNone); +} + +vk::PipelineStageFlags Surface::PipelineStageFlags() const noexcept { + const bool is_color = static_cast(Aspect() & vk::ImageAspectFlagBits::eColor); + const vk::PipelineStageFlags attachment_flags = + is_color ? vk::PipelineStageFlagBits::eColorAttachmentOutput + : vk::PipelineStageFlagBits::eEarlyFragmentTests | + vk::PipelineStageFlagBits::eLateFragmentTests; + + return vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eFragmentShader | + (is_framebuffer ? attachment_flags : vk::PipelineStageFlagBits::eNone) | + (is_storage ? 
+                       : vk::PipelineStageFlagBits::eNone);
+}
+
+vk::Image Surface::Image(u32 index) const noexcept {
+    const vk::Image image = handles[index].image;
+    if (!image) {
+        return handles[0].image;
+    }
+    return image;
+}
+
+vk::ImageView Surface::CopyImageView() noexcept {
+    vk::ImageLayout copy_layout = vk::ImageLayout::eGeneral;
+    if (!copy_handle.image) {
+        vk::ImageCreateFlags flags{};
+        if (texture_type == VideoCore::TextureType::CubeMap) {
+            flags |= vk::ImageCreateFlagBits::eCubeCompatible;
+        }
+        copy_handle =
+            MakeHandle(instance, GetScaledWidth(), GetScaledHeight(), levels, texture_type,
+                       traits.native, traits.usage, flags, traits.aspect, false);
+        copy_layout = vk::ImageLayout::eUndefined;
+    }
+
+    runtime->renderpass_cache.EndRendering();
+
+    const RecordParams params = {
+        .aspect = Aspect(),
+        .pipeline_flags = PipelineStageFlags(),
+        .src_access = AccessFlags(),
+        .src_image = Image(),
+        .dst_image = copy_handle.image,
+    };
+
+    scheduler->Record([params, copy_layout, levels = this->levels, width = GetScaledWidth(),
+                       height = GetScaledHeight()](vk::CommandBuffer cmdbuf) {
+        std::array pre_barriers = {
+            vk::ImageMemoryBarrier{
+                .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
+                .dstAccessMask = vk::AccessFlagBits::eTransferRead,
+                .oldLayout = vk::ImageLayout::eGeneral,
+                .newLayout = vk::ImageLayout::eTransferSrcOptimal,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = params.src_image,
+                .subresourceRange = MakeSubresourceRange(params.aspect, 0, levels),
+            },
+            vk::ImageMemoryBarrier{
+                .srcAccessMask = vk::AccessFlagBits::eShaderRead,
+                .dstAccessMask = vk::AccessFlagBits::eTransferWrite,
+                .oldLayout = copy_layout,
+                .newLayout = vk::ImageLayout::eTransferDstOptimal,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = params.dst_image,
+                .subresourceRange = MakeSubresourceRange(params.aspect, 0, levels),
+            },
+        };
+        std::array post_barriers = {
+            vk::ImageMemoryBarrier{
+                .srcAccessMask = vk::AccessFlagBits::eTransferRead,
+                .dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
+                .oldLayout = vk::ImageLayout::eTransferSrcOptimal,
+                .newLayout = vk::ImageLayout::eGeneral,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = params.src_image,
+                .subresourceRange = MakeSubresourceRange(params.aspect, 0, levels),
+            },
+            vk::ImageMemoryBarrier{
+                .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
+                .dstAccessMask = vk::AccessFlagBits::eShaderRead,
+                .oldLayout = vk::ImageLayout::eTransferDstOptimal,
+                .newLayout = vk::ImageLayout::eGeneral,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = params.dst_image,
+                .subresourceRange = MakeSubresourceRange(params.aspect, 0, levels),
+            },
+        };
+
+        boost::container::small_vector<vk::ImageCopy, 8> image_copies;
+        for (u32 level = 0; level < levels; level++) {
+            image_copies.push_back(vk::ImageCopy{
+                .srcSubresource{
+                    .aspectMask = vk::ImageAspectFlagBits::eColor,
+                    .mipLevel = level,
+                    .baseArrayLayer = 0,
+                    .layerCount = 1,
+                },
+                .srcOffset = {0, 0, 0},
+                .dstSubresource{
+                    .aspectMask = vk::ImageAspectFlagBits::eColor,
+                    .mipLevel = level,
+                    .baseArrayLayer = 0,
+                    .layerCount = 1,
+                },
+                .dstOffset = {0, 0, 0},
+                .extent = {width >> level, height >> level, 1},
+            });
+        }
+
+        cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer,
+                               vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers);
+
+        cmdbuf.copyImage(params.src_image, vk::ImageLayout::eTransferSrcOptimal, params.dst_image,
+                         vk::ImageLayout::eTransferDstOptimal, image_copies);
+
+        cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags,
+                               vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers);
+    });
+
+    return copy_handle.image_view.get();
+}
+
+vk::ImageView Surface::ImageView(u32 index) const noexcept {
+    const auto& image_view = handles[index].image_view.get();
+    if (!image_view) {
+        return handles[0].image_view.get();
+    }
+    return image_view;
+}
+
+vk::ImageView Surface::FramebufferView() noexcept {
+    is_framebuffer = true;
+    return ImageView();
+}
+
+vk::ImageView Surface::DepthView() noexcept {
+    if (depth_view) {
+        return depth_view.get();
+    }
+
+    const vk::ImageViewCreateInfo view_info = {
+        .image = Image(),
+        .viewType = vk::ImageViewType::e2D,
+        .format = instance->GetTraits(pixel_format).native,
+        .subresourceRange{
+            .aspectMask = vk::ImageAspectFlagBits::eDepth,
+            .baseMipLevel = 0,
+            .levelCount = VK_REMAINING_MIP_LEVELS,
+            .baseArrayLayer = 0,
+            .layerCount = VK_REMAINING_ARRAY_LAYERS,
+        },
+    };
+
+    depth_view = instance->GetDevice().createImageViewUnique(view_info);
+    return depth_view.get();
+}
+
+vk::ImageView Surface::StencilView() noexcept {
+    if (stencil_view) {
+        return stencil_view.get();
+    }
+
+    const vk::ImageViewCreateInfo view_info = {
+        .image = Image(),
+        .viewType = vk::ImageViewType::e2D,
+        .format = instance->GetTraits(pixel_format).native,
+        .subresourceRange{
+            .aspectMask = vk::ImageAspectFlagBits::eStencil,
+            .baseMipLevel = 0,
+            .levelCount = VK_REMAINING_MIP_LEVELS,
+            .baseArrayLayer = 0,
+            .layerCount = VK_REMAINING_ARRAY_LAYERS,
+        },
+    };
+
+    stencil_view = instance->GetDevice().createImageViewUnique(view_info);
+    return stencil_view.get();
+}
+
+vk::ImageView Surface::StorageView() noexcept {
+    if (storage_view) {
+        return storage_view.get();
+    }
+
+    if (pixel_format != VideoCore::PixelFormat::RGBA8) {
+        LOG_WARNING(Render_Vulkan,
+                    "Attempted to retrieve storage view from unsupported surface with format {}",
+                    VideoCore::PixelFormatAsString(pixel_format));
+        return ImageView();
+    }
+
+    is_storage = true;
+
+    const vk::ImageViewCreateInfo storage_view_info = {
+        .image = Image(),
+        .viewType = vk::ImageViewType::e2D,
+        .format = vk::Format::eR32Uint,
+        .subresourceRange{
+            .aspectMask = vk::ImageAspectFlagBits::eColor,
+            .baseMipLevel = 0,
+            .levelCount = VK_REMAINING_MIP_LEVELS,
+            .baseArrayLayer = 0,
+            .layerCount = VK_REMAINING_ARRAY_LAYERS,
+        },
+    };
+    storage_view = instance->GetDevice().createImageViewUnique(storage_view_info);
+    return storage_view.get();
+}
+
+vk::Framebuffer Surface::Framebuffer() noexcept {
+    const u32 index = res_scale == 1 ? 0u : 1u;
+    if (framebuffers[index]) {
+        return framebuffers[index].get();
+    }
+
+    const bool is_depth = type == SurfaceType::Depth || type == SurfaceType::DepthStencil;
+    const auto color_format = is_depth ? PixelFormat::Invalid : pixel_format;
+    const auto depth_format = is_depth ? pixel_format : PixelFormat::Invalid;
+    const auto render_pass =
+        runtime->renderpass_cache.GetRenderpass(color_format, depth_format, false);
+    const auto attachments = std::array{ImageView()};
+    framebuffers[index] = MakeFramebuffer(instance->GetDevice(), render_pass, GetScaledWidth(),
+                                          GetScaledHeight(), attachments, 1);
+    return framebuffers[index].get();
+}
+
+void Surface::BlitScale(const VideoCore::TextureBlit& blit, bool up_scale) {
+    const FormatTraits& depth_traits = instance->GetTraits(pixel_format);
+    const bool is_depth_stencil = pixel_format == PixelFormat::D24S8;
+    if (is_depth_stencil && !depth_traits.blit_support) {
+        LOG_WARNING(Render_Vulkan, "Depth scale unsupported by hardware");
+        return;
+    }
+
+    scheduler->Record([src_image = Image(!up_scale), aspect = Aspect(),
+                       filter = MakeFilter(pixel_format), dst_image = Image(up_scale),
+                       blit](vk::CommandBuffer render_cmdbuf) {
+        const std::array source_offsets = {
+            vk::Offset3D{static_cast<s32>(blit.src_rect.left),
+                         static_cast<s32>(blit.src_rect.bottom), 0},
+            vk::Offset3D{static_cast<s32>(blit.src_rect.right),
+                         static_cast<s32>(blit.src_rect.top), 1},
+        };
+
+        const std::array dest_offsets = {
+            vk::Offset3D{static_cast<s32>(blit.dst_rect.left),
+                         static_cast<s32>(blit.dst_rect.bottom), 0},
+            vk::Offset3D{static_cast<s32>(blit.dst_rect.right),
+                         static_cast<s32>(blit.dst_rect.top), 1},
+        };
+
+        const vk::ImageBlit blit_area = {
+            .srcSubresource{
+                .aspectMask = aspect,
+                .mipLevel = blit.src_level,
+                .baseArrayLayer = blit.src_layer,
+                .layerCount = 1,
+            },
+            .srcOffsets = source_offsets,
+            .dstSubresource{
+                .aspectMask = aspect,
+                .mipLevel = blit.dst_level,
+                .baseArrayLayer = blit.dst_layer,
+                .layerCount = 1,
+            },
+            .dstOffsets = dest_offsets,
+        };
+
+        const std::array read_barriers = {
+            vk::ImageMemoryBarrier{
+                .srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
+                .dstAccessMask = vk::AccessFlagBits::eTransferRead,
+                .oldLayout = vk::ImageLayout::eGeneral,
+                .newLayout = vk::ImageLayout::eTransferSrcOptimal,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = src_image,
+                .subresourceRange = MakeSubresourceRange(aspect, blit.src_level),
+            },
+            vk::ImageMemoryBarrier{
+                .srcAccessMask = vk::AccessFlagBits::eShaderRead |
+                                 vk::AccessFlagBits::eDepthStencilAttachmentRead |
+                                 vk::AccessFlagBits::eColorAttachmentRead |
+                                 vk::AccessFlagBits::eTransferRead,
+                .dstAccessMask = vk::AccessFlagBits::eTransferWrite,
+                .oldLayout = vk::ImageLayout::eGeneral,
+                .newLayout = vk::ImageLayout::eTransferDstOptimal,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = dst_image,
+                .subresourceRange = MakeSubresourceRange(aspect, blit.dst_level),
+            },
+        };
+        const std::array write_barriers = {
+            vk::ImageMemoryBarrier{
+                .srcAccessMask = vk::AccessFlagBits::eNone,
+                .dstAccessMask = vk::AccessFlagBits::eMemoryWrite | vk::AccessFlagBits::eMemoryRead,
+                .oldLayout = vk::ImageLayout::eTransferSrcOptimal,
+                .newLayout = vk::ImageLayout::eGeneral,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = src_image,
+                .subresourceRange = MakeSubresourceRange(aspect, blit.src_level),
+            },
+            vk::ImageMemoryBarrier{
+                .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
+                .dstAccessMask = vk::AccessFlagBits::eMemoryWrite | vk::AccessFlagBits::eMemoryRead,
+                .oldLayout = vk::ImageLayout::eTransferDstOptimal,
+                .newLayout = vk::ImageLayout::eGeneral,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = dst_image,
+                .subresourceRange = MakeSubresourceRange(aspect, blit.dst_level),
+            },
+        };
+
+        render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
+                                      vk::PipelineStageFlagBits::eTransfer,
+                                      vk::DependencyFlagBits::eByRegion, {}, {}, read_barriers);
+
+        render_cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image,
+                                vk::ImageLayout::eTransferDstOptimal, blit_area, filter);
+
+        render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
+                                      vk::PipelineStageFlagBits::eAllCommands,
+                                      vk::DependencyFlagBits::eByRegion, {}, {}, write_barriers);
+    });
+}
+
+Framebuffer::Framebuffer(TextureRuntime& runtime, const VideoCore::FramebufferParams& params,
+                         Surface* color, Surface* depth)
+    : VideoCore::FramebufferParams{params}, res_scale{color ? color->res_scale
+                                                            : (depth ? depth->res_scale : 1u)} {
+    auto& renderpass_cache = runtime.GetRenderpassCache();
+    if (shadow_rendering && !color) {
+        return;
+    }
+
+    width = height = std::numeric_limits<u32>::max();
+
+    const auto prepare = [&](u32 index, Surface* surface) {
+        const VideoCore::Extent extent = surface->RealExtent();
+        width = std::min(width, extent.width);
+        height = std::min(height, extent.height);
+        if (!shadow_rendering) {
+            formats[index] = surface->pixel_format;
+        }
+        images[index] = surface->Image();
+        aspects[index] = surface->Aspect();
+        image_views[index] = shadow_rendering ? surface->StorageView() : surface->FramebufferView();
+    };
+
+    u32 num_attachments = 0;
+    std::array<vk::ImageView, 2> attachments;
+
+    if (color) {
+        prepare(0, color);
+        attachments[num_attachments++] = image_views[0];
+    }
+
+    if (depth) {
+        prepare(1, depth);
+        attachments[num_attachments++] = image_views[1];
+    }
+
+    const vk::Device device = runtime.GetInstance().GetDevice();
+    if (shadow_rendering) {
+        render_pass =
+            renderpass_cache.GetRenderpass(PixelFormat::Invalid, PixelFormat::Invalid, false);
+        framebuffer = MakeFramebuffer(device, render_pass, color->GetScaledWidth(),
+                                      color->GetScaledHeight(), {}, 0);
+    } else {
+        render_pass = renderpass_cache.GetRenderpass(formats[0], formats[1], false);
+        framebuffer =
+            MakeFramebuffer(device, render_pass, width, height, attachments, num_attachments);
+    }
+}
+
+Framebuffer::~Framebuffer() = default;
+
+Sampler::Sampler(TextureRuntime& runtime, const VideoCore::SamplerParams& params) {
+    using TextureConfig = VideoCore::SamplerParams::TextureConfig;
+
+    const Instance& instance = runtime.GetInstance();
+    const vk::PhysicalDeviceProperties properties = instance.GetPhysicalDevice().getProperties();
+    const bool use_border_color =
+        instance.IsCustomBorderColorSupported() && (params.wrap_s == TextureConfig::ClampToBorder ||
+                                                    params.wrap_t == TextureConfig::ClampToBorder);
+
+    const Common::Vec4f color = PicaToVK::ColorRGBA8(params.border_color);
+    const vk::SamplerCustomBorderColorCreateInfoEXT border_color_info = {
+        .customBorderColor = MakeClearColorValue(color),
+        .format = vk::Format::eUndefined,
+    };
+
+    const vk::Filter mag_filter = PicaToVK::TextureFilterMode(params.mag_filter);
+    const vk::Filter min_filter = PicaToVK::TextureFilterMode(params.min_filter);
+    const vk::SamplerMipmapMode mipmap_mode = PicaToVK::TextureMipFilterMode(params.mip_filter);
+    const vk::SamplerAddressMode wrap_u = PicaToVK::WrapMode(params.wrap_s);
+    const vk::SamplerAddressMode wrap_v = PicaToVK::WrapMode(params.wrap_t);
+    const float lod_min = static_cast<float>(params.lod_min);
+    const float lod_max = static_cast<float>(params.lod_max);
+
+    const vk::SamplerCreateInfo sampler_info = {
+        .pNext = use_border_color ? &border_color_info : nullptr,
+        .magFilter = mag_filter,
+        .minFilter = min_filter,
+        .mipmapMode = mipmap_mode,
+        .addressModeU = wrap_u,
+        .addressModeV = wrap_v,
+        .mipLodBias = 0,
+        .anisotropyEnable = instance.IsAnisotropicFilteringSupported(),
+        .maxAnisotropy = properties.limits.maxSamplerAnisotropy,
+        .compareEnable = false,
+        .compareOp = vk::CompareOp::eAlways,
+        .minLod = lod_min,
+        .maxLod = lod_max,
+        .borderColor =
+            use_border_color ? vk::BorderColor::eFloatCustomEXT : vk::BorderColor::eIntOpaqueBlack,
+        .unnormalizedCoordinates = false,
+    };
+    sampler = instance.GetDevice().createSamplerUnique(sampler_info);
+}
+
+Sampler::~Sampler() = default;
+
+DebugScope::DebugScope(TextureRuntime& runtime, Common::Vec4f color, std::string_view label)
+    : scheduler{runtime.GetScheduler()}, has_debug_tool{
+                                             runtime.GetInstance().HasDebuggingToolAttached()} {
+    if (!has_debug_tool) {
+        return;
+    }
+    scheduler.Record([color, label](vk::CommandBuffer cmdbuf) {
+        const vk::DebugUtilsLabelEXT debug_label = {
+            .pLabelName = label.data(),
+            .color = std::array{color[0], color[1], color[2], color[3]},
+        };
+        cmdbuf.beginDebugUtilsLabelEXT(debug_label);
+    });
+}
+
+DebugScope::~DebugScope() {
+    if (!has_debug_tool) {
+        return;
+    }
+    scheduler.Record([](vk::CommandBuffer cmdbuf) { cmdbuf.endDebugUtilsLabelEXT(); });
+}
+
+} // namespace Vulkan
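The staging ring buffers owned by the runtime back both transfer directions above. Below is a minimal sketch of how a caller is expected to drive Surface::Download through them; FindStaging, GetRect and GetInternalBytesPerPixel come from this patch, while `level`, the `dest` span and the `mapped` member on StagingData are illustrative assumptions, not part of the patch:

    const VideoCore::BufferTextureCopy download = {
        .buffer_offset = 0, // offset within the staging region, simplified here
        .texture_level = level,
        .texture_rect = surface.GetRect(level),
    };
    const u32 size = download.texture_rect.GetWidth() * download.texture_rect.GetHeight() *
                     surface.GetInternalBytesPerPixel();
    const VideoCore::StagingData staging = runtime.FindStaging(size, false); // false = download
    surface.Download(download, staging);
    // Download() finishes the scheduler and commits the staging space in its
    // SCOPE_EXIT, so the mapped bytes are safe to read once it returns.
    std::memcpy(dest.data(), staging.mapped.data(), size);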
diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.h b/src/video_core/renderer_vulkan/vk_texture_runtime.h
new file mode 100644
index 000000000..6694a98ef
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_texture_runtime.h
@@ -0,0 +1,295 @@
+// Copyright 2023 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <fmt/format.h>
+#include "video_core/rasterizer_cache/framebuffer_base.h"
+#include "video_core/rasterizer_cache/rasterizer_cache_base.h"
+#include "video_core/rasterizer_cache/surface_base.h"
+#include "video_core/renderer_vulkan/vk_blit_helper.h"
+#include "video_core/renderer_vulkan/vk_instance.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+
+VK_DEFINE_HANDLE(VmaAllocation)
+
+namespace VideoCore {
+struct Material;
+}
+
+namespace Vulkan {
+
+class Instance;
+class RenderpassCache;
+class DescriptorPool;
+class DescriptorSetProvider;
+class Surface;
+
+struct Handle {
+    VmaAllocation alloc;
+    vk::Image image;
+    vk::UniqueImageView image_view;
+};
+
+/**
+ * Provides texture manipulation functions to the rasterizer cache
+ * Separating this into a class makes it easier to abstract graphics API code
+ */
+class TextureRuntime {
+    friend class Surface;
+
+public:
+    explicit TextureRuntime(const Instance& instance, Scheduler& scheduler,
+                            RenderpassCache& renderpass_cache, DescriptorPool& pool,
+                            DescriptorSetProvider& texture_provider, u32 num_swapchain_images);
+    ~TextureRuntime();
+
+    const Instance& GetInstance() const {
+        return instance;
+    }
+
+    Scheduler& GetScheduler() const {
+        return scheduler;
+    }
+
+    RenderpassCache& GetRenderpassCache() {
+        return renderpass_cache;
+    }
+
+    /// Returns the removal threshold ticks for the garbage collector
+    u32 RemoveThreshold();
+
+    /// Maps an internal staging buffer of the provided size for pixel uploads/downloads
+    VideoCore::StagingData FindStaging(u32 size, bool upload);
+
+    /// Attempts to reinterpret a rectangle of source to another rectangle of dest
+    bool Reinterpret(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit);
+
+    /// Fills the rectangle of the texture with the clear value provided
+    bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear);
+
+    /// Copies a rectangle of source to another rectangle of dest
+    bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy);
+
+    /// Blits a rectangle of source to another rectangle of dest
+    bool BlitTextures(Surface& surface, Surface& dest, const VideoCore::TextureBlit& blit);
+
+    /// Generates mipmaps for all the available levels of the texture
+    void GenerateMipmaps(Surface& surface);
+
+    /// Returns true if the provided pixel format needs conversion
+    bool NeedsConversion(VideoCore::PixelFormat format) const;
+
+    /// Removes any descriptor sets that contain the provided image view.
+    void FreeDescriptorSetsWithImage(vk::ImageView image_view);
+
+private:
+    /// Clears a partial texture rect using a clear rectangle
+    void ClearTextureWithRenderpass(Surface& surface, const VideoCore::TextureClear& clear);
+
+private:
+    const Instance& instance;
+    Scheduler& scheduler;
+    RenderpassCache& renderpass_cache;
+    DescriptorSetProvider& texture_provider;
+    BlitHelper blit_helper;
+    StreamBuffer upload_buffer;
+    StreamBuffer download_buffer;
+    u32 num_swapchain_images;
+};
+
+class Surface : public VideoCore::SurfaceBase {
+    friend class TextureRuntime;
+
+public:
+    explicit Surface(TextureRuntime& runtime, const VideoCore::SurfaceParams& params);
+    explicit Surface(TextureRuntime& runtime, const VideoCore::SurfaceBase& surface,
+                     const VideoCore::Material* material);
+    ~Surface();
+
+    Surface(const Surface&) = delete;
+    Surface& operator=(const Surface&) = delete;
+
+    Surface(Surface&& o) noexcept = default;
+    Surface& operator=(Surface&& o) noexcept = default;
+
+    vk::ImageAspectFlags Aspect() const noexcept {
+        return traits.aspect;
+    }
+
+    /// Returns the image at index if it exists, otherwise the base image
+    vk::Image Image(u32 index = 1) const noexcept;
+
+    /// Returns the image view at index if it exists, otherwise the base view
+    vk::ImageView ImageView(u32 index = 1) const noexcept;
+
+    /// Returns a copy of the upscaled image handle, used for feedback loops.
+    vk::ImageView CopyImageView() noexcept;
+
+    /// Returns the framebuffer view of the surface image
+    vk::ImageView FramebufferView() noexcept;
+
+    /// Returns the depth view of the surface image
+    vk::ImageView DepthView() noexcept;
+
+    /// Returns the stencil view of the surface image
+    vk::ImageView StencilView() noexcept;
+
+    /// Returns the R32 image view used for atomic load/store
+    vk::ImageView StorageView() noexcept;
+
+    /// Returns a framebuffer handle for rendering to this surface
+    vk::Framebuffer Framebuffer() noexcept;
+
+    /// Uploads pixel data in staging to a rectangle region of the surface texture
+    void Upload(const VideoCore::BufferTextureCopy& upload, const VideoCore::StagingData& staging);
+
+    /// Uploads the custom material to the surface allocation.
+    void UploadCustom(const VideoCore::Material* material, u32 level);
+
+    /// Downloads pixel data to staging from a rectangle region of the surface texture
+    void Download(const VideoCore::BufferTextureCopy& download,
+                  const VideoCore::StagingData& staging);
+
+    /// Scales up the surface to match the new resolution scale.
+    void ScaleUp(u32 new_scale);
+
+    /// Returns the bytes per pixel of the internal surface format
+    u32 GetInternalBytesPerPixel() const;
+
+    /// Returns the access flags appropriate for the surface
+    vk::AccessFlags AccessFlags() const noexcept;
+
+    /// Returns the pipeline stage flags appropriate for the surface
+    vk::PipelineStageFlags PipelineStageFlags() const noexcept;
+
+private:
+    /// Performs a blit between the scaled/unscaled images
+    void BlitScale(const VideoCore::TextureBlit& blit, bool up_scale);
+
+    /// Downloads scaled depth stencil data
+    void DepthStencilDownload(const VideoCore::BufferTextureCopy& download,
+                              const VideoCore::StagingData& staging);
+
+public:
+    TextureRuntime* runtime;
+    const Instance* instance;
+    Scheduler* scheduler;
+    FormatTraits traits;
+    std::array<Handle, 2> handles{};
+    std::array<vk::UniqueFramebuffer, 2> framebuffers{};
+    Handle copy_handle;
+    vk::UniqueImageView depth_view;
+    vk::UniqueImageView stencil_view;
+    vk::UniqueImageView storage_view;
+    bool is_framebuffer{};
+    bool is_storage{};
+};
+
+class Framebuffer : public VideoCore::FramebufferParams {
+public:
+    explicit Framebuffer(TextureRuntime& runtime, const VideoCore::FramebufferParams& params,
+                         Surface* color, Surface* depth_stencil);
+    ~Framebuffer();
+
+    Framebuffer(const Framebuffer&) = delete;
+    Framebuffer& operator=(const Framebuffer&) = delete;
+
+    Framebuffer(Framebuffer&& o) noexcept = default;
+    Framebuffer& operator=(Framebuffer&& o) noexcept = default;
+
+    VideoCore::PixelFormat Format(VideoCore::SurfaceType type) const noexcept {
+        return formats[Index(type)];
+    }
+
+    [[nodiscard]] vk::ImageView ImageView(VideoCore::SurfaceType type) const noexcept {
+        return image_views[Index(type)];
+    }
+
+    [[nodiscard]] vk::Framebuffer Handle() const noexcept {
+        return framebuffer.get();
+    }
+
+    [[nodiscard]] std::array<vk::Image, 2> Images() const noexcept {
+        return images;
+    }
+
+    [[nodiscard]] std::array<vk::ImageAspectFlags, 2> Aspects() const noexcept {
+        return aspects;
+    }
+
+    [[nodiscard]] vk::RenderPass RenderPass() const noexcept {
+        return render_pass;
+    }
+
+    u32 Scale() const noexcept {
+        return res_scale;
+    }
+
+    u32 Width() const noexcept {
+        return width;
+    }
+
+    u32 Height() const noexcept {
+        return height;
+    }
+
+private:
+    std::array<vk::Image, 2> images{};
+    std::array<vk::ImageView, 2> image_views{};
+    vk::UniqueFramebuffer framebuffer;
+    vk::RenderPass render_pass;
+    std::array<vk::ImageAspectFlags, 2> aspects{};
+    std::array<VideoCore::PixelFormat, 2> formats{VideoCore::PixelFormat::Invalid,
+                                                  VideoCore::PixelFormat::Invalid};
+    u32 width{};
+    u32 height{};
+    u32 res_scale{1};
+};
+
+class Sampler {
+public:
+    Sampler(TextureRuntime& runtime, const VideoCore::SamplerParams& params);
+    ~Sampler();
+
+    Sampler(const Sampler&) = delete;
+    Sampler& operator=(const Sampler&) = delete;
+
+    Sampler(Sampler&& o) noexcept = default;
+    Sampler& operator=(Sampler&& o) noexcept = default;
+
+    [[nodiscard]] vk::Sampler Handle() const noexcept {
+        return sampler.get();
+    }
+
+private:
+    vk::UniqueSampler sampler;
+};
+
+class DebugScope {
+public:
+    template <typename... T>
+    explicit DebugScope(TextureRuntime& runtime, Common::Vec4f color,
+                        fmt::format_string<T...> format, T... args)
+        : DebugScope{runtime, color, fmt::format(format, std::forward<T>(args)...)} {}
+    explicit DebugScope(TextureRuntime& runtime, Common::Vec4f color, std::string_view label);
+    ~DebugScope();
+
+private:
+    Scheduler& scheduler;
+    bool has_debug_tool;
+};
+
+struct Traits {
+    using Runtime = Vulkan::TextureRuntime;
+    using Surface = Vulkan::Surface;
+    using Sampler = Vulkan::Sampler;
+    using Framebuffer = Vulkan::Framebuffer;
+    using DebugScope = Vulkan::DebugScope;
+};
+
+using RasterizerCache = VideoCore::RasterizerCache<Traits>;
+
+} // namespace Vulkan
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 8b33aab29..c836e7372 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -14,6 +14,7 @@
 #include "video_core/renderer_opengl/gl_vars.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
 #include "video_core/renderer_software/renderer_software.h"
+#include "video_core/renderer_vulkan/renderer_vulkan.h"
 #include "video_core/video_core.h"

 namespace VideoCore {
@@ -39,6 +40,9 @@ void Init(Frontend::EmuWindow& emu_window, Frontend::EmuWindow* secondary_window
     case Settings::GraphicsAPI::Software:
         g_renderer = std::make_unique<SwRenderer::RendererSoftware>(system, emu_window);
        break;
+    case Settings::GraphicsAPI::Vulkan:
+        g_renderer = std::make_unique<Vulkan::RendererVulkan>(system, emu_window, secondary_window);
+        break;
     case Settings::GraphicsAPI::OpenGL:
         g_renderer = std::make_unique<OpenGL::RendererOpenGL>(system, emu_window, secondary_window);
         break;
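With this dispatch in place, a frontend selects the new backend purely through the settings layer. A minimal sketch, assuming the Settings::values.graphics_api setting that the switch above reads and a trailing Core::System argument to VideoCore::Init (both follow existing Citra conventions; only the GraphicsAPI::Vulkan case is added by this patch):

    // Select the Vulkan renderer before bringing up the video core.
    Settings::values.graphics_api = Settings::GraphicsAPI::Vulkan; // assumed setting name
    VideoCore::Init(emu_window, secondary_window, system);         // secondary_window may be null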