TextureCache: Implement buffer copies on Vulkan.
This commit is contained in:
		| @@ -526,8 +526,8 @@ void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, | ||||
|     } | ||||
| } | ||||
|  | ||||
// NOTE(review): this diff view lists two signatures back to back without +/- markers;
// presumably the ConvertImage pair is the removed text and the ReinterpretImage pair replaces it.
// The body logs the source and destination formats at debug level and then forwards both
// images and the copy list to format_conversion_pass.ConvertImage.
| void TextureCacheRuntime::ConvertImage(Image& dst, Image& src, | ||||
|                                        std::span<const VideoCommon::ImageCopy> copies) { | ||||
| void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, | ||||
|                                            std::span<const VideoCommon::ImageCopy> copies) { | ||||
|     LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format); | ||||
|     format_conversion_pass.ConvertImage(dst, src, copies); | ||||
| } | ||||
|   | ||||
| @@ -84,9 +84,13 @@ public: | ||||
|  | ||||
|     u64 GetDeviceLocalMemory() const; | ||||
|  | ||||
// Tells the caller whether a copy between the two images should go through ReinterpretImage.
// This implementation ignores both images and always answers true.
|     bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) { | ||||
|         return true; | ||||
|     } | ||||
|  | ||||
|     void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | ||||
|  | ||||
|     void ConvertImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | ||||
|     void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | ||||
|  | ||||
|     void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) { | ||||
|         UNIMPLEMENTED(); | ||||
| @@ -338,7 +342,6 @@ struct TextureCacheParams { | ||||
|     static constexpr bool FRAMEBUFFER_BLITS = true; | ||||
|     static constexpr bool HAS_EMULATED_COPIES = true; | ||||
|     static constexpr bool HAS_DEVICE_MEMORY_INFO = true; | ||||
|     static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = true; | ||||
|  | ||||
|     using Runtime = OpenGL::TextureCacheRuntime; | ||||
|     using Image = OpenGL::Image; | ||||
|   | ||||
| @@ -308,6 +308,19 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | ||||
|     }; | ||||
| } | ||||
|  | ||||
| [[nodiscard]] VkBufferImageCopy MakeBufferImageCopy(const VideoCommon::ImageCopy& copy, bool is_src, | ||||
|                                                     VkImageAspectFlags aspect_mask) noexcept { | ||||
|     return VkBufferImageCopy{ | ||||
|         .bufferOffset = 0, | ||||
|         .bufferRowLength = 0, | ||||
|         .bufferImageHeight = 0, | ||||
|         .imageSubresource = MakeImageSubresourceLayers( | ||||
|             is_src ? copy.src_subresource : copy.dst_subresource, aspect_mask), | ||||
|         .imageOffset = MakeOffset3D(is_src ? copy.src_offset : copy.dst_offset), | ||||
|         .imageExtent = MakeExtent3D(copy.extent), | ||||
|     }; | ||||
| } | ||||
|  | ||||
| [[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( | ||||
|     std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { | ||||
|     std::vector<VkBufferCopy> result(copies.size()); | ||||
| @@ -754,6 +767,167 @@ StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) { | ||||
|     return staging_buffer_pool.Request(size, MemoryUsage::Download); | ||||
| } | ||||
|  | ||||
| bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { | ||||
|     if (VideoCore::Surface::GetFormatType(dst.info.format) == | ||||
|         VideoCore::Surface::SurfaceType::DepthStencil) { | ||||
|         return !device.IsExtShaderStencilExportSupported(); | ||||
|     } | ||||
|     return false; | ||||
| } | ||||
|  | ||||
/// Rounds `value` up to the nearest power of two.
///
/// @param value Number to round up. 0 and 1 both yield 1.
/// @return The smallest power of two that is >= `value`, or 0 when that power of two does not
///         fit in size_t (i.e. `value` is larger than the highest representable power of two).
///
/// The previous shift-based formula shifted by the full bit width for `value == 0` and for
/// values above the highest power of two, which is undefined behaviour; both cases are now
/// handled explicitly.
[[nodiscard]] size_t NextPow2(size_t value) {
    constexpr size_t highest_pow2 = size_t{1} << (8U * sizeof(size_t) - 1U);
    if (value > highest_pow2) {
        return 0;
    }
    // At most one iteration per bit of size_t; `result` cannot overflow thanks to the guard above.
    size_t result = 1;
    while (result < value) {
        result <<= 1U;
    }
    return result;
}
|  | ||||
| VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) { | ||||
|     const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL); | ||||
|     if (buffer_commits[level]) { | ||||
|         return *buffers[level]; | ||||
|     } | ||||
|     const auto new_size = NextPow2(needed_size); | ||||
|     VkBufferUsageFlags flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | | ||||
|                                VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | | ||||
|                                VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; | ||||
|     buffers[level] = device.GetLogical().CreateBuffer({ | ||||
|         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||||
|         .pNext = nullptr, | ||||
|         .flags = 0, | ||||
|         .size = new_size, | ||||
|         .usage = flags, | ||||
|         .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||||
|         .queueFamilyIndexCount = 0, | ||||
|         .pQueueFamilyIndices = nullptr, | ||||
|     }); | ||||
|     buffer_commits[level] = std::make_unique<MemoryCommit>( | ||||
|         memory_allocator.Commit(buffers[level], MemoryUsage::DeviceLocal)); | ||||
|     return *buffers[level]; | ||||
| } | ||||
|  | ||||
| void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, | ||||
|                                            std::span<const VideoCommon::ImageCopy> copies) { | ||||
|     std::vector<VkBufferImageCopy> vk_in_copies(copies.size()); | ||||
|     std::vector<VkBufferImageCopy> vk_out_copies(copies.size()); | ||||
|     const VkImageAspectFlags src_aspect_mask = src.AspectMask(); | ||||
|     const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); | ||||
|  | ||||
|     std::ranges::transform(copies, vk_in_copies.begin(), [src_aspect_mask](const auto& copy) { | ||||
|         return MakeBufferImageCopy(copy, true, src_aspect_mask); | ||||
|     }); | ||||
|     std::ranges::transform(copies, vk_out_copies.begin(), [dst_aspect_mask](const auto& copy) { | ||||
|         return MakeBufferImageCopy(copy, false, dst_aspect_mask); | ||||
|     }); | ||||
|     const u32 img_bpp = BytesPerBlock(src.info.format); | ||||
|     size_t total_size = 0; | ||||
|     for (const auto& copy : copies) { | ||||
|         total_size += copy.extent.width * copy.extent.height * copy.extent.depth * img_bpp; | ||||
|     } | ||||
|     const VkBuffer copy_buffer = GetTemporaryBuffer(total_size); | ||||
|     const VkImage dst_image = dst.Handle(); | ||||
|     const VkImage src_image = src.Handle(); | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
|     scheduler.Record([dst_image, src_image, copy_buffer, src_aspect_mask, dst_aspect_mask, | ||||
|                       vk_in_copies, vk_out_copies](vk::CommandBuffer cmdbuf) { | ||||
|         RangedBarrierRange dst_range; | ||||
|         RangedBarrierRange src_range; | ||||
|         for (const VkBufferImageCopy& copy : vk_in_copies) { | ||||
|             src_range.AddLayers(copy.imageSubresource); | ||||
|         } | ||||
|         for (const VkBufferImageCopy& copy : vk_out_copies) { | ||||
|             dst_range.AddLayers(copy.imageSubresource); | ||||
|         } | ||||
|         static constexpr VkMemoryBarrier READ_BARRIER{ | ||||
|             .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||||
|             .pNext = nullptr, | ||||
|             .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||||
|             .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | ||||
|         }; | ||||
|         static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||||
|             .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||||
|             .pNext = nullptr, | ||||
|             .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||||
|             .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | ||||
|         }; | ||||
|         const std::array pre_barriers{ | ||||
|             VkImageMemoryBarrier{ | ||||
|                 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||||
|                 .pNext = nullptr, | ||||
|                 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||||
|                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||||
|                                  VK_ACCESS_TRANSFER_WRITE_BIT, | ||||
|                 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||||
|                 .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||||
|                 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | ||||
|                 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|                 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|                 .image = src_image, | ||||
|                 .subresourceRange = src_range.SubresourceRange(src_aspect_mask), | ||||
|             }, | ||||
|         }; | ||||
|         const std::array middle_in_barrier{ | ||||
|             VkImageMemoryBarrier{ | ||||
|                 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||||
|                 .pNext = nullptr, | ||||
|                 .srcAccessMask = 0, | ||||
|                 .dstAccessMask = 0, | ||||
|                 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | ||||
|                 .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||||
|                 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|                 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|                 .image = src_image, | ||||
|                 .subresourceRange = src_range.SubresourceRange(src_aspect_mask), | ||||
|             }, | ||||
|         }; | ||||
|         const std::array middle_out_barrier{ | ||||
|             VkImageMemoryBarrier{ | ||||
|                 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||||
|                 .pNext = nullptr, | ||||
|                 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||||
|                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||||
|                                  VK_ACCESS_TRANSFER_WRITE_BIT, | ||||
|                 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||||
|                 .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||||
|                 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||||
|                 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|                 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|                 .image = dst_image, | ||||
|                 .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask), | ||||
|             }, | ||||
|         }; | ||||
|         const std::array post_barriers{ | ||||
|             VkImageMemoryBarrier{ | ||||
|                 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||||
|                 .pNext = nullptr, | ||||
|                 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||||
|                 .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | | ||||
|                                  VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | | ||||
|                                  VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||||
|                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | | ||||
|                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||||
|                                  VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | ||||
|                 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||||
|                 .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||||
|                 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|                 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|                 .image = dst_image, | ||||
|                 .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask), | ||||
|             }, | ||||
|         }; | ||||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||||
|                                0, {}, {}, pre_barriers); | ||||
|  | ||||
|         cmdbuf.CopyImageToBuffer(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, copy_buffer, | ||||
|                                  vk_in_copies); | ||||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||||
|                                0, WRITE_BARRIER, nullptr, middle_in_barrier); | ||||
|  | ||||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||||
|                                0, READ_BARRIER, {}, middle_out_barrier); | ||||
|         cmdbuf.CopyBufferToImage(copy_buffer, dst_image, VK_IMAGE_LAYOUT_GENERAL, vk_out_copies); | ||||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||||
|                                0, {}, {}, post_barriers); | ||||
|     }); | ||||
| } | ||||
|  | ||||
| void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, | ||||
|                                     const Region2D& dst_region, const Region2D& src_region, | ||||
|                                     Tegra::Engines::Fermi2D::Filter filter, | ||||
|   | ||||
| @@ -61,6 +61,10 @@ public: | ||||
|  | ||||
|     void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | ||||
|  | ||||
|     bool ShouldReinterpret(Image& dst, Image& src); | ||||
|  | ||||
|     void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | ||||
|  | ||||
|     void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled); | ||||
|  | ||||
|     bool CanAccelerateImageUpload(Image&) const noexcept { | ||||
| @@ -82,6 +86,8 @@ public: | ||||
|         return true; | ||||
|     } | ||||
|  | ||||
|     [[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size); | ||||
|  | ||||
|     const Device& device; | ||||
|     VKScheduler& scheduler; | ||||
|     MemoryAllocator& memory_allocator; | ||||
| @@ -90,6 +96,10 @@ public: | ||||
|     ASTCDecoderPass& astc_decoder_pass; | ||||
|     RenderPassCache& render_pass_cache; | ||||
|     const Settings::ResolutionScalingInfo& resolution; | ||||
|  | ||||
|     constexpr static size_t indexing_slots = 8 * sizeof(size_t); | ||||
|     std::array<vk::Buffer, indexing_slots> buffers{}; | ||||
|     std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{}; | ||||
| }; | ||||
|  | ||||
| class Image : public VideoCommon::ImageBase { | ||||
| @@ -316,7 +326,6 @@ struct TextureCacheParams { | ||||
|     static constexpr bool FRAMEBUFFER_BLITS = false; | ||||
|     static constexpr bool HAS_EMULATED_COPIES = false; | ||||
|     static constexpr bool HAS_DEVICE_MEMORY_INFO = true; | ||||
|     static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = false; | ||||
|  | ||||
|     using Runtime = Vulkan::TextureCacheRuntime; | ||||
|     using Image = Vulkan::Image; | ||||
|   | ||||
| @@ -1762,8 +1762,8 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag | ||||
|     } | ||||
|     UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); | ||||
|     UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); | ||||
|     if constexpr (HAS_PIXEL_FORMAT_CONVERSIONS) { | ||||
|         return runtime.ConvertImage(dst, src, copies); | ||||
|     if (runtime.ShouldReinterpret(dst, src)) { | ||||
|         return runtime.ReinterpretImage(dst, src, copies); | ||||
|     } | ||||
|     for (const ImageCopy& copy : copies) { | ||||
|         UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); | ||||
|   | ||||
| @@ -59,8 +59,6 @@ class TextureCache { | ||||
|     static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; | ||||
|     /// True when the API can provide info about the memory of the device. | ||||
|     static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; | ||||
|     /// True when the API provides utilities for pixel format conversions. | ||||
|     static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = P::HAS_PIXEL_FORMAT_CONVERSIONS; | ||||
|  | ||||
|     static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; | ||||
|     static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Fernando Sahmkow
					Fernando Sahmkow