Texture Cache: Implement async texture downloads.
This commit is contained in:
		| @@ -354,6 +354,7 @@ struct TextureCacheParams { | ||||
|     static constexpr bool FRAMEBUFFER_BLITS = true; | ||||
|     static constexpr bool HAS_EMULATED_COPIES = true; | ||||
|     static constexpr bool HAS_DEVICE_MEMORY_INFO = true; | ||||
|     static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; | ||||
|  | ||||
|     using Runtime = OpenGL::TextureCacheRuntime; | ||||
|     using Image = OpenGL::Image; | ||||
| @@ -361,6 +362,7 @@ struct TextureCacheParams { | ||||
|     using ImageView = OpenGL::ImageView; | ||||
|     using Sampler = OpenGL::Sampler; | ||||
|     using Framebuffer = OpenGL::Framebuffer; | ||||
|     using AsyncBuffer = u32; | ||||
| }; | ||||
|  | ||||
| using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; | ||||
|   | ||||
| @@ -812,8 +812,12 @@ StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) { | ||||
|     return staging_buffer_pool.Request(size, MemoryUsage::Upload); | ||||
| } | ||||
|  | ||||
| StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) { | ||||
|     return staging_buffer_pool.Request(size, MemoryUsage::Download); | ||||
| StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) { | ||||
|     return staging_buffer_pool.Request(size, MemoryUsage::Download, deferred); | ||||
| } | ||||
|  | ||||
| void TextureCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) { | ||||
|     staging_buffer_pool.FreeDeferred(ref); | ||||
| } | ||||
|  | ||||
| bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { | ||||
|   | ||||
| @@ -51,7 +51,9 @@ public: | ||||
|  | ||||
|     StagingBufferRef UploadStagingBuffer(size_t size); | ||||
|  | ||||
|     StagingBufferRef DownloadStagingBuffer(size_t size); | ||||
|     StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false); | ||||
|  | ||||
|     void FreeDeferredStagingBuffer(StagingBufferRef& ref); | ||||
|  | ||||
|     void TickFrame(); | ||||
|  | ||||
| @@ -347,6 +349,7 @@ struct TextureCacheParams { | ||||
|     static constexpr bool FRAMEBUFFER_BLITS = false; | ||||
|     static constexpr bool HAS_EMULATED_COPIES = false; | ||||
|     static constexpr bool HAS_DEVICE_MEMORY_INFO = true; | ||||
|     static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; | ||||
|  | ||||
|     using Runtime = Vulkan::TextureCacheRuntime; | ||||
|     using Image = Vulkan::Image; | ||||
| @@ -354,6 +357,7 @@ struct TextureCacheParams { | ||||
|     using ImageView = Vulkan::ImageView; | ||||
|     using Sampler = Vulkan::Sampler; | ||||
|     using Framebuffer = Vulkan::Framebuffer; | ||||
|     using AsyncBuffer = Vulkan::StagingBufferRef; | ||||
| }; | ||||
|  | ||||
| using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; | ||||
|   | ||||
| @@ -646,7 +646,28 @@ bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept { | ||||
| template <class P> | ||||
| void TextureCache<P>::CommitAsyncFlushes() { | ||||
|     // This is intentionally passing the value by copy | ||||
|     committed_downloads.push(uncommitted_downloads); | ||||
|     if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | ||||
|         const std::span<const ImageId> download_ids = uncommitted_downloads; | ||||
|         if (download_ids.empty()) { | ||||
|             committed_downloads.emplace_back(std::move(uncommitted_downloads)); | ||||
|             uncommitted_downloads.clear(); | ||||
|             async_buffers.emplace_back(std::optional<AsyncBuffer>{}); | ||||
|             return; | ||||
|         } | ||||
|         size_t total_size_bytes = 0; | ||||
|         for (const ImageId image_id : download_ids) { | ||||
|             total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | ||||
|         } | ||||
|         auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); | ||||
|         for (const ImageId image_id : download_ids) { | ||||
|             Image& image = slot_images[image_id]; | ||||
|             const auto copies = FullDownloadCopies(image.info); | ||||
|             image.DownloadMemory(download_map, copies); | ||||
|             download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); | ||||
|         } | ||||
|         async_buffers.emplace_back(download_map); | ||||
|     } | ||||
|     committed_downloads.emplace_back(std::move(uncommitted_downloads)); | ||||
|     uncommitted_downloads.clear(); | ||||
| } | ||||
|  | ||||
| @@ -655,37 +676,58 @@ void TextureCache<P>::PopAsyncFlushes() { | ||||
|     if (committed_downloads.empty()) { | ||||
|         return; | ||||
|     } | ||||
|     const std::span<const ImageId> download_ids = committed_downloads.front(); | ||||
|     if (download_ids.empty()) { | ||||
|         committed_downloads.pop(); | ||||
|         return; | ||||
|     if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | ||||
|         const std::span<const ImageId> download_ids = committed_downloads.front(); | ||||
|         if (download_ids.empty()) { | ||||
|             committed_downloads.pop_front(); | ||||
|             async_buffers.pop_front(); | ||||
|             return; | ||||
|         } | ||||
|         auto download_map = *async_buffers.front(); | ||||
|         std::span<u8> download_span = download_map.mapped_span; | ||||
|         for (size_t i = download_ids.size(); i > 0; i--) { | ||||
|             const ImageBase& image = slot_images[download_ids[i - 1]]; | ||||
|             const auto copies = FullDownloadCopies(image.info); | ||||
|             download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); | ||||
|             std::span<u8> download_span_alt = download_span.subspan(download_map.offset); | ||||
|             SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt, | ||||
|                          swizzle_data_buffer); | ||||
|         } | ||||
|         runtime.FreeDeferredStagingBuffer(download_map); | ||||
|         committed_downloads.pop_front(); | ||||
|         async_buffers.pop_front(); | ||||
|     } else { | ||||
|         const std::span<const ImageId> download_ids = committed_downloads.front(); | ||||
|         if (download_ids.empty()) { | ||||
|             committed_downloads.pop_front(); | ||||
|             return; | ||||
|         } | ||||
|         size_t total_size_bytes = 0; | ||||
|         for (const ImageId image_id : download_ids) { | ||||
|             total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | ||||
|         } | ||||
|         auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); | ||||
|         const size_t original_offset = download_map.offset; | ||||
|         for (const ImageId image_id : download_ids) { | ||||
|             Image& image = slot_images[image_id]; | ||||
|             const auto copies = FullDownloadCopies(image.info); | ||||
|             image.DownloadMemory(download_map, copies); | ||||
|             download_map.offset += image.unswizzled_size_bytes; | ||||
|         } | ||||
|         // Wait for downloads to finish | ||||
|         runtime.Finish(); | ||||
|         download_map.offset = original_offset; | ||||
|         std::span<u8> download_span = download_map.mapped_span; | ||||
|         for (const ImageId image_id : download_ids) { | ||||
|             const ImageBase& image = slot_images[image_id]; | ||||
|             const auto copies = FullDownloadCopies(image.info); | ||||
|             SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, | ||||
|                          swizzle_data_buffer); | ||||
|             download_map.offset += image.unswizzled_size_bytes; | ||||
|             download_span = download_span.subspan(image.unswizzled_size_bytes); | ||||
|         } | ||||
|         committed_downloads.pop_front(); | ||||
|     } | ||||
|     size_t total_size_bytes = 0; | ||||
|     for (const ImageId image_id : download_ids) { | ||||
|         total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | ||||
|     } | ||||
|     auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); | ||||
|     const size_t original_offset = download_map.offset; | ||||
|     for (const ImageId image_id : download_ids) { | ||||
|         Image& image = slot_images[image_id]; | ||||
|         const auto copies = FullDownloadCopies(image.info); | ||||
|         image.DownloadMemory(download_map, copies); | ||||
|         download_map.offset += image.unswizzled_size_bytes; | ||||
|     } | ||||
|     // Wait for downloads to finish | ||||
|     runtime.Finish(); | ||||
|  | ||||
|     download_map.offset = original_offset; | ||||
|     std::span<u8> download_span = download_map.mapped_span; | ||||
|     for (const ImageId image_id : download_ids) { | ||||
|         const ImageBase& image = slot_images[image_id]; | ||||
|         const auto copies = FullDownloadCopies(image.info); | ||||
|         SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, | ||||
|                      swizzle_data_buffer); | ||||
|         download_map.offset += image.unswizzled_size_bytes; | ||||
|         download_span = download_span.subspan(image.unswizzled_size_bytes); | ||||
|     } | ||||
|     committed_downloads.pop(); | ||||
| } | ||||
|  | ||||
| template <class P> | ||||
|   | ||||
| @@ -92,6 +92,8 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI | ||||
|     static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; | ||||
|     /// True when the API can provide info about the memory of the device. | ||||
|     static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; | ||||
|     /// True when the API can do asynchronous texture downloads. | ||||
|     static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS; | ||||
|  | ||||
|     static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()}; | ||||
|  | ||||
| @@ -106,6 +108,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI | ||||
|     using ImageView = typename P::ImageView; | ||||
|     using Sampler = typename P::Sampler; | ||||
|     using Framebuffer = typename P::Framebuffer; | ||||
|     using AsyncBuffer = typename P::AsyncBuffer; | ||||
|  | ||||
|     struct BlitImages { | ||||
|         ImageId dst_id; | ||||
| @@ -403,7 +406,8 @@ private: | ||||
|  | ||||
|     // TODO: This data structure is not optimal and it should be reworked | ||||
|     std::vector<ImageId> uncommitted_downloads; | ||||
|     std::queue<std::vector<ImageId>> committed_downloads; | ||||
|     std::deque<std::vector<ImageId>> committed_downloads; | ||||
|     std::deque<std::optional<AsyncBuffer>> async_buffers; | ||||
|  | ||||
|     struct LRUItemParams { | ||||
|         using ObjectType = ImageId; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Fernando Sahmkow
					Fernando Sahmkow