Merge pull request #3278 from ReinUsesLisp/vk-memory-manager
renderer_vulkan: Buffer cache, stream buffer and memory manager changes
		| @@ -2,124 +2,145 @@ | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include <algorithm> | ||||
| #include <cstring> | ||||
| #include <memory> | ||||
| #include <optional> | ||||
| #include <tuple> | ||||
|  | ||||
| #include "common/alignment.h" | ||||
| #include "common/assert.h" | ||||
| #include "core/memory.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "common/bit_util.h" | ||||
| #include "core/core.h" | ||||
| #include "video_core/renderer_vulkan/declarations.h" | ||||
| #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||||
| #include "video_core/renderer_vulkan/vk_device.h" | ||||
| #include "video_core/renderer_vulkan/vk_scheduler.h" | ||||
| #include "video_core/renderer_vulkan/vk_stream_buffer.h" | ||||
|  | ||||
| namespace Vulkan { | ||||
|  | ||||
| CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, | ||||
|                                      std::size_t alignment, u8* host_ptr) | ||||
|     : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset}, | ||||
|       alignment{alignment} {} | ||||
| namespace { | ||||
|  | ||||
| VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, | ||||
|                              Memory::Memory& cpu_memory_, | ||||
|                              VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, | ||||
|                              VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size) | ||||
|     : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager}, cpu_memory{ | ||||
|                                                                                    cpu_memory_} { | ||||
|     const auto usage = vk::BufferUsageFlagBits::eVertexBuffer | | ||||
|                        vk::BufferUsageFlagBits::eIndexBuffer | | ||||
|                        vk::BufferUsageFlagBits::eUniformBuffer; | ||||
|     const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead | | ||||
|                         vk::AccessFlagBits::eUniformRead; | ||||
|     stream_buffer = | ||||
|         std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access, | ||||
|                                          vk::PipelineStageFlagBits::eAllCommands); | ||||
|     buffer_handle = stream_buffer->GetBuffer(); | ||||
| const auto BufferUsage = | ||||
|     vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer | | ||||
|     vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer; | ||||
|  | ||||
| const auto UploadPipelineStage = | ||||
|     vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput | | ||||
|     vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader | | ||||
|     vk::PipelineStageFlagBits::eComputeShader; | ||||
|  | ||||
| const auto UploadAccessBarriers = | ||||
|     vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead | | ||||
|     vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead | | ||||
|     vk::AccessFlagBits::eIndexRead; | ||||
|  | ||||
| auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { | ||||
|     return std::make_unique<VKStreamBuffer>(device, scheduler, BufferUsage); | ||||
| } | ||||
|  | ||||
| } // Anonymous namespace | ||||
|  | ||||
| CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, | ||||
|                                      CacheAddr cache_addr, std::size_t size) | ||||
|     : VideoCommon::BufferBlock{cache_addr, size} { | ||||
|     const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size), | ||||
|                                          BufferUsage | vk::BufferUsageFlagBits::eTransferSrc | | ||||
|                                              vk::BufferUsageFlagBits::eTransferDst, | ||||
|                                          vk::SharingMode::eExclusive, 0, nullptr); | ||||
|  | ||||
|     const auto& dld{device.GetDispatchLoader()}; | ||||
|     const auto dev{device.GetLogical()}; | ||||
|     buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld); | ||||
|     buffer.commit = memory_manager.Commit(*buffer.handle, false); | ||||
| } | ||||
|  | ||||
| CachedBufferBlock::~CachedBufferBlock() = default; | ||||
|  | ||||
| VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, | ||||
|                              const VKDevice& device, VKMemoryManager& memory_manager, | ||||
|                              VKScheduler& scheduler, VKStagingBufferPool& staging_pool) | ||||
|     : VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer>{rasterizer, system, | ||||
|                                                                    CreateStreamBuffer(device, | ||||
|                                                                                       scheduler)}, | ||||
|       device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ | ||||
|                                                                                 staging_pool} {} | ||||
|  | ||||
| VKBufferCache::~VKBufferCache() = default; | ||||
|  | ||||
| u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) { | ||||
|     const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)}; | ||||
|     ASSERT_MSG(cpu_addr, "Invalid GPU address"); | ||||
|  | ||||
|     // Cache management is a big overhead, so only cache entries with a given size. | ||||
|     // TODO: Figure out which size is the best for given games. | ||||
|     cache &= size >= 2048; | ||||
|  | ||||
|     u8* const host_ptr{cpu_memory.GetPointer(*cpu_addr)}; | ||||
|     if (cache) { | ||||
|         const auto entry = TryGet(host_ptr); | ||||
|         if (entry) { | ||||
|             if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { | ||||
|                 return entry->GetOffset(); | ||||
|             } | ||||
|             Unregister(entry); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     AlignBuffer(alignment); | ||||
|     const u64 uploaded_offset = buffer_offset; | ||||
|  | ||||
|     if (host_ptr == nullptr) { | ||||
|         return uploaded_offset; | ||||
|     } | ||||
|  | ||||
|     std::memcpy(buffer_ptr, host_ptr, size); | ||||
|     buffer_ptr += size; | ||||
|     buffer_offset += size; | ||||
|  | ||||
|     if (cache) { | ||||
|         auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset, | ||||
|                                                          alignment, host_ptr); | ||||
|         Register(entry); | ||||
|     } | ||||
|  | ||||
|     return uploaded_offset; | ||||
| Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { | ||||
|     return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size); | ||||
| } | ||||
|  | ||||
| u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) { | ||||
|     AlignBuffer(alignment); | ||||
|     std::memcpy(buffer_ptr, raw_pointer, size); | ||||
|     const u64 uploaded_offset = buffer_offset; | ||||
|  | ||||
|     buffer_ptr += size; | ||||
|     buffer_offset += size; | ||||
|     return uploaded_offset; | ||||
| const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) { | ||||
|     return buffer->GetHandle(); | ||||
| } | ||||
|  | ||||
| std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) { | ||||
|     AlignBuffer(alignment); | ||||
|     u8* const uploaded_ptr = buffer_ptr; | ||||
|     const u64 uploaded_offset = buffer_offset; | ||||
|  | ||||
|     buffer_ptr += size; | ||||
|     buffer_offset += size; | ||||
|     return {uploaded_ptr, uploaded_offset}; | ||||
| const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) { | ||||
|     size = std::max(size, std::size_t(4)); | ||||
|     const auto& empty = staging_pool.GetUnusedBuffer(size, false); | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
|     scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) { | ||||
|         cmdbuf.fillBuffer(buffer, 0, size, 0, dld); | ||||
|     }); | ||||
|     return &*empty.handle; | ||||
| } | ||||
|  | ||||
| void VKBufferCache::Reserve(std::size_t max_size) { | ||||
|     bool invalidate; | ||||
|     std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size); | ||||
|     buffer_offset = buffer_offset_base; | ||||
| void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||||
|                                     const u8* data) { | ||||
|     const auto& staging = staging_pool.GetUnusedBuffer(size, true); | ||||
|     std::memcpy(staging.commit->Map(size), data, size); | ||||
|  | ||||
|     if (invalidate) { | ||||
|         InvalidateAll(); | ||||
|     } | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
|     scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, | ||||
|                       size](auto cmdbuf, auto& dld) { | ||||
|         cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld); | ||||
|         cmdbuf.pipelineBarrier( | ||||
|             vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {}, | ||||
|             {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers, | ||||
|                                      VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, | ||||
|                                      offset, size)}, | ||||
|             {}, dld); | ||||
|     }); | ||||
| } | ||||
|  | ||||
| void VKBufferCache::Send() { | ||||
|     stream_buffer->Send(buffer_offset - buffer_offset_base); | ||||
| void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||||
|                                       u8* data) { | ||||
|     const auto& staging = staging_pool.GetUnusedBuffer(size, true); | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
|     scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, | ||||
|                       size](auto cmdbuf, auto& dld) { | ||||
|         cmdbuf.pipelineBarrier( | ||||
|             vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader | | ||||
|                 vk::PipelineStageFlagBits::eComputeShader, | ||||
|             vk::PipelineStageFlagBits::eTransfer, {}, {}, | ||||
|             {vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite, | ||||
|                                      vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED, | ||||
|                                      VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)}, | ||||
|             {}, dld); | ||||
|         cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld); | ||||
|     }); | ||||
|     scheduler.Finish(); | ||||
|  | ||||
|     std::memcpy(data, staging.commit->Map(size), size); | ||||
| } | ||||
|  | ||||
| void VKBufferCache::AlignBuffer(std::size_t alignment) { | ||||
|     // Align the offset, not the mapped pointer | ||||
|     const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment); | ||||
|     buffer_ptr += offset_aligned - buffer_offset; | ||||
|     buffer_offset = offset_aligned; | ||||
| void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, | ||||
|                               std::size_t dst_offset, std::size_t size) { | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
|     scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset, | ||||
|                       dst_offset, size](auto cmdbuf, auto& dld) { | ||||
|         cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld); | ||||
|         cmdbuf.pipelineBarrier( | ||||
|             vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {}, | ||||
|             {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead, | ||||
|                                      vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED, | ||||
|                                      VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size), | ||||
|              vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers, | ||||
|                                      VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer, | ||||
|                                      dst_offset, size)}, | ||||
|             {}, dld); | ||||
|     }); | ||||
| } | ||||
|  | ||||
| } // namespace Vulkan | ||||
|   | ||||
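For readers tracing the new upload path: UploadBlockData stages data in a host-visible buffer, records a GPU copy, and then issues a buffer memory barrier so later shader stages observe the write. Below is a minimal standalone sketch of that copy-then-barrier pattern with vulkan-hpp; all handles are assumed valid, and where the real code widens the destination access mask to UploadAccessBarriers, this sketch narrows it to eShaderRead.

```cpp
#include <vulkan/vulkan.hpp>

// Hedged sketch of the pattern in VKBufferCache::UploadBlockData: record a
// buffer copy, then make the transfer write visible to later shader reads.
void CopyThenBarrier(vk::CommandBuffer cmdbuf, vk::Buffer staging, vk::Buffer dest,
                     vk::DeviceSize offset, vk::DeviceSize size,
                     const vk::DispatchLoaderDynamic& dld) {
    cmdbuf.copyBuffer(staging, dest, {vk::BufferCopy(0, offset, size)}, dld);
    const vk::BufferMemoryBarrier barrier(
        vk::AccessFlagBits::eTransferWrite, // access produced by the copy
        vk::AccessFlagBits::eShaderRead,    // access we want ordered after it
        VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dest, offset, size);
    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
                           vk::PipelineStageFlagBits::eVertexShader |
                               vk::PipelineStageFlagBits::eFragmentShader |
                               vk::PipelineStageFlagBits::eComputeShader,
                           {}, {}, {barrier}, {}, dld);
}
```

DownloadBlockData records the mirror image: a shader-write to transfer-read barrier before copying back into staging memory.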
| @@ -5,105 +5,74 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <memory> | ||||
| #include <tuple> | ||||
| #include <unordered_map> | ||||
| #include <vector> | ||||
|  | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/gpu.h" | ||||
| #include "video_core/buffer_cache/buffer_cache.h" | ||||
| #include "video_core/rasterizer_cache.h" | ||||
| #include "video_core/renderer_vulkan/declarations.h" | ||||
| #include "video_core/renderer_vulkan/vk_scheduler.h" | ||||
| #include "video_core/renderer_vulkan/vk_memory_manager.h" | ||||
| #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||||
| #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||||
| #include "video_core/renderer_vulkan/vk_stream_buffer.h" | ||||
|  | ||||
| namespace Memory { | ||||
| class Memory; | ||||
| } | ||||
|  | ||||
| namespace Tegra { | ||||
| class MemoryManager; | ||||
| namespace Core { | ||||
| class System; | ||||
| } | ||||
|  | ||||
| namespace Vulkan { | ||||
|  | ||||
| class VKDevice; | ||||
| class VKFence; | ||||
| class VKMemoryManager; | ||||
| class VKStreamBuffer; | ||||
| class VKScheduler; | ||||
|  | ||||
| class CachedBufferEntry final : public RasterizerCacheObject { | ||||
| class CachedBufferBlock final : public VideoCommon::BufferBlock { | ||||
| public: | ||||
|     explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment, | ||||
|                                u8* host_ptr); | ||||
|     explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, | ||||
|                                CacheAddr cache_addr, std::size_t size); | ||||
|     ~CachedBufferBlock(); | ||||
|  | ||||
|     VAddr GetCpuAddr() const override { | ||||
|         return cpu_addr; | ||||
|     } | ||||
|  | ||||
|     std::size_t GetSizeInBytes() const override { | ||||
|         return size; | ||||
|     } | ||||
|  | ||||
|     std::size_t GetSize() const { | ||||
|         return size; | ||||
|     } | ||||
|  | ||||
|     u64 GetOffset() const { | ||||
|         return offset; | ||||
|     } | ||||
|  | ||||
|     std::size_t GetAlignment() const { | ||||
|         return alignment; | ||||
|     const vk::Buffer* GetHandle() const { | ||||
|         return &*buffer.handle; | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     VAddr cpu_addr{}; | ||||
|     std::size_t size{}; | ||||
|     u64 offset{}; | ||||
|     std::size_t alignment{}; | ||||
|     VKBuffer buffer; | ||||
| }; | ||||
|  | ||||
| class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { | ||||
| using Buffer = std::shared_ptr<CachedBufferBlock>; | ||||
|  | ||||
| class VKBufferCache final : public VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer> { | ||||
| public: | ||||
|     explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, Memory::Memory& cpu_memory_, | ||||
|                            VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, | ||||
|                            VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size); | ||||
|     explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, | ||||
|                            const VKDevice& device, VKMemoryManager& memory_manager, | ||||
|                            VKScheduler& scheduler, VKStagingBufferPool& staging_pool); | ||||
|     ~VKBufferCache(); | ||||
|  | ||||
|     /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been | ||||
|     /// allocated. | ||||
|     u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true); | ||||
|  | ||||
|     /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. | ||||
|     u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4); | ||||
|  | ||||
|     /// Reserves memory to be used by host's CPU. Returns mapped address and offset. | ||||
|     std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4); | ||||
|  | ||||
|     /// Reserves a region of memory to be used in subsequent upload/reserve operations. | ||||
|     void Reserve(std::size_t max_size); | ||||
|  | ||||
|     /// Ensures that the set data is sent to the device. | ||||
|     void Send(); | ||||
|  | ||||
|     /// Returns the buffer cache handle. | ||||
|     vk::Buffer GetBuffer() const { | ||||
|         return buffer_handle; | ||||
|     } | ||||
|     const vk::Buffer* GetEmptyBuffer(std::size_t size) override; | ||||
|  | ||||
| protected: | ||||
|     // We do not have to flush this cache as things in it are never modified by us. | ||||
|     void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} | ||||
|     void WriteBarrier() override {} | ||||
|  | ||||
|     Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; | ||||
|  | ||||
|     const vk::Buffer* ToHandle(const Buffer& buffer) override; | ||||
|  | ||||
|     void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||||
|                          const u8* data) override; | ||||
|  | ||||
|     void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||||
|                            u8* data) override; | ||||
|  | ||||
|     void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, | ||||
|                    std::size_t dst_offset, std::size_t size) override; | ||||
|  | ||||
| private: | ||||
|     void AlignBuffer(std::size_t alignment); | ||||
|  | ||||
|     Tegra::MemoryManager& tegra_memory_manager; | ||||
|     Memory::Memory& cpu_memory; | ||||
|  | ||||
|     std::unique_ptr<VKStreamBuffer> stream_buffer; | ||||
|     vk::Buffer buffer_handle; | ||||
|  | ||||
|     u8* buffer_ptr = nullptr; | ||||
|     u64 buffer_offset = 0; | ||||
|     u64 buffer_offset_base = 0; | ||||
|     const VKDevice& device; | ||||
|     VKMemoryManager& memory_manager; | ||||
|     VKScheduler& scheduler; | ||||
|     VKStagingBufferPool& staging_pool; | ||||
| }; | ||||
|  | ||||
| } // namespace Vulkan | ||||
|   | ||||
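The header only declares the hooks; block allocation itself is the create-then-commit pattern in vk_buffer_cache.cpp. A condensed sketch follows (BufferUsage is the file-local constant from the .cpp, and the device and memory_manager references are assumed valid, as in CachedBufferBlock's constructor):

```cpp
// Sketch of CachedBufferBlock's allocation: create a buffer with transfer
// usage added on top of BufferUsage, then bind device-local memory to it.
VKBuffer MakeBlock(const VKDevice& device, VKMemoryManager& memory_manager, std::size_t size) {
    const vk::BufferCreateInfo buffer_ci(
        {}, static_cast<vk::DeviceSize>(size),
        BufferUsage | vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst,
        vk::SharingMode::eExclusive, 0, nullptr);
    VKBuffer buffer;
    buffer.handle = device.GetLogical().createBufferUnique(buffer_ci, nullptr,
                                                           device.GetDispatchLoader());
    buffer.commit = memory_manager.Commit(*buffer.handle, false); // false = device local
    return buffer;
}
```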
| @@ -6,6 +6,7 @@ | ||||
| #include <optional> | ||||
| #include <tuple> | ||||
| #include <vector> | ||||
|  | ||||
| #include "common/alignment.h" | ||||
| #include "common/assert.h" | ||||
| #include "common/common_types.h" | ||||
| @@ -16,34 +17,32 @@ | ||||
|  | ||||
| namespace Vulkan { | ||||
|  | ||||
| // TODO(Rodrigo): Fine tune this number | ||||
| constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024; | ||||
| namespace { | ||||
|  | ||||
| u64 GetAllocationChunkSize(u64 required_size) { | ||||
|     static constexpr u64 sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20}; | ||||
|     auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size); | ||||
|     return it != std::end(sizes) ? *it : Common::AlignUp(required_size, 256ULL << 20); | ||||
| } | ||||
|  | ||||
| } // Anonymous namespace | ||||
|  | ||||
| class VKMemoryAllocation final { | ||||
| public: | ||||
|     explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory, | ||||
|                                 vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type) | ||||
|         : device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size}, | ||||
|           shifted_type{ShiftType(type)}, is_mappable{properties & | ||||
|                                                      vk::MemoryPropertyFlagBits::eHostVisible} { | ||||
|         if (is_mappable) { | ||||
|             const auto dev = device.GetLogical(); | ||||
|             const auto& dld = device.GetDispatchLoader(); | ||||
|             base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld)); | ||||
|         } | ||||
|     } | ||||
|                                 vk::MemoryPropertyFlags properties, u64 allocation_size, u32 type) | ||||
|         : device{device}, memory{memory}, properties{properties}, allocation_size{allocation_size}, | ||||
|           shifted_type{ShiftType(type)} {} | ||||
|  | ||||
|     ~VKMemoryAllocation() { | ||||
|         const auto dev = device.GetLogical(); | ||||
|         const auto& dld = device.GetDispatchLoader(); | ||||
|         if (is_mappable) | ||||
|             dev.unmapMemory(memory, dld); | ||||
|         dev.free(memory, nullptr, dld); | ||||
|     } | ||||
|  | ||||
|     VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) { | ||||
|         auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size), | ||||
|                                         static_cast<u64>(alignment)); | ||||
|         auto found = TryFindFreeSection(free_iterator, allocation_size, | ||||
|                                         static_cast<u64>(commit_size), static_cast<u64>(alignment)); | ||||
|         if (!found) { | ||||
|             found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size), | ||||
|                                        static_cast<u64>(alignment)); | ||||
| @@ -52,8 +51,7 @@ public: | ||||
|                 return nullptr; | ||||
|             } | ||||
|         } | ||||
|         u8* address = is_mappable ? base_address + *found : nullptr; | ||||
|         auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found, | ||||
|         auto commit = std::make_unique<VKMemoryCommitImpl>(device, this, memory, *found, | ||||
|                                                            *found + commit_size); | ||||
|         commits.push_back(commit.get()); | ||||
|  | ||||
| @@ -65,12 +63,10 @@ public: | ||||
|  | ||||
|     void Free(const VKMemoryCommitImpl* commit) { | ||||
|         ASSERT(commit); | ||||
|         const auto it = | ||||
|             std::find_if(commits.begin(), commits.end(), | ||||
|                          [&](const auto& stored_commit) { return stored_commit == commit; }); | ||||
|  | ||||
|         const auto it = std::find(std::begin(commits), std::end(commits), commit); | ||||
|         if (it == commits.end()) { | ||||
|             LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!"); | ||||
|             UNREACHABLE(); | ||||
|             UNREACHABLE_MSG("Freeing unallocated commit!"); | ||||
|             return; | ||||
|         } | ||||
|         commits.erase(it); | ||||
| @@ -88,11 +84,11 @@ private: | ||||
|     } | ||||
|  | ||||
|     /// A memory allocator that may return a free region between "start" and "end" with the solicited | ||||
|     /// requeriments. | ||||
|     /// requirements. | ||||
|     std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const { | ||||
|         u64 iterator = start; | ||||
|         while (iterator + size < end) { | ||||
|             const u64 try_left = Common::AlignUp(iterator, alignment); | ||||
|         u64 iterator = Common::AlignUp(start, alignment); | ||||
|         while (iterator + size <= end) { | ||||
|             const u64 try_left = iterator; | ||||
|             const u64 try_right = try_left + size; | ||||
|  | ||||
|             bool overlap = false; | ||||
| @@ -100,7 +96,7 @@ private: | ||||
|                 const auto [commit_left, commit_right] = commit->interval; | ||||
|                 if (try_left < commit_right && commit_left < try_right) { | ||||
|                     // There's an overlap, continue the search where the overlapping commit ends. | ||||
|                     iterator = commit_right; | ||||
|                     iterator = Common::AlignUp(commit_right, alignment); | ||||
|                     overlap = true; | ||||
|                     break; | ||||
|                 } | ||||
| @@ -110,6 +106,7 @@ private: | ||||
|                 return try_left; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // No free regions were found, return an empty optional. | ||||
|         return std::nullopt; | ||||
|     } | ||||
| @@ -117,12 +114,8 @@ private: | ||||
|     const VKDevice& device;                   ///< Vulkan device. | ||||
|     const vk::DeviceMemory memory;            ///< Vulkan memory allocation handler. | ||||
|     const vk::MemoryPropertyFlags properties; ///< Vulkan properties. | ||||
|     const u64 alloc_size;                     ///< Size of this allocation. | ||||
|     const u64 allocation_size;                ///< Size of this allocation. | ||||
|     const u32 shifted_type;                   ///< Stored Vulkan type of this allocation, shifted. | ||||
|     const bool is_mappable;                   ///< Whether the allocation is mappable. | ||||
|  | ||||
|     /// Base address of the mapped pointer. | ||||
|     u8* base_address{}; | ||||
|  | ||||
|     /// Hints where the next free region is likely going to be. | ||||
|     u64 free_iterator{}; | ||||
| @@ -132,13 +125,15 @@ private: | ||||
| }; | ||||
|  | ||||
| VKMemoryManager::VKMemoryManager(const VKDevice& device) | ||||
|     : device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())}, | ||||
|       is_memory_unified{GetMemoryUnified(props)} {} | ||||
|     : device{device}, properties{device.GetPhysical().getMemoryProperties( | ||||
|                           device.GetDispatchLoader())}, | ||||
|       is_memory_unified{GetMemoryUnified(properties)} {} | ||||
|  | ||||
| VKMemoryManager::~VKMemoryManager() = default; | ||||
|  | ||||
| VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) { | ||||
|     ASSERT(reqs.size < ALLOC_CHUNK_SIZE); | ||||
| VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirements, | ||||
|                                        bool host_visible) { | ||||
|     const u64 chunk_size = GetAllocationChunkSize(requirements.size); | ||||
|  | ||||
|     // When a host visible commit is asked, search for host visible and coherent, otherwise search | ||||
|     // for a fast device local type. | ||||
| @@ -147,32 +142,21 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool | ||||
|             ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | ||||
|             : vk::MemoryPropertyFlagBits::eDeviceLocal; | ||||
|  | ||||
|     const auto TryCommit = [&]() -> VKMemoryCommit { | ||||
|         for (auto& alloc : allocs) { | ||||
|             if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits)) | ||||
|                 continue; | ||||
|  | ||||
|             if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) { | ||||
|                 return commit; | ||||
|             } | ||||
|         } | ||||
|         return {}; | ||||
|     }; | ||||
|  | ||||
|     if (auto commit = TryCommit(); commit) { | ||||
|     if (auto commit = TryAllocCommit(requirements, wanted_properties)) { | ||||
|         return commit; | ||||
|     } | ||||
|  | ||||
|     // Commit has failed, allocate more memory. | ||||
|     if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) { | ||||
|         // TODO(Rodrigo): Try to use host memory. | ||||
|         LOG_CRITICAL(Render_Vulkan, "Ran out of memory!"); | ||||
|         UNREACHABLE(); | ||||
|     if (!AllocMemory(wanted_properties, requirements.memoryTypeBits, chunk_size)) { | ||||
|         // TODO(Rodrigo): Handle these situations in some way like flushing to guest memory. | ||||
|         // Allocation has failed, panic. | ||||
|         UNREACHABLE_MSG("Ran out of VRAM!"); | ||||
|         return {}; | ||||
|     } | ||||
|  | ||||
|     // Commit again, this time it won't fail since there's a fresh allocation above. If it does, | ||||
|     // there's a bug. | ||||
|     auto commit = TryCommit(); | ||||
|     auto commit = TryAllocCommit(requirements, wanted_properties); | ||||
|     ASSERT(commit); | ||||
|     return commit; | ||||
| } | ||||
| @@ -180,8 +164,7 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool | ||||
| VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) { | ||||
|     const auto dev = device.GetLogical(); | ||||
|     const auto& dld = device.GetDispatchLoader(); | ||||
|     const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld); | ||||
|     auto commit = Commit(requeriments, host_visible); | ||||
|     auto commit = Commit(dev.getBufferMemoryRequirements(buffer, dld), host_visible); | ||||
|     dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld); | ||||
|     return commit; | ||||
| } | ||||
| @@ -189,25 +172,23 @@ VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) { | ||||
| VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) { | ||||
|     const auto dev = device.GetLogical(); | ||||
|     const auto& dld = device.GetDispatchLoader(); | ||||
|     const auto requeriments = dev.getImageMemoryRequirements(image, dld); | ||||
|     auto commit = Commit(requeriments, host_visible); | ||||
|     auto commit = Commit(dev.getImageMemoryRequirements(image, dld), host_visible); | ||||
|     dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld); | ||||
|     return commit; | ||||
| } | ||||
|  | ||||
| bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, | ||||
|                                   u64 size) { | ||||
|     const u32 type = [&]() { | ||||
|         for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { | ||||
|             const auto flags = props.memoryTypes[type_index].propertyFlags; | ||||
|     const u32 type = [&] { | ||||
|         for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) { | ||||
|             const auto flags = properties.memoryTypes[type_index].propertyFlags; | ||||
|             if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) { | ||||
|                 // The type matches in type and in the wanted properties. | ||||
|                 return type_index; | ||||
|             } | ||||
|         } | ||||
|         LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!"); | ||||
|         UNREACHABLE(); | ||||
|         return 0u; | ||||
|         UNREACHABLE_MSG("Couldn't find a compatible memory type!"); | ||||
|         return 0U; | ||||
|     }(); | ||||
|  | ||||
|     const auto dev = device.GetLogical(); | ||||
| @@ -216,19 +197,33 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 | ||||
|     // Try to allocate found type. | ||||
|     const vk::MemoryAllocateInfo memory_ai(size, type); | ||||
|     vk::DeviceMemory memory; | ||||
|     if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld); | ||||
|     if (const auto res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld); | ||||
|         res != vk::Result::eSuccess) { | ||||
|         LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res)); | ||||
|         return false; | ||||
|     } | ||||
|     allocs.push_back( | ||||
|     allocations.push_back( | ||||
|         std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type)); | ||||
|     return true; | ||||
| } | ||||
|  | ||||
| /*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) { | ||||
|     for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) { | ||||
|         if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) { | ||||
| VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& requirements, | ||||
|                                                vk::MemoryPropertyFlags wanted_properties) { | ||||
|     for (auto& allocation : allocations) { | ||||
|         if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) { | ||||
|             continue; | ||||
|         } | ||||
|         if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) { | ||||
|             return commit; | ||||
|         } | ||||
|     } | ||||
|     return {}; | ||||
| } | ||||
|  | ||||
| /*static*/ bool VKMemoryManager::GetMemoryUnified( | ||||
|     const vk::PhysicalDeviceMemoryProperties& properties) { | ||||
|     for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) { | ||||
|         if (!(properties.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) { | ||||
|             // Memory is considered unified when heaps are device local only. | ||||
|             return false; | ||||
|         } | ||||
| @@ -236,17 +231,28 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 | ||||
|     return true; | ||||
| } | ||||
|  | ||||
| VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, | ||||
|                                        u8* data, u64 begin, u64 end) | ||||
|     : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {} | ||||
| VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, | ||||
|                                        vk::DeviceMemory memory, u64 begin, u64 end) | ||||
|     : device{device}, interval{begin, end}, memory{memory}, allocation{allocation} {} | ||||
|  | ||||
| VKMemoryCommitImpl::~VKMemoryCommitImpl() { | ||||
|     allocation->Free(this); | ||||
| } | ||||
|  | ||||
| u8* VKMemoryCommitImpl::GetData() const { | ||||
|     ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit."); | ||||
|     return data; | ||||
| MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { | ||||
|     const auto dev = device.GetLogical(); | ||||
|     const auto address = reinterpret_cast<u8*>( | ||||
|         dev.mapMemory(memory, interval.first + offset_, size, {}, device.GetDispatchLoader())); | ||||
|     return MemoryMap{this, address}; | ||||
| } | ||||
|  | ||||
| void VKMemoryCommitImpl::Unmap() const { | ||||
|     const auto dev = device.GetLogical(); | ||||
|     dev.unmapMemory(memory, device.GetDispatchLoader()); | ||||
| } | ||||
|  | ||||
| MemoryMap VKMemoryCommitImpl::Map() const { | ||||
|     return Map(interval.second - interval.first); | ||||
| } | ||||
|  | ||||
| } // namespace Vulkan | ||||
|   | ||||
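The new GetAllocationChunkSize replaces the fixed 64 MiB ALLOC_CHUNK_SIZE with bucketed growth: pick the first bucket that fits, or round huge requests up to the next 256 MiB multiple. A self-contained copy of that logic with worked values:

```cpp
#include <algorithm>
#include <cstdint>
#include <iterator>

// Mirrors GetAllocationChunkSize above, with Common::AlignUp inlined.
std::uint64_t ChunkSize(std::uint64_t required) {
    static constexpr std::uint64_t sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20,
                                              128ULL << 20};
    const auto* it = std::lower_bound(std::begin(sizes), std::end(sizes), required);
    if (it != std::end(sizes)) {
        return *it; // smallest bucket that satisfies the requirement
    }
    constexpr std::uint64_t align = 256ULL << 20;
    return (required + align - 1) / align * align;
}

// ChunkSize(10 MiB) == 16 MiB, ChunkSize(100 MiB) == 128 MiB,
// ChunkSize(300 MiB) == 512 MiB.
```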
| @@ -12,6 +12,7 @@ | ||||
|  | ||||
| namespace Vulkan { | ||||
|  | ||||
| class MemoryMap; | ||||
| class VKDevice; | ||||
| class VKMemoryAllocation; | ||||
| class VKMemoryCommitImpl; | ||||
| @@ -21,13 +22,14 @@ using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>; | ||||
| class VKMemoryManager final { | ||||
| public: | ||||
|     explicit VKMemoryManager(const VKDevice& device); | ||||
|     VKMemoryManager(const VKMemoryManager&) = delete; | ||||
|     ~VKMemoryManager(); | ||||
|  | ||||
|     /** | ||||
|      * Commits a memory with the specified requeriments. | ||||
|      * @param reqs Requeriments returned from a Vulkan call. | ||||
|      * @param requirements Requirements returned from a Vulkan call. | ||||
|      * @param host_visible Signals the allocator that it *must* use host visible and coherent | ||||
|      * memory. When passing false, it will try to allocate device local memory. | ||||
|      *                     memory. When passing false, it will try to allocate device local memory. | ||||
|      * @returns A memory commit. | ||||
|      */ | ||||
|     VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible); | ||||
| @@ -47,25 +49,35 @@ private: | ||||
|     /// Allocates a chunk of memory. | ||||
|     bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size); | ||||
|  | ||||
|     /// Returns true if the device uses a unified memory model. | ||||
|     static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props); | ||||
|     /// Tries to allocate a memory commit. | ||||
|     VKMemoryCommit TryAllocCommit(const vk::MemoryRequirements& requirements, | ||||
|                                   vk::MemoryPropertyFlags wanted_properties); | ||||
|  | ||||
|     const VKDevice& device;                                  ///< Device handler. | ||||
|     const vk::PhysicalDeviceMemoryProperties props;          ///< Physical device properties. | ||||
|     const bool is_memory_unified;                            ///< True if memory model is unified. | ||||
|     std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations. | ||||
|     /// Returns true if the device uses a unified memory model. | ||||
|     static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& properties); | ||||
|  | ||||
|     const VKDevice& device;                              ///< Device handler. | ||||
|     const vk::PhysicalDeviceMemoryProperties properties; ///< Physical device properties. | ||||
|     const bool is_memory_unified;                        ///< True if memory model is unified. | ||||
|     std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations. | ||||
| }; | ||||
|  | ||||
| class VKMemoryCommitImpl final { | ||||
|     friend VKMemoryAllocation; | ||||
|     friend MemoryMap; | ||||
|  | ||||
| public: | ||||
|     explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data, | ||||
|                                 u64 begin, u64 end); | ||||
|     explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, | ||||
|                                 vk::DeviceMemory memory, u64 begin, u64 end); | ||||
|     ~VKMemoryCommitImpl(); | ||||
|  | ||||
|     /// Returns the writeable memory map. The commit has to be mappable. | ||||
|     u8* GetData() const; | ||||
|     /// Maps a memory region and returns a pointer to it. | ||||
|     /// It's illegal to have more than one memory map at the same time. | ||||
|     MemoryMap Map(u64 size, u64 offset = 0) const; | ||||
|  | ||||
|     /// Maps the whole commit and returns a pointer to it. | ||||
|     /// It's illegal to have more than one memory map at the same time. | ||||
|     MemoryMap Map() const; | ||||
|  | ||||
|     /// Returns the Vulkan memory handler. | ||||
|     vk::DeviceMemory GetMemory() const { | ||||
| @@ -78,10 +90,46 @@ public: | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     /// Unmaps memory. | ||||
|     void Unmap() const; | ||||
|  | ||||
|     const VKDevice& device;           ///< Vulkan device. | ||||
|     std::pair<u64, u64> interval{};   ///< Interval where the commit exists. | ||||
|     vk::DeviceMemory memory;          ///< Vulkan device memory handler. | ||||
|     VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation. | ||||
|     u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included. | ||||
| }; | ||||
|  | ||||
| /// Holds ownership of a memory map. | ||||
| class MemoryMap final { | ||||
| public: | ||||
|     explicit MemoryMap(const VKMemoryCommitImpl* commit, u8* address) | ||||
|         : commit{commit}, address{address} {} | ||||
|  | ||||
|     ~MemoryMap() { | ||||
|         if (commit) { | ||||
|             commit->Unmap(); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Prematurely releases the memory map. | ||||
|     void Release() { | ||||
|         commit->Unmap(); | ||||
|         commit = nullptr; | ||||
|     } | ||||
|  | ||||
|     /// Returns the address of the memory map. | ||||
|     u8* GetAddress() const { | ||||
|         return address; | ||||
|     } | ||||
|  | ||||
|     /// Returns the address of the memory map. | ||||
|     operator u8*() const { | ||||
|         return address; | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit. | ||||
|     u8* address{};                      ///< Address to the mapped memory. | ||||
| }; | ||||
|  | ||||
| } // namespace Vulkan | ||||
|   | ||||
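MemoryMap is a small RAII handle: its destructor unmaps, so the memcpy-into-a-temporary pattern used by the buffer cache is safe. A hedged usage sketch, where commit stands for any host-visible VKMemoryCommit and data/out/size are the caller's:

```cpp
// One-shot write: the temporary MemoryMap converts to u8* for std::memcpy and
// unmaps itself at the end of the full expression.
std::memcpy(commit->Map(size), data, size);

// Explicit map that lives for a scope, e.g. for a readback:
{
    const MemoryMap map = commit->Map(size);
    std::memcpy(out, map.GetAddress(), size);
} // ~MemoryMap() calls Unmap() here
```

Note that Map() takes a size and an optional offset relative to the commit's interval, and per the header comment only one map may be live at a time.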
| @@ -3,86 +3,144 @@ | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include <algorithm> | ||||
| #include <memory> | ||||
| #include <optional> | ||||
| #include <tuple> | ||||
| #include <vector> | ||||
|  | ||||
| #include "common/alignment.h" | ||||
| #include "common/assert.h" | ||||
| #include "video_core/renderer_vulkan/declarations.h" | ||||
| #include "video_core/renderer_vulkan/vk_device.h" | ||||
| #include "video_core/renderer_vulkan/vk_memory_manager.h" | ||||
| #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||||
| #include "video_core/renderer_vulkan/vk_scheduler.h" | ||||
| #include "video_core/renderer_vulkan/vk_stream_buffer.h" | ||||
|  | ||||
| namespace Vulkan { | ||||
|  | ||||
| namespace { | ||||
|  | ||||
| constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; | ||||
| constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; | ||||
|  | ||||
| VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, | ||||
|                                VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, | ||||
|                                vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage) | ||||
|     : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{ | ||||
|                                                                                    pipeline_stage} { | ||||
|     CreateBuffers(memory_manager, usage); | ||||
|     ReserveWatches(WATCHES_INITIAL_RESERVE); | ||||
| constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024; | ||||
|  | ||||
| std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter, | ||||
|                                   vk::MemoryPropertyFlags wanted) { | ||||
|     const auto properties = device.GetPhysical().getMemoryProperties(device.GetDispatchLoader()); | ||||
|     for (u32 i = 0; i < properties.memoryTypeCount; i++) { | ||||
|         if (!(filter & (1 << i))) { | ||||
|             continue; | ||||
|         } | ||||
|         if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) { | ||||
|             return i; | ||||
|         } | ||||
|     } | ||||
|     return {}; | ||||
| } | ||||
|  | ||||
| } // Anonymous namespace | ||||
|  | ||||
| VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, | ||||
|                                vk::BufferUsageFlags usage) | ||||
|     : device{device}, scheduler{scheduler} { | ||||
|     CreateBuffers(usage); | ||||
|     ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); | ||||
|     ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); | ||||
| } | ||||
|  | ||||
| VKStreamBuffer::~VKStreamBuffer() = default; | ||||
|  | ||||
| std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) { | ||||
|     ASSERT(size <= buffer_size); | ||||
| std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { | ||||
|     ASSERT(size <= STREAM_BUFFER_SIZE); | ||||
|     mapped_size = size; | ||||
|  | ||||
|     if (offset + size > buffer_size) { | ||||
|         // The buffer would overflow, save the amount of used buffers, signal an invalidation and | ||||
|         // reset the state. | ||||
|         invalidation_mark = used_watches; | ||||
|         used_watches = 0; | ||||
|     if (alignment > 0) { | ||||
|         offset = Common::AlignUp(offset, alignment); | ||||
|     } | ||||
|  | ||||
|     WaitPendingOperations(offset); | ||||
|  | ||||
|     bool invalidated = false; | ||||
|     if (offset + size > STREAM_BUFFER_SIZE) { | ||||
|         // The buffer would overflow, save the amount of used watches and reset the state. | ||||
|         invalidation_mark = current_watch_cursor; | ||||
|         current_watch_cursor = 0; | ||||
|         offset = 0; | ||||
|     } | ||||
|  | ||||
|     return {mapped_pointer + offset, offset, invalidation_mark.has_value()}; | ||||
| } | ||||
|         // Swap watches and reset waiting cursors. | ||||
|         std::swap(previous_watches, current_watches); | ||||
|         wait_cursor = 0; | ||||
|         wait_bound = 0; | ||||
|  | ||||
| void VKStreamBuffer::Send(u64 size) { | ||||
|     ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); | ||||
|  | ||||
|     if (invalidation_mark) { | ||||
|         // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish. | ||||
|         // Ensure that we don't wait for uncommitted fences. | ||||
|         scheduler.Flush(); | ||||
|         std::for_each(watches.begin(), watches.begin() + *invalidation_mark, | ||||
|                       [&](auto& resource) { resource->Wait(); }); | ||||
|         invalidation_mark = std::nullopt; | ||||
|  | ||||
|         invalidated = true; | ||||
|     } | ||||
|  | ||||
|     if (used_watches + 1 >= watches.size()) { | ||||
|         // Ensure that there are enough watches. | ||||
|         ReserveWatches(WATCHES_RESERVE_CHUNK); | ||||
|     } | ||||
|     // Add a watch for this allocation. | ||||
|     watches[used_watches++]->Watch(scheduler.GetFence()); | ||||
|  | ||||
|     offset += size; | ||||
| } | ||||
|  | ||||
| void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) { | ||||
|     const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0, | ||||
|                                          nullptr); | ||||
|  | ||||
|     const auto dev = device.GetLogical(); | ||||
|     const auto& dld = device.GetDispatchLoader(); | ||||
|     buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); | ||||
|     commit = memory_manager.Commit(*buffer, true); | ||||
|     mapped_pointer = commit->GetData(); | ||||
|     const auto pointer = reinterpret_cast<u8*>(dev.mapMemory(*memory, offset, size, {}, dld)); | ||||
|     return {pointer, offset, invalidated}; | ||||
| } | ||||
|  | ||||
| void VKStreamBuffer::ReserveWatches(std::size_t grow_size) { | ||||
|     const std::size_t previous_size = watches.size(); | ||||
|     watches.resize(previous_size + grow_size); | ||||
|     std::generate(watches.begin() + previous_size, watches.end(), | ||||
|                   []() { return std::make_unique<VKFenceWatch>(); }); | ||||
| void VKStreamBuffer::Unmap(u64 size) { | ||||
|     ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); | ||||
|  | ||||
|     const auto dev = device.GetLogical(); | ||||
|     dev.unmapMemory(*memory, device.GetDispatchLoader()); | ||||
|  | ||||
|     offset += size; | ||||
|  | ||||
|     if (current_watch_cursor + 1 >= current_watches.size()) { | ||||
|         // Ensure that there are enough watches. | ||||
|         ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK); | ||||
|     } | ||||
|     auto& watch = current_watches[current_watch_cursor++]; | ||||
|     watch.upper_bound = offset; | ||||
|     watch.fence.Watch(scheduler.GetFence()); | ||||
| } | ||||
|  | ||||
| void VKStreamBuffer::CreateBuffers(vk::BufferUsageFlags usage) { | ||||
|     const vk::BufferCreateInfo buffer_ci({}, STREAM_BUFFER_SIZE, usage, vk::SharingMode::eExclusive, | ||||
|                                          0, nullptr); | ||||
|     const auto dev = device.GetLogical(); | ||||
|     const auto& dld = device.GetDispatchLoader(); | ||||
|     buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); | ||||
|  | ||||
|     const auto requirements = dev.getBufferMemoryRequirements(*buffer, dld); | ||||
|     // Prefer device local host visible allocations (this should hit AMD's pinned memory). | ||||
|     auto type = FindMemoryType(device, requirements.memoryTypeBits, | ||||
|                                vk::MemoryPropertyFlagBits::eHostVisible | | ||||
|                                    vk::MemoryPropertyFlagBits::eHostCoherent | | ||||
|                                    vk::MemoryPropertyFlagBits::eDeviceLocal); | ||||
|     if (!type) { | ||||
|         // Otherwise search for a host visible allocation. | ||||
|         type = FindMemoryType(device, requirements.memoryTypeBits, | ||||
|                               vk::MemoryPropertyFlagBits::eHostVisible | | ||||
|                                   vk::MemoryPropertyFlagBits::eHostCoherent); | ||||
|         ASSERT_MSG(type, "No host visible and coherent memory type found"); | ||||
|     } | ||||
|     const vk::MemoryAllocateInfo alloc_ci(requirements.size, *type); | ||||
|     memory = dev.allocateMemoryUnique(alloc_ci, nullptr, dld); | ||||
|  | ||||
|     dev.bindBufferMemory(*buffer, *memory, 0, dld); | ||||
| } | ||||
|  | ||||
| void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) { | ||||
|     watches.resize(watches.size() + grow_size); | ||||
| } | ||||
|  | ||||
| void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) { | ||||
|     if (!invalidation_mark) { | ||||
|         return; | ||||
|     } | ||||
|     while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) { | ||||
|         auto& watch = previous_watches[wait_cursor]; | ||||
|         wait_bound = watch.upper_bound; | ||||
|         watch.fence.Wait(); | ||||
|         ++wait_cursor; | ||||
|     } | ||||
| } | ||||
|  | ||||
| } // namespace Vulkan | ||||
|   | ||||
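Stripped of the Vulkan calls, Map/Unmap implement a fenced ring allocator: align the cursor, wrap to zero when a request would overflow, and remember how far each submission reached so the next cycle can wait on it. A CPU-only model of the cursor logic (fence waiting elided; names are illustrative):

```cpp
#include <cstdint>
#include <utility>

constexpr std::uint64_t kBufferSize = 256 * 1024 * 1024; // STREAM_BUFFER_SIZE

struct RingCursor {
    std::uint64_t offset = 0;

    // Returns the offset for a new region of `size` bytes and whether the ring
    // wrapped; a wrap is where the real code waits on the previous cycle's fences.
    std::pair<std::uint64_t, bool> Map(std::uint64_t size, std::uint64_t alignment) {
        if (alignment > 0) {
            offset = (offset + alignment - 1) / alignment * alignment;
        }
        bool wrapped = false;
        if (offset + size > kBufferSize) {
            offset = 0; // restart at the beginning of the buffer
            wrapped = true;
        }
        return {offset, wrapped};
    }

    // Publishes the written region; in the real code this also records a fence
    // watch whose upper_bound is the new offset.
    void Unmap(std::uint64_t size) {
        offset += size;
    }
};
```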
| @@ -4,28 +4,24 @@ | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include <memory> | ||||
| #include <optional> | ||||
| #include <tuple> | ||||
| #include <vector> | ||||
|  | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/renderer_vulkan/declarations.h" | ||||
| #include "video_core/renderer_vulkan/vk_memory_manager.h" | ||||
|  | ||||
| namespace Vulkan { | ||||
|  | ||||
| class VKDevice; | ||||
| class VKFence; | ||||
| class VKFenceWatch; | ||||
| class VKResourceManager; | ||||
| class VKScheduler; | ||||
|  | ||||
| class VKStreamBuffer { | ||||
| class VKStreamBuffer final { | ||||
| public: | ||||
|     explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, | ||||
|                             VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, | ||||
|                             vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage); | ||||
|     explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, | ||||
|                             vk::BufferUsageFlags usage); | ||||
|     ~VKStreamBuffer(); | ||||
|  | ||||
|     /** | ||||
| @@ -34,39 +30,47 @@ public: | ||||
|      * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer | ||||
|      * offset and a boolean that's true when the buffer has been invalidated. | ||||
|      */ | ||||
|     std::tuple<u8*, u64, bool> Reserve(u64 size); | ||||
|     std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment); | ||||
|  | ||||
|     /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. | ||||
|     void Send(u64 size); | ||||
|     void Unmap(u64 size); | ||||
|  | ||||
|     vk::Buffer GetBuffer() const { | ||||
|     vk::Buffer GetHandle() const { | ||||
|         return *buffer; | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     struct Watch final { | ||||
|         VKFenceWatch fence; | ||||
|         u64 upper_bound{}; | ||||
|     }; | ||||
|  | ||||
|     /// Creates Vulkan buffer handles committing the required memory. | ||||
|     void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage); | ||||
|     void CreateBuffers(vk::BufferUsageFlags usage); | ||||
|  | ||||
|     /// Increases the amount of watches available. | ||||
|     void ReserveWatches(std::size_t grow_size); | ||||
|     void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size); | ||||
|  | ||||
|     void WaitPendingOperations(u64 requested_upper_bound); | ||||
|  | ||||
|     const VKDevice& device;                      ///< Vulkan device manager. | ||||
|     VKScheduler& scheduler;                      ///< Command scheduler. | ||||
|     const u64 buffer_size;                       ///< Total size of the stream buffer. | ||||
|     const vk::AccessFlags access;                ///< Access usage of this stream buffer. | ||||
|     const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer. | ||||
|  | ||||
|     UniqueBuffer buffer;   ///< Mapped buffer. | ||||
|     VKMemoryCommit commit; ///< Memory commit. | ||||
|     u8* mapped_pointer{};  ///< Pointer to the host visible commit | ||||
|     UniqueBuffer buffer;       ///< Mapped buffer. | ||||
|     UniqueDeviceMemory memory; ///< Memory allocation. | ||||
|  | ||||
|     u64 offset{};      ///< Buffer iterator. | ||||
|     u64 mapped_size{}; ///< Size reserved for the current copy. | ||||
|  | ||||
|     std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches | ||||
|     std::size_t used_watches{}; ///< Count of watches, reset on invalidation. | ||||
|     std::optional<std::size_t> | ||||
|         invalidation_mark{}; ///< Number of watches used in the current invalidation. | ||||
|     std::vector<Watch> current_watches;           ///< Watches recorded in the current iteration. | ||||
|     std::size_t current_watch_cursor{};           ///< Count of watches, reset on invalidation. | ||||
|     std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle. | ||||
|  | ||||
|     std::vector<Watch> previous_watches; ///< Watches used in the previous iteration. | ||||
|     std::size_t wait_cursor{};           ///< Last watch being waited for completion. | ||||
|     u64 wait_bound{};                    ///< Highest offset being watched for completion. | ||||
| }; | ||||
|  | ||||
| } // namespace Vulkan | ||||
|   | ||||
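Putting the new interface together, a caller's per-draw usage looks like this hedged sketch (stream is a VKStreamBuffer; src and size are the caller's data):

```cpp
// Reserve space, write through the returned pointer, then publish the region.
const auto [pointer, offset, invalidated] = stream.Map(size, /*alignment=*/4);
if (invalidated) {
    // The buffer wrapped: offsets cached from earlier Map() calls are stale.
}
std::memcpy(pointer, src, size);
stream.Unmap(size); // records a fence watch covering the written range
```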