Merge pull request #11544 from Kelebek1/reduce_stream_buffer_renderdoc
Allow GPUs without rebar to open multiple RenderDoc captures
This commit is contained in:
		| @@ -24,25 +24,38 @@ using namespace Common::Literals; | ||||
|  | ||||
| // Maximum potential alignment of a Vulkan buffer | ||||
| constexpr VkDeviceSize MAX_ALIGNMENT = 256; | ||||
| // Maximum size to put elements in the stream buffer | ||||
| constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB; | ||||
| // Stream buffer size in bytes | ||||
| constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB; | ||||
| constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; | ||||
| constexpr VkDeviceSize MAX_STREAM_BUFFER_SIZE = 128_MiB; | ||||
|  | ||||
| size_t Region(size_t iterator) noexcept { | ||||
|     return iterator / REGION_SIZE; | ||||
| size_t GetStreamBufferSize(const Device& device) { | ||||
|     VkDeviceSize size{0}; | ||||
|     if (device.HasDebuggingToolAttached()) { | ||||
|         ForEachDeviceLocalHostVisibleHeap(device, [&size](size_t index, VkMemoryHeap& heap) { | ||||
|             size = std::max(size, heap.size); | ||||
|         }); | ||||
|         // If rebar is not supported, cut the max heap size to 40%. This will allow 2 captures to be | ||||
|         // loaded at the same time in RenderDoc. If rebar is supported, this shouldn't be an issue | ||||
|         // as the heap will be much larger. | ||||
|         if (size <= 256_MiB) { | ||||
|             size = size * 40 / 100; | ||||
|         } | ||||
|     } else { | ||||
|         size = MAX_STREAM_BUFFER_SIZE; | ||||
|     } | ||||
|     return std::min(Common::AlignUp(size, MAX_ALIGNMENT), MAX_STREAM_BUFFER_SIZE); | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, | ||||
|                                      Scheduler& scheduler_) | ||||
|     : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { | ||||
|     : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, | ||||
|       stream_buffer_size{GetStreamBufferSize(device)}, region_size{stream_buffer_size / | ||||
|                                                                    StagingBufferPool::NUM_SYNCS} { | ||||
|     VkBufferCreateInfo stream_ci = { | ||||
|         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||||
|         .pNext = nullptr, | ||||
|         .flags = 0, | ||||
|         .size = STREAM_BUFFER_SIZE, | ||||
|         .size = stream_buffer_size, | ||||
|         .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | ||||
|                  VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, | ||||
|         .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||||
| @@ -63,7 +76,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem | ||||
| StagingBufferPool::~StagingBufferPool() = default; | ||||
|  | ||||
| StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) { | ||||
|     if (!deferred && usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) { | ||||
|     if (!deferred && usage == MemoryUsage::Upload && size <= region_size) { | ||||
|         return GetStreamBuffer(size); | ||||
|     } | ||||
|     return GetStagingBuffer(size, usage, deferred); | ||||
| @@ -101,7 +114,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { | ||||
|     used_iterator = iterator; | ||||
|     free_iterator = std::max(free_iterator, iterator + size); | ||||
|  | ||||
|     if (iterator + size >= STREAM_BUFFER_SIZE) { | ||||
|     if (iterator + size >= stream_buffer_size) { | ||||
|         std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS, | ||||
|                   current_tick); | ||||
|         used_iterator = 0; | ||||
|   | ||||
| @@ -90,6 +90,9 @@ private: | ||||
|     void ReleaseCache(MemoryUsage usage); | ||||
|  | ||||
|     void ReleaseLevel(StagingBuffersCache& cache, size_t log2); | ||||
|     size_t Region(size_t iter) const noexcept { | ||||
|         return iter / region_size; | ||||
|     } | ||||
|  | ||||
|     const Device& device; | ||||
|     MemoryAllocator& memory_allocator; | ||||
| @@ -97,6 +100,8 @@ private: | ||||
|  | ||||
|     vk::Buffer stream_buffer; | ||||
|     std::span<u8> stream_pointer; | ||||
|     VkDeviceSize stream_buffer_size; | ||||
|     VkDeviceSize region_size; | ||||
|  | ||||
|     size_t iterator = 0; | ||||
|     size_t used_iterator = 0; | ||||
|   | ||||
| @@ -9,6 +9,7 @@ | ||||
| #include "common/alignment.h" | ||||
| #include "common/assert.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/literals.h" | ||||
| #include "common/logging/log.h" | ||||
| #include "common/polyfill_ranges.h" | ||||
| #include "video_core/vulkan_common/vma.h" | ||||
| @@ -69,8 +70,7 @@ struct Range { | ||||
|     case MemoryUsage::Download: | ||||
|         return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; | ||||
|     case MemoryUsage::DeviceLocal: | ||||
|         return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | | ||||
|                VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT; | ||||
|         return {}; | ||||
|     } | ||||
|     return {}; | ||||
| } | ||||
| @@ -212,7 +212,20 @@ MemoryAllocator::MemoryAllocator(const Device& device_) | ||||
|     : device{device_}, allocator{device.GetAllocator()}, | ||||
|       properties{device_.GetPhysical().GetMemoryProperties().memoryProperties}, | ||||
|       buffer_image_granularity{ | ||||
|           device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {} | ||||
|           device_.GetPhysical().GetProperties().limits.bufferImageGranularity} { | ||||
|     // GPUs without resizable BAR (rebar) may expose only a small heap (256 MiB or less) that is | ||||
|     // both host visible and device local. In that case, opening 2 RenderDoc captures side-by-side | ||||
|     // is not possible because that heap runs out of memory. With a debugging tool attached and | ||||
|     // such a small heap, only the stream buffer is allowed to allocate from it. | ||||
|     if (device.HasDebuggingToolAttached()) { | ||||
|         using namespace Common::Literals; | ||||
|         ForEachDeviceLocalHostVisibleHeap(device, [this](size_t index, VkMemoryHeap& heap) { | ||||
|             if (heap.size <= 256_MiB) { | ||||
|                 valid_memory_types &= ~(1u << index); | ||||
|             } | ||||
|         }); | ||||
|     } | ||||
| } | ||||
|  | ||||
| MemoryAllocator::~MemoryAllocator() = default; | ||||
|  | ||||
| @@ -244,7 +257,7 @@ vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsa | ||||
|         .usage = MemoryUsageVma(usage), | ||||
|         .requiredFlags = 0, | ||||
|         .preferredFlags = MemoryUsagePreferedVmaFlags(usage), | ||||
|         .memoryTypeBits = 0, | ||||
|         .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types, | ||||
|         .pool = VK_NULL_HANDLE, | ||||
|         .pUserData = nullptr, | ||||
|         .priority = 0.f, | ||||
|   | ||||
| @@ -7,6 +7,7 @@ | ||||
| #include <span> | ||||
| #include <vector> | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/vulkan_common/vulkan_device.h" | ||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||
|  | ||||
| VK_DEFINE_HANDLE(VmaAllocator) | ||||
| @@ -26,6 +27,18 @@ enum class MemoryUsage { | ||||
|     Stream,      ///< Requests device local host visible buffer, falling back to host memory. | ||||
| }; | ||||
|  | ||||
| template <typename F> | ||||
| void ForEachDeviceLocalHostVisibleHeap(const Device& device, F&& f) { | ||||
|     auto memory_props = device.GetPhysical().GetMemoryProperties().memoryProperties; | ||||
|     for (size_t i = 0; i < memory_props.memoryTypeCount; i++) { | ||||
|         auto& memory_type = memory_props.memoryTypes[i]; | ||||
|         if ((memory_type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && | ||||
|             (memory_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) { | ||||
|             f(memory_type.heapIndex, memory_props.memoryHeaps[memory_type.heapIndex]); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Ownership handle of a memory commitment. | ||||
| /// Points to a subregion of a memory allocation. | ||||
| class MemoryCommit { | ||||
| @@ -124,6 +137,7 @@ private: | ||||
|     std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations. | ||||
|     VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers | ||||
|                                            // and optimal images | ||||
|     u32 valid_memory_types{~0u}; | ||||
| }; | ||||
|  | ||||
| } // namespace Vulkan | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 liamwhite
					liamwhite