Query Cache: Fully rework Vulkan's query cache
This commit is contained in:
		| @@ -130,13 +130,17 @@ void LogSettings() { | |||||||
|     log_path("DataStorage_SDMCDir", Common::FS::GetYuzuPath(Common::FS::YuzuPath::SDMCDir)); |     log_path("DataStorage_SDMCDir", Common::FS::GetYuzuPath(Common::FS::YuzuPath::SDMCDir)); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | void UpdateGPUAccuracy() { | ||||||
|  |     values.current_gpu_accuracy = values.gpu_accuracy.GetValue(); | ||||||
|  | } | ||||||
|  |  | ||||||
| bool IsGPULevelExtreme() { | bool IsGPULevelExtreme() { | ||||||
|     return values.gpu_accuracy.GetValue() == GpuAccuracy::Extreme; |     return values.current_gpu_accuracy == GpuAccuracy::Extreme; | ||||||
| } | } | ||||||
|  |  | ||||||
| bool IsGPULevelHigh() { | bool IsGPULevelHigh() { | ||||||
|     return values.gpu_accuracy.GetValue() == GpuAccuracy::Extreme || |     return values.current_gpu_accuracy == GpuAccuracy::Extreme || | ||||||
|            values.gpu_accuracy.GetValue() == GpuAccuracy::High; |            values.current_gpu_accuracy == GpuAccuracy::High; | ||||||
| } | } | ||||||
|  |  | ||||||
| bool IsFastmemEnabled() { | bool IsFastmemEnabled() { | ||||||
|   | |||||||
| @@ -307,6 +307,7 @@ struct Values { | |||||||
|                                                       Specialization::Default, |                                                       Specialization::Default, | ||||||
|                                                       true, |                                                       true, | ||||||
|                                                       true}; |                                                       true}; | ||||||
|  |     GpuAccuracy current_gpu_accuracy{GpuAccuracy::High}; | ||||||
|     SwitchableSetting<AnisotropyMode, true> max_anisotropy{ |     SwitchableSetting<AnisotropyMode, true> max_anisotropy{ | ||||||
|         linkage,          AnisotropyMode::Automatic, AnisotropyMode::Automatic, AnisotropyMode::X16, |         linkage,          AnisotropyMode::Automatic, AnisotropyMode::Automatic, AnisotropyMode::X16, | ||||||
|         "max_anisotropy", Category::RendererAdvanced}; |         "max_anisotropy", Category::RendererAdvanced}; | ||||||
| @@ -522,6 +523,7 @@ struct Values { | |||||||
|  |  | ||||||
| extern Values values; | extern Values values; | ||||||
|  |  | ||||||
|  | void UpdateGPUAccuracy(); | ||||||
| bool IsGPULevelExtreme(); | bool IsGPULevelExtreme(); | ||||||
| bool IsGPULevelHigh(); | bool IsGPULevelHigh(); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -272,13 +272,20 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad | |||||||
|     if (!cpu_addr) { |     if (!cpu_addr) { | ||||||
|         return {&slot_buffers[NULL_BUFFER_ID], 0}; |         return {&slot_buffers[NULL_BUFFER_ID], 0}; | ||||||
|     } |     } | ||||||
|     const BufferId buffer_id = FindBuffer(*cpu_addr, size); |     return ObtainCPUBuffer(*cpu_addr, size, sync_info, post_op); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template <class P> | ||||||
|  | std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(VAddr cpu_addr, u32 size, | ||||||
|  |                                                                  ObtainBufferSynchronize sync_info, | ||||||
|  |                                                                  ObtainBufferOperation post_op) { | ||||||
|  |     const BufferId buffer_id = FindBuffer(cpu_addr, size); | ||||||
|     Buffer& buffer = slot_buffers[buffer_id]; |     Buffer& buffer = slot_buffers[buffer_id]; | ||||||
|  |  | ||||||
|     // synchronize op |     // synchronize op | ||||||
|     switch (sync_info) { |     switch (sync_info) { | ||||||
|     case ObtainBufferSynchronize::FullSynchronize: |     case ObtainBufferSynchronize::FullSynchronize: | ||||||
|         SynchronizeBuffer(buffer, *cpu_addr, size); |         SynchronizeBuffer(buffer, cpu_addr, size); | ||||||
|         break; |         break; | ||||||
|     default: |     default: | ||||||
|         break; |         break; | ||||||
| @@ -286,11 +293,11 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad | |||||||
|  |  | ||||||
|     switch (post_op) { |     switch (post_op) { | ||||||
|     case ObtainBufferOperation::MarkAsWritten: |     case ObtainBufferOperation::MarkAsWritten: | ||||||
|         MarkWrittenBuffer(buffer_id, *cpu_addr, size); |         MarkWrittenBuffer(buffer_id, cpu_addr, size); | ||||||
|         break; |         break; | ||||||
|     case ObtainBufferOperation::DiscardWrite: { |     case ObtainBufferOperation::DiscardWrite: { | ||||||
|         VAddr cpu_addr_start = Common::AlignDown(*cpu_addr, 64); |         VAddr cpu_addr_start = Common::AlignDown(cpu_addr, 64); | ||||||
|         VAddr cpu_addr_end = Common::AlignUp(*cpu_addr + size, 64); |         VAddr cpu_addr_end = Common::AlignUp(cpu_addr + size, 64); | ||||||
|         IntervalType interval{cpu_addr_start, cpu_addr_end}; |         IntervalType interval{cpu_addr_start, cpu_addr_end}; | ||||||
|         ClearDownload(interval); |         ClearDownload(interval); | ||||||
|         common_ranges.subtract(interval); |         common_ranges.subtract(interval); | ||||||
| @@ -300,7 +307,7 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad | |||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     return {&buffer, buffer.Offset(*cpu_addr)}; |     return {&buffer, buffer.Offset(cpu_addr)}; | ||||||
| } | } | ||||||
|  |  | ||||||
| template <class P> | template <class P> | ||||||
|   | |||||||
| @@ -295,6 +295,10 @@ public: | |||||||
|     [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, |     [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, | ||||||
|                                                        ObtainBufferSynchronize sync_info, |                                                        ObtainBufferSynchronize sync_info, | ||||||
|                                                        ObtainBufferOperation post_op); |                                                        ObtainBufferOperation post_op); | ||||||
|  |  | ||||||
|  |     [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(VAddr gpu_addr, u32 size, | ||||||
|  |                                                        ObtainBufferSynchronize sync_info, | ||||||
|  |                                                        ObtainBufferOperation post_op); | ||||||
|     void FlushCachedWrites(); |     void FlushCachedWrites(); | ||||||
|  |  | ||||||
|     /// Return true when there are uncommitted buffers to be downloaded |     /// Return true when there are uncommitted buffers to be downloaded | ||||||
| @@ -335,6 +339,14 @@ public: | |||||||
|  |  | ||||||
|     [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer(); |     [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer(); | ||||||
|  |  | ||||||
|  |     template <typename Func> | ||||||
|  |     void BufferOperations(Func&& func) { | ||||||
|  |         do { | ||||||
|  |             channel_state->has_deleted_buffers = false; | ||||||
|  |             func(); | ||||||
|  |         } while (channel_state->has_deleted_buffers); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     std::recursive_mutex mutex; |     std::recursive_mutex mutex; | ||||||
|     Runtime& runtime; |     Runtime& runtime; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -51,7 +51,7 @@ public: | |||||||
|     virtual void CreateChannel(Tegra::Control::ChannelState& channel); |     virtual void CreateChannel(Tegra::Control::ChannelState& channel); | ||||||
|  |  | ||||||
|     /// Bind a channel for execution. |     /// Bind a channel for execution. | ||||||
|     void BindToChannel(s32 id); |     virtual void BindToChannel(s32 id); | ||||||
|  |  | ||||||
|     /// Erase channel's state. |     /// Erase channel's state. | ||||||
|     void EraseChannel(s32 id); |     void EraseChannel(s32 id); | ||||||
|   | |||||||
| @@ -20,8 +20,6 @@ | |||||||
|  |  | ||||||
| namespace Tegra::Engines { | namespace Tegra::Engines { | ||||||
|  |  | ||||||
| using VideoCore::QueryType; |  | ||||||
|  |  | ||||||
| /// First register id that is actually a Macro call. | /// First register id that is actually a Macro call. | ||||||
| constexpr u32 MacroRegistersStart = 0xE00; | constexpr u32 MacroRegistersStart = 0xE00; | ||||||
|  |  | ||||||
| @@ -500,27 +498,21 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { | |||||||
| } | } | ||||||
|  |  | ||||||
| void Maxwell3D::ProcessQueryGet() { | void Maxwell3D::ProcessQueryGet() { | ||||||
|  |     VideoCommon::QueryPropertiesFlags flags{}; | ||||||
|  |     if (regs.report_semaphore.query.short_query == 0) { | ||||||
|  |         flags |= VideoCommon::QueryPropertiesFlags::HasTimeout; | ||||||
|  |     } | ||||||
|  |     const GPUVAddr sequence_address{regs.report_semaphore.Address()}; | ||||||
|  |     const VideoCommon::QueryType query_type = | ||||||
|  |         static_cast<VideoCommon::QueryType>(regs.report_semaphore.query.report.Value()); | ||||||
|  |     const u32 payload = regs.report_semaphore.payload; | ||||||
|  |     const u32 subreport = regs.report_semaphore.query.sub_report; | ||||||
|     switch (regs.report_semaphore.query.operation) { |     switch (regs.report_semaphore.query.operation) { | ||||||
|     case Regs::ReportSemaphore::Operation::Release: |     case Regs::ReportSemaphore::Operation::Release: | ||||||
|         if (regs.report_semaphore.query.short_query != 0) { |         if (regs.report_semaphore.query.short_query != 0) { | ||||||
|             const GPUVAddr sequence_address{regs.report_semaphore.Address()}; |             flags |= VideoCommon::QueryPropertiesFlags::IsAFence; | ||||||
|             const u32 payload = regs.report_semaphore.payload; |  | ||||||
|             std::function<void()> operation([this, sequence_address, payload] { |  | ||||||
|                 memory_manager.Write<u32>(sequence_address, payload); |  | ||||||
|             }); |  | ||||||
|             rasterizer->SignalFence(std::move(operation)); |  | ||||||
|         } else { |  | ||||||
|             struct LongQueryResult { |  | ||||||
|                 u64_le value; |  | ||||||
|                 u64_le timestamp; |  | ||||||
|             }; |  | ||||||
|             const GPUVAddr sequence_address{regs.report_semaphore.Address()}; |  | ||||||
|             const u32 payload = regs.report_semaphore.payload; |  | ||||||
|             [this, sequence_address, payload] { |  | ||||||
|                 memory_manager.Write<u64>(sequence_address + sizeof(u64), system.GPU().GetTicks()); |  | ||||||
|                 memory_manager.Write<u64>(sequence_address, payload); |  | ||||||
|             }(); |  | ||||||
|         } |         } | ||||||
|  |         rasterizer->Query(sequence_address, query_type, flags, payload, subreport); | ||||||
|         break; |         break; | ||||||
|     case Regs::ReportSemaphore::Operation::Acquire: |     case Regs::ReportSemaphore::Operation::Acquire: | ||||||
|         // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that |         // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that | ||||||
| @@ -528,11 +520,7 @@ void Maxwell3D::ProcessQueryGet() { | |||||||
|         UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); |         UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); | ||||||
|         break; |         break; | ||||||
|     case Regs::ReportSemaphore::Operation::ReportOnly: |     case Regs::ReportSemaphore::Operation::ReportOnly: | ||||||
|         if (const std::optional<u64> result = GetQueryResult()) { |         rasterizer->Query(sequence_address, query_type, flags, payload, subreport); | ||||||
|             // If the query returns an empty optional it means it's cached and deferred. |  | ||||||
|             // In this case we have a non-empty result, so we stamp it immediately. |  | ||||||
|             StampQueryResult(*result, regs.report_semaphore.query.short_query == 0); |  | ||||||
|         } |  | ||||||
|         break; |         break; | ||||||
|     case Regs::ReportSemaphore::Operation::Trap: |     case Regs::ReportSemaphore::Operation::Trap: | ||||||
|         UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); |         UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); | ||||||
| @@ -544,6 +532,10 @@ void Maxwell3D::ProcessQueryGet() { | |||||||
| } | } | ||||||
|  |  | ||||||
| void Maxwell3D::ProcessQueryCondition() { | void Maxwell3D::ProcessQueryCondition() { | ||||||
|  |     if (rasterizer->AccelerateConditionalRendering()) { | ||||||
|  |         execute_on = true; | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|     const GPUVAddr condition_address{regs.render_enable.Address()}; |     const GPUVAddr condition_address{regs.render_enable.Address()}; | ||||||
|     switch (regs.render_enable_override) { |     switch (regs.render_enable_override) { | ||||||
|     case Regs::RenderEnable::Override::AlwaysRender: |     case Regs::RenderEnable::Override::AlwaysRender: | ||||||
| @@ -553,10 +545,6 @@ void Maxwell3D::ProcessQueryCondition() { | |||||||
|         execute_on = false; |         execute_on = false; | ||||||
|         break; |         break; | ||||||
|     case Regs::RenderEnable::Override::UseRenderEnable: { |     case Regs::RenderEnable::Override::UseRenderEnable: { | ||||||
|         if (rasterizer->AccelerateConditionalRendering()) { |  | ||||||
|             execute_on = true; |  | ||||||
|             return; |  | ||||||
|         } |  | ||||||
|         switch (regs.render_enable.mode) { |         switch (regs.render_enable.mode) { | ||||||
|         case Regs::RenderEnable::Mode::True: { |         case Regs::RenderEnable::Mode::True: { | ||||||
|             execute_on = true; |             execute_on = true; | ||||||
| @@ -606,7 +594,13 @@ void Maxwell3D::ProcessCounterReset() { | |||||||
| #endif | #endif | ||||||
|     switch (regs.clear_report_value) { |     switch (regs.clear_report_value) { | ||||||
|     case Regs::ClearReport::ZPassPixelCount: |     case Regs::ClearReport::ZPassPixelCount: | ||||||
|         rasterizer->ResetCounter(QueryType::SamplesPassed); |         rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); | ||||||
|  |         break; | ||||||
|  |     case Regs::ClearReport::PrimitivesGenerated: | ||||||
|  |         rasterizer->ResetCounter(VideoCommon::QueryType::StreamingByteCount); | ||||||
|  |         break; | ||||||
|  |     case Regs::ClearReport::VtgPrimitivesOut: | ||||||
|  |         rasterizer->ResetCounter(VideoCommon::QueryType::StreamingByteCount); | ||||||
|         break; |         break; | ||||||
|     default: |     default: | ||||||
|         LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); |         LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); | ||||||
| @@ -620,28 +614,6 @@ void Maxwell3D::ProcessSyncPoint() { | |||||||
|     rasterizer->SignalSyncPoint(sync_point); |     rasterizer->SignalSyncPoint(sync_point); | ||||||
| } | } | ||||||
|  |  | ||||||
| std::optional<u64> Maxwell3D::GetQueryResult() { |  | ||||||
|     switch (regs.report_semaphore.query.report) { |  | ||||||
|     case Regs::ReportSemaphore::Report::Payload: |  | ||||||
|         return regs.report_semaphore.payload; |  | ||||||
|     case Regs::ReportSemaphore::Report::ZPassPixelCount64: |  | ||||||
| #if ANDROID |  | ||||||
|         if (!Settings::IsGPULevelHigh()) { |  | ||||||
|             // This is problematic on Android, disable on GPU Normal. |  | ||||||
|             return 120; |  | ||||||
|         } |  | ||||||
| #endif |  | ||||||
|         // Deferred. |  | ||||||
|         rasterizer->Query(regs.report_semaphore.Address(), QueryType::SamplesPassed, |  | ||||||
|                           system.GPU().GetTicks()); |  | ||||||
|         return std::nullopt; |  | ||||||
|     default: |  | ||||||
|         LOG_DEBUG(HW_GPU, "Unimplemented query report type {}", |  | ||||||
|                   regs.report_semaphore.query.report.Value()); |  | ||||||
|         return 1; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void Maxwell3D::ProcessCBBind(size_t stage_index) { | void Maxwell3D::ProcessCBBind(size_t stage_index) { | ||||||
|     // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader |     // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader | ||||||
|     // stage. |     // stage. | ||||||
|   | |||||||
| @@ -3182,9 +3182,6 @@ private: | |||||||
|     /// Handles writes to syncing register. |     /// Handles writes to syncing register. | ||||||
|     void ProcessSyncPoint(); |     void ProcessSyncPoint(); | ||||||
|  |  | ||||||
|     /// Returns a query's value or an empty object if the value will be deferred through a cache. |  | ||||||
|     std::optional<u64> GetQueryResult(); |  | ||||||
|  |  | ||||||
|     void RefreshParametersImpl(); |     void RefreshParametersImpl(); | ||||||
|  |  | ||||||
|     bool IsMethodExecutable(u32 method); |     bool IsMethodExecutable(u32 method); | ||||||
|   | |||||||
| @@ -362,21 +362,17 @@ void MaxwellDMA::ReleaseSemaphore() { | |||||||
|     const auto type = regs.launch_dma.semaphore_type; |     const auto type = regs.launch_dma.semaphore_type; | ||||||
|     const GPUVAddr address = regs.semaphore.address; |     const GPUVAddr address = regs.semaphore.address; | ||||||
|     const u32 payload = regs.semaphore.payload; |     const u32 payload = regs.semaphore.payload; | ||||||
|  |     VideoCommon::QueryPropertiesFlags flags{VideoCommon::QueryPropertiesFlags::IsAFence}; | ||||||
|     switch (type) { |     switch (type) { | ||||||
|     case LaunchDMA::SemaphoreType::NONE: |     case LaunchDMA::SemaphoreType::NONE: | ||||||
|         break; |         break; | ||||||
|     case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: { |     case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: { | ||||||
|         std::function<void()> operation( |         rasterizer->Query(address, VideoCommon::QueryType::Payload, flags, payload, 0); | ||||||
|             [this, address, payload] { memory_manager.Write<u32>(address, payload); }); |  | ||||||
|         rasterizer->SignalFence(std::move(operation)); |  | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: { |     case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: { | ||||||
|         std::function<void()> operation([this, address, payload] { |         rasterizer->Query(address, VideoCommon::QueryType::Payload, | ||||||
|             memory_manager.Write<u64>(address + sizeof(u64), system.GPU().GetTicks()); |                           flags | VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); | ||||||
|             memory_manager.Write<u64>(address, payload); |  | ||||||
|         }); |  | ||||||
|         rasterizer->SignalFence(std::move(operation)); |  | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     default: |     default: | ||||||
|   | |||||||
| @@ -82,10 +82,7 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||||||
|     if (op == GpuSemaphoreOperation::WriteLong) { |     if (op == GpuSemaphoreOperation::WriteLong) { | ||||||
|         const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; |         const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; | ||||||
|         const u32 payload = regs.semaphore_sequence; |         const u32 payload = regs.semaphore_sequence; | ||||||
|         [this, sequence_address, payload] { |         rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); | ||||||
|             memory_manager.Write<u64>(sequence_address + sizeof(u64), gpu.GetTicks()); |  | ||||||
|             memory_manager.Write<u64>(sequence_address, payload); |  | ||||||
|         }(); |  | ||||||
|     } else { |     } else { | ||||||
|         do { |         do { | ||||||
|             const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; |             const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; | ||||||
| @@ -120,10 +117,7 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||||||
| void Puller::ProcessSemaphoreRelease() { | void Puller::ProcessSemaphoreRelease() { | ||||||
|     const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; |     const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; | ||||||
|     const u32 payload = regs.semaphore_release; |     const u32 payload = regs.semaphore_release; | ||||||
|     std::function<void()> operation([this, sequence_address, payload] { |     rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0); | ||||||
|         memory_manager.Write<u32>(sequence_address, payload); |  | ||||||
|     }); |  | ||||||
|     rasterizer->SignalFence(std::move(operation)); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| void Puller::ProcessSemaphoreAcquire() { | void Puller::ProcessSemaphoreAcquire() { | ||||||
| @@ -132,7 +126,6 @@ void Puller::ProcessSemaphoreAcquire() { | |||||||
|     while (word != value) { |     while (word != value) { | ||||||
|         regs.acquire_active = true; |         regs.acquire_active = true; | ||||||
|         regs.acquire_value = value; |         regs.acquire_value = value; | ||||||
|         std::this_thread::sleep_for(std::chrono::milliseconds(1)); |  | ||||||
|         rasterizer->ReleaseFences(); |         rasterizer->ReleaseFences(); | ||||||
|         word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); |         word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); | ||||||
|         // TODO(kemathe73) figure out how to do the acquire_timeout |         // TODO(kemathe73) figure out how to do the acquire_timeout | ||||||
|   | |||||||
| @@ -104,9 +104,28 @@ public: | |||||||
|         SignalFence(std::move(func)); |         SignalFence(std::move(func)); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     void WaitPendingFences() { |     void WaitPendingFences(bool force) { | ||||||
|         if constexpr (!can_async_check) { |         if constexpr (!can_async_check) { | ||||||
|             TryReleasePendingFences<true>(); |             if (force) { | ||||||
|  |                 TryReleasePendingFences<true>(); | ||||||
|  |             } else { | ||||||
|  |                 TryReleasePendingFences<false>(); | ||||||
|  |             } | ||||||
|  |         } else { | ||||||
|  |             if (!force) { | ||||||
|  |                 return; | ||||||
|  |             } | ||||||
|  |             std::mutex wait_mutex; | ||||||
|  |             std::condition_variable wait_cv; | ||||||
|  |             std::atomic<bool> wait_finished{}; | ||||||
|  |             std::function<void()> func([&] { | ||||||
|  |                 std::scoped_lock lk(wait_mutex); | ||||||
|  |                 wait_finished.store(true, std::memory_order_relaxed); | ||||||
|  |                 wait_cv.notify_all(); | ||||||
|  |             }); | ||||||
|  |             SignalFence(std::move(func)); | ||||||
|  |             std::unique_lock lk(wait_mutex); | ||||||
|  |             wait_cv.wait(lk, [&wait_finished] { return wait_finished.load(std::memory_order_relaxed); }); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -102,7 +102,8 @@ struct GPU::Impl { | |||||||
|  |  | ||||||
|     /// Signal the ending of command list. |     /// Signal the ending of command list. | ||||||
|     void OnCommandListEnd() { |     void OnCommandListEnd() { | ||||||
|         rasterizer->ReleaseFences(); |         rasterizer->ReleaseFences(false); | ||||||
|  |         Settings::UpdateGPUAccuracy(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// Request a host GPU memory flush from the CPU. |     /// Request a host GPU memory flush from the CPU. | ||||||
| @@ -220,6 +221,7 @@ struct GPU::Impl { | |||||||
|     /// This can be used to launch any necessary threads and register any necessary |     /// This can be used to launch any necessary threads and register any necessary | ||||||
|     /// core timing events. |     /// core timing events. | ||||||
|     void Start() { |     void Start() { | ||||||
|  |         Settings::UpdateGPUAccuracy(); | ||||||
|         gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler); |         gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -41,6 +41,7 @@ set(SHADER_FILES | |||||||
|     pitch_unswizzle.comp |     pitch_unswizzle.comp | ||||||
|     present_bicubic.frag |     present_bicubic.frag | ||||||
|     present_gaussian.frag |     present_gaussian.frag | ||||||
|  |     resolve_conditional_render.comp | ||||||
|     smaa_edge_detection.vert |     smaa_edge_detection.vert | ||||||
|     smaa_edge_detection.frag |     smaa_edge_detection.frag | ||||||
|     smaa_blending_weight_calculation.vert |     smaa_blending_weight_calculation.vert | ||||||
|   | |||||||
							
								
								
									
										20
									
								
								src/video_core/host_shaders/resolve_conditional_render.comp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								src/video_core/host_shaders/resolve_conditional_render.comp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,20 @@ | |||||||
|  | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||||||
|  | // SPDX-License-Identifier: GPL-3.0-or-later | ||||||
|  |  | ||||||
|  | #version 450 | ||||||
|  |  | ||||||
|  | layout(local_size_x = 1) in; | ||||||
|  |  | ||||||
|  | layout(std430, binding = 0) buffer Query { | ||||||
|  |     uvec2 initial; | ||||||
|  |     uvec2 unknown; | ||||||
|  |     uvec2 current; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | layout(std430, binding = 1) buffer Result { | ||||||
|  |     uint result; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | void main() { | ||||||
|  |     result = all(equal(initial, current)) ? 1 : 0; | ||||||
|  | } | ||||||
| @@ -319,6 +319,25 @@ private: | |||||||
|     } |     } | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | class HLE_DrawIndirectByteCount final : public HLEMacroImpl { | ||||||
|  | public: | ||||||
|  |     explicit HLE_DrawIndirectByteCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} | ||||||
|  |  | ||||||
|  |     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||||||
|  |         maxwell3d.RefreshParameters(); | ||||||
|  |  | ||||||
|  |         maxwell3d.regs.draw.begin = parameters[0]; | ||||||
|  |         maxwell3d.regs.draw_auto_stride = parameters[1]; | ||||||
|  |         maxwell3d.regs.draw_auto_byte_count = parameters[2]; | ||||||
|  |  | ||||||
|  |         if (maxwell3d.ShouldExecute()) { | ||||||
|  |             maxwell3d.draw_manager->DrawArray( | ||||||
|  |                 maxwell3d.regs.draw.topology, 0, | ||||||
|  |                 maxwell3d.regs.draw_auto_byte_count / maxwell3d.regs.draw_auto_stride, 0, 1); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  |  | ||||||
| class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl { | class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl { | ||||||
| public: | public: | ||||||
|     explicit HLE_C713C83D8F63CCF3(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |     explicit HLE_C713C83D8F63CCF3(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} | ||||||
| @@ -536,6 +555,11 @@ HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { | |||||||
|                          [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |                          [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { | ||||||
|                              return std::make_unique<HLE_TransformFeedbackSetup>(maxwell3d__); |                              return std::make_unique<HLE_TransformFeedbackSetup>(maxwell3d__); | ||||||
|                          })); |                          })); | ||||||
|  |     builders.emplace(0xB5F74EDB717278ECULL, | ||||||
|  |                      std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( | ||||||
|  |                          [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { | ||||||
|  |                              return std::make_unique<HLE_DrawIndirectByteCount>(maxwell3d__); | ||||||
|  |                          })); | ||||||
| } | } | ||||||
|  |  | ||||||
| HLEMacro::~HLEMacro() = default; | HLEMacro::~HLEMacro() = default; | ||||||
|   | |||||||
| @@ -25,6 +25,13 @@ | |||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| #include "video_core/texture_cache/slot_vector.h" | #include "video_core/texture_cache/slot_vector.h" | ||||||
|  |  | ||||||
|  | namespace VideoCore { | ||||||
|  | enum class QueryType { | ||||||
|  |     SamplesPassed, | ||||||
|  | }; | ||||||
|  | constexpr std::size_t NumQueryTypes = 1; | ||||||
|  | } // namespace VideoCore | ||||||
|  |  | ||||||
| namespace VideoCommon { | namespace VideoCommon { | ||||||
|  |  | ||||||
| using AsyncJobId = SlotId; | using AsyncJobId = SlotId; | ||||||
| @@ -98,10 +105,10 @@ private: | |||||||
| }; | }; | ||||||
|  |  | ||||||
| template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> | template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> | ||||||
| class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | class QueryCacheLegacy : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | ||||||
| public: | public: | ||||||
|     explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_, |     explicit QueryCacheLegacy(VideoCore::RasterizerInterface& rasterizer_, | ||||||
|                             Core::Memory::Memory& cpu_memory_) |                               Core::Memory::Memory& cpu_memory_) | ||||||
|         : rasterizer{rasterizer_}, |         : rasterizer{rasterizer_}, | ||||||
|           // Use reinterpret_cast instead of static_cast as workaround for |           // Use reinterpret_cast instead of static_cast as workaround for | ||||||
|           // UBSan bug (https://github.com/llvm/llvm-project/issues/59060) |           // UBSan bug (https://github.com/llvm/llvm-project/issues/59060) | ||||||
|   | |||||||
| @@ -9,6 +9,7 @@ | |||||||
| #include <utility> | #include <utility> | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/polyfill_thread.h" | #include "common/polyfill_thread.h" | ||||||
|  | #include "video_core/query_cache/types.h" | ||||||
| #include "video_core/cache_types.h" | #include "video_core/cache_types.h" | ||||||
| #include "video_core/engines/fermi_2d.h" | #include "video_core/engines/fermi_2d.h" | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
| @@ -26,11 +27,6 @@ struct ChannelState; | |||||||
|  |  | ||||||
| namespace VideoCore { | namespace VideoCore { | ||||||
|  |  | ||||||
| enum class QueryType { |  | ||||||
|     SamplesPassed, |  | ||||||
| }; |  | ||||||
| constexpr std::size_t NumQueryTypes = 1; |  | ||||||
|  |  | ||||||
| enum class LoadCallbackStage { | enum class LoadCallbackStage { | ||||||
|     Prepare, |     Prepare, | ||||||
|     Build, |     Build, | ||||||
| @@ -58,10 +54,10 @@ public: | |||||||
|     virtual void DispatchCompute() = 0; |     virtual void DispatchCompute() = 0; | ||||||
|  |  | ||||||
|     /// Resets the counter of a query |     /// Resets the counter of a query | ||||||
|     virtual void ResetCounter(QueryType type) = 0; |     virtual void ResetCounter(VideoCommon::QueryType type) = 0; | ||||||
|  |  | ||||||
|     /// Records a GPU query and caches it |     /// Records a GPU query and caches it | ||||||
|     virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; |     virtual void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) = 0; | ||||||
|  |  | ||||||
|     /// Signal an uniform buffer binding |     /// Signal an uniform buffer binding | ||||||
|     virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, |     virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | ||||||
| @@ -83,7 +79,7 @@ public: | |||||||
|     virtual void SignalReference() = 0; |     virtual void SignalReference() = 0; | ||||||
|  |  | ||||||
|     /// Release all pending fences. |     /// Release all pending fences. | ||||||
|     virtual void ReleaseFences() = 0; |     virtual void ReleaseFences(bool force = true) = 0; | ||||||
|  |  | ||||||
|     /// Notify rasterizer that all caches should be flushed to Switch memory |     /// Notify rasterizer that all caches should be flushed to Switch memory | ||||||
|     virtual void FlushAll() = 0; |     virtual void FlushAll() = 0; | ||||||
|   | |||||||
| @@ -26,16 +26,18 @@ void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {} | |||||||
| void RasterizerNull::DrawTexture() {} | void RasterizerNull::DrawTexture() {} | ||||||
| void RasterizerNull::Clear(u32 layer_count) {} | void RasterizerNull::Clear(u32 layer_count) {} | ||||||
| void RasterizerNull::DispatchCompute() {} | void RasterizerNull::DispatchCompute() {} | ||||||
| void RasterizerNull::ResetCounter(VideoCore::QueryType type) {} | void RasterizerNull::ResetCounter(VideoCommon::QueryType type) {} | ||||||
| void RasterizerNull::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | void RasterizerNull::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, | ||||||
|                            std::optional<u64> timestamp) { |                            VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { | ||||||
|     if (!gpu_memory) { |     if (!gpu_memory) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  |     if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { | ||||||
|     gpu_memory->Write(gpu_addr, u64{0}); |         u64 ticks = m_gpu.GetTicks(); | ||||||
|     if (timestamp) { |         gpu_memory->Write<u64>(gpu_addr + 8, ticks); | ||||||
|         gpu_memory->Write(gpu_addr + 8, *timestamp); |         gpu_memory->Write<u64>(gpu_addr, static_cast<u64>(payload)); | ||||||
|  |     } else { | ||||||
|  |         gpu_memory->Write<u32>(gpu_addr, payload); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| void RasterizerNull::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | void RasterizerNull::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | ||||||
| @@ -74,7 +76,7 @@ void RasterizerNull::SignalSyncPoint(u32 value) { | |||||||
|     syncpoint_manager.IncrementHost(value); |     syncpoint_manager.IncrementHost(value); | ||||||
| } | } | ||||||
| void RasterizerNull::SignalReference() {} | void RasterizerNull::SignalReference() {} | ||||||
| void RasterizerNull::ReleaseFences() {} | void RasterizerNull::ReleaseFences(bool) {} | ||||||
| void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | ||||||
| void RasterizerNull::WaitForIdle() {} | void RasterizerNull::WaitForIdle() {} | ||||||
| void RasterizerNull::FragmentBarrier() {} | void RasterizerNull::FragmentBarrier() {} | ||||||
|   | |||||||
| @@ -42,8 +42,8 @@ public: | |||||||
|     void DrawTexture() override; |     void DrawTexture() override; | ||||||
|     void Clear(u32 layer_count) override; |     void Clear(u32 layer_count) override; | ||||||
|     void DispatchCompute() override; |     void DispatchCompute() override; | ||||||
|     void ResetCounter(VideoCore::QueryType type) override; |     void ResetCounter(VideoCommon::QueryType type) override; | ||||||
|     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |     void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; | ||||||
|     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||||||
|     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; |     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | ||||||
|     void FlushAll() override; |     void FlushAll() override; | ||||||
| @@ -63,7 +63,7 @@ public: | |||||||
|     void SyncOperation(std::function<void()>&& func) override; |     void SyncOperation(std::function<void()>&& func) override; | ||||||
|     void SignalSyncPoint(u32 value) override; |     void SignalSyncPoint(u32 value) override; | ||||||
|     void SignalReference() override; |     void SignalReference() override; | ||||||
|     void ReleaseFences() override; |     void ReleaseFences(bool force) override; | ||||||
|     void FlushAndInvalidateRegion( |     void FlushAndInvalidateRegion( | ||||||
|         VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |         VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|     void WaitForIdle() override; |     void WaitForIdle() override; | ||||||
|   | |||||||
| @@ -27,7 +27,7 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) { | |||||||
| } // Anonymous namespace | } // Anonymous namespace | ||||||
|  |  | ||||||
| QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) | QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) | ||||||
|     : QueryCacheBase(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {} |     : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {} | ||||||
|  |  | ||||||
| QueryCache::~QueryCache() = default; | QueryCache::~QueryCache() = default; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -26,7 +26,7 @@ class RasterizerOpenGL; | |||||||
| using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; | using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; | ||||||
|  |  | ||||||
| class QueryCache final | class QueryCache final | ||||||
|     : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { |     : public VideoCommon::QueryCacheLegacy<QueryCache, CachedQuery, CounterStream, HostCounter> { | ||||||
| public: | public: | ||||||
|     explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_); |     explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_); | ||||||
|     ~QueryCache(); |     ~QueryCache(); | ||||||
|   | |||||||
| @@ -396,13 +396,31 @@ void RasterizerOpenGL::DispatchCompute() { | |||||||
|     has_written_global_memory |= pipeline->WritesGlobalMemory(); |     has_written_global_memory |= pipeline->WritesGlobalMemory(); | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { | void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) { | ||||||
|     query_cache.ResetCounter(type); |     if (type == VideoCommon::QueryType::ZPassPixelCount64) { | ||||||
|  |         query_cache.ResetCounter(VideoCore::QueryType::SamplesPassed); | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, | ||||||
|                              std::optional<u64> timestamp) { |                              VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { | ||||||
|     query_cache.Query(gpu_addr, type, timestamp); |     if (type == VideoCommon::QueryType::ZPassPixelCount64) { | ||||||
|  |         std::optional<u64> timestamp{True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout) | ||||||
|  |                                      ? std::make_optional<u64>(gpu.GetTicks()) : std:: nullopt }; | ||||||
|  |         if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { | ||||||
|  |             query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()}); | ||||||
|  |         } else { | ||||||
|  |             query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, std::nullopt); | ||||||
|  |         } | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { | ||||||
|  |         u64 ticks = gpu.GetTicks(); | ||||||
|  |         gpu_memory->Write<u64>(gpu_addr + 8, ticks); | ||||||
|  |         gpu_memory->Write<u64>(gpu_addr, static_cast<u64>(payload)); | ||||||
|  |     } else { | ||||||
|  |         gpu_memory->Write<u32>(gpu_addr, payload); | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | ||||||
| @@ -573,8 +591,8 @@ void RasterizerOpenGL::SignalReference() { | |||||||
|     fence_manager.SignalOrdering(); |     fence_manager.SignalOrdering(); | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::ReleaseFences() { | void RasterizerOpenGL::ReleaseFences(bool force) { | ||||||
|     fence_manager.WaitPendingFences(); |     fence_manager.WaitPendingFences(force); | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size, | void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size, | ||||||
|   | |||||||
| @@ -86,8 +86,8 @@ public: | |||||||
|     void DrawTexture() override; |     void DrawTexture() override; | ||||||
|     void Clear(u32 layer_count) override; |     void Clear(u32 layer_count) override; | ||||||
|     void DispatchCompute() override; |     void DispatchCompute() override; | ||||||
|     void ResetCounter(VideoCore::QueryType type) override; |     void ResetCounter(VideoCommon::QueryType type) override; | ||||||
|     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |     void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; | ||||||
|     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||||||
|     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; |     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | ||||||
|     void FlushAll() override; |     void FlushAll() override; | ||||||
| @@ -107,7 +107,7 @@ public: | |||||||
|     void SyncOperation(std::function<void()>&& func) override; |     void SyncOperation(std::function<void()>&& func) override; | ||||||
|     void SignalSyncPoint(u32 value) override; |     void SignalSyncPoint(u32 value) override; | ||||||
|     void SignalReference() override; |     void SignalReference() override; | ||||||
|     void ReleaseFences() override; |     void ReleaseFences(bool force = true) override; | ||||||
|     void FlushAndInvalidateRegion( |     void FlushAndInvalidateRegion( | ||||||
|         VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |         VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|     void WaitForIdle() override; |     void WaitForIdle() override; | ||||||
|   | |||||||
| @@ -61,6 +61,9 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo | |||||||
|     if (device.IsExtTransformFeedbackSupported()) { |     if (device.IsExtTransformFeedbackSupported()) { | ||||||
|         flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; |         flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; | ||||||
|     } |     } | ||||||
|  |     if (device.IsExtConditionalRendering()) { | ||||||
|  |         flags |= VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT; | ||||||
|  |     } | ||||||
|     const VkBufferCreateInfo buffer_ci = { |     const VkBufferCreateInfo buffer_ci = { | ||||||
|         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||||||
|         .pNext = nullptr, |         .pNext = nullptr, | ||||||
|   | |||||||
| @@ -12,6 +12,7 @@ | |||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/div_ceil.h" | #include "common/div_ceil.h" | ||||||
| #include "video_core/host_shaders/astc_decoder_comp_spv.h" | #include "video_core/host_shaders/astc_decoder_comp_spv.h" | ||||||
|  | #include "video_core/host_shaders/resolve_conditional_render_comp_spv.h" | ||||||
| #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" | ||||||
| #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" | ||||||
| #include "video_core/renderer_vulkan/vk_compute_pass.h" | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||||||
| @@ -302,6 +303,52 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | |||||||
|     return {staging.buffer, staging.offset}; |     return {staging.buffer, staging.offset}; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(const Device& device_, | ||||||
|  |                                                                  Scheduler& scheduler_, | ||||||
|  |                                                                  DescriptorPool& descriptor_pool_, ComputePassDescriptorQueue& compute_pass_descriptor_queue_) | ||||||
|  |     : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, | ||||||
|  |                   INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, nullptr, | ||||||
|  |                   RESOLVE_CONDITIONAL_RENDER_COMP_SPV), | ||||||
|  |       scheduler{scheduler_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {} | ||||||
|  |  | ||||||
|  | void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_buffer, | ||||||
|  |                                               u32 src_offset, bool compare_to_zero) { | ||||||
|  |     scheduler.RequestOutsideRenderPassOperationContext(); | ||||||
|  |  | ||||||
|  |     const size_t compare_size = compare_to_zero ? 8 : 24; | ||||||
|  |  | ||||||
|  |     compute_pass_descriptor_queue.Acquire(); | ||||||
|  |     compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, compare_size); | ||||||
|  |     compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, sizeof(u32)); | ||||||
|  |     const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()}; | ||||||
|  |  | ||||||
|  |     scheduler.RequestOutsideRenderPassOperationContext(); | ||||||
|  |     scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { | ||||||
|  |         static constexpr VkMemoryBarrier read_barrier{ | ||||||
|  |             .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||||||
|  |             .pNext = nullptr, | ||||||
|  |             .srcAccessMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||||||
|  |             .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, | ||||||
|  |         }; | ||||||
|  |         static constexpr VkMemoryBarrier write_barrier{ | ||||||
|  |             .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||||||
|  |             .pNext = nullptr, | ||||||
|  |             .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||||||
|  |             .dstAccessMask = VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT, | ||||||
|  |         }; | ||||||
|  |         const VkDescriptorSet set = descriptor_allocator.Commit(); | ||||||
|  |         device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); | ||||||
|  |  | ||||||
|  |         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||||||
|  |                                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, read_barrier); | ||||||
|  |         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); | ||||||
|  |         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); | ||||||
|  |         cmdbuf.Dispatch(1, 1, 1); | ||||||
|  |         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||||||
|  |                                VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, 0, write_barrier); | ||||||
|  |     }); | ||||||
|  | } | ||||||
|  |  | ||||||
| ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, | ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, | ||||||
|                                  DescriptorPool& descriptor_pool_, |                                  DescriptorPool& descriptor_pool_, | ||||||
|                                  StagingBufferPool& staging_buffer_pool_, |                                  StagingBufferPool& staging_buffer_pool_, | ||||||
|   | |||||||
| @@ -82,6 +82,19 @@ private: | |||||||
|     ComputePassDescriptorQueue& compute_pass_descriptor_queue; |     ComputePassDescriptorQueue& compute_pass_descriptor_queue; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | class ConditionalRenderingResolvePass final : public ComputePass { | ||||||
|  | public: | ||||||
|  |     explicit ConditionalRenderingResolvePass( | ||||||
|  |         const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, | ||||||
|  |         ComputePassDescriptorQueue& compute_pass_descriptor_queue_); | ||||||
|  |  | ||||||
|  |     void Resolve(VkBuffer dst_buffer, VkBuffer src_buffer, u32 src_offset, bool compare_to_zero); | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     Scheduler& scheduler; | ||||||
|  |     ComputePassDescriptorQueue& compute_pass_descriptor_queue; | ||||||
|  | }; | ||||||
|  |  | ||||||
| class ASTCDecoderPass final : public ComputePass { | class ASTCDecoderPass final : public ComputePass { | ||||||
| public: | public: | ||||||
|     explicit ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, |     explicit ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, | ||||||
|   | |||||||
| @@ -8,6 +8,7 @@ | |||||||
| #include "video_core/fence_manager.h" | #include "video_core/fence_manager.h" | ||||||
| #include "video_core/renderer_vulkan/vk_buffer_cache.h" | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||||||
| #include "video_core/renderer_vulkan/vk_texture_cache.h" | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||||||
|  | #include "video_core/renderer_vulkan/vk_query_cache.h" | ||||||
|  |  | ||||||
| namespace Core { | namespace Core { | ||||||
| class System; | class System; | ||||||
| @@ -20,7 +21,6 @@ class RasterizerInterface; | |||||||
| namespace Vulkan { | namespace Vulkan { | ||||||
|  |  | ||||||
| class Device; | class Device; | ||||||
| class QueryCache; |  | ||||||
| class Scheduler; | class Scheduler; | ||||||
|  |  | ||||||
| class InnerFence : public VideoCommon::FenceBase { | class InnerFence : public VideoCommon::FenceBase { | ||||||
|   | |||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -1,101 +1,74 @@ | |||||||
| // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||||||
| // SPDX-License-Identifier: GPL-2.0-or-later | // SPDX-License-Identifier: GPL-3.0-or-later | ||||||
|  |  | ||||||
| #pragma once | #pragma once | ||||||
|  |  | ||||||
| #include <cstddef> |  | ||||||
| #include <memory> | #include <memory> | ||||||
| #include <utility> |  | ||||||
| #include <vector> |  | ||||||
|  |  | ||||||
| #include "common/common_types.h" | #include "video_core/query_cache/query_cache_base.h" | ||||||
| #include "video_core/query_cache.h" | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||||||
| #include "video_core/renderer_vulkan/vk_resource_pool.h" |  | ||||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" |  | ||||||
|  |  | ||||||
| namespace VideoCore { | namespace VideoCore { | ||||||
| class RasterizerInterface; | class RasterizerInterface; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | namespace VideoCommon { | ||||||
|  | class StreamerInterface; | ||||||
|  | } | ||||||
|  |  | ||||||
| namespace Vulkan { | namespace Vulkan { | ||||||
|  |  | ||||||
| class CachedQuery; |  | ||||||
| class Device; | class Device; | ||||||
| class HostCounter; |  | ||||||
| class QueryCache; |  | ||||||
| class Scheduler; | class Scheduler; | ||||||
|  | class StagingBufferPool; | ||||||
|  |  | ||||||
| using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; | struct QueryCacheRuntimeImpl; | ||||||
|  |  | ||||||
| class QueryPool final : public ResourcePool { | class QueryCacheRuntime { | ||||||
| public: | public: | ||||||
|     explicit QueryPool(const Device& device, Scheduler& scheduler, VideoCore::QueryType type); |     explicit QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, | ||||||
|     ~QueryPool() override; |                                Core::Memory::Memory& cpu_memory_, | ||||||
|  |                                Vulkan::BufferCache& buffer_cache_, const Device& device_, | ||||||
|  |                                const MemoryAllocator& memory_allocator_, Scheduler& scheduler_, | ||||||
|  |                                StagingBufferPool& staging_pool_, | ||||||
|  |                                ComputePassDescriptorQueue& compute_pass_descriptor_queue, | ||||||
|  |                                DescriptorPool& descriptor_pool); | ||||||
|  |     ~QueryCacheRuntime(); | ||||||
|  |  | ||||||
|     std::pair<VkQueryPool, u32> Commit(); |     template <typename SyncValuesType> | ||||||
|  |     void SyncValues(std::span<SyncValuesType> values, VkBuffer base_src_buffer = nullptr); | ||||||
|  |  | ||||||
|     void Reserve(std::pair<VkQueryPool, u32> query); |     void Barriers(bool is_prebarrier); | ||||||
|  |  | ||||||
| protected: |     void EndHostConditionalRendering(); | ||||||
|     void Allocate(std::size_t begin, std::size_t end) override; |  | ||||||
|  |     void PauseHostConditionalRendering(); | ||||||
|  |  | ||||||
|  |     void ResumeHostConditionalRendering(); | ||||||
|  |  | ||||||
|  |     bool HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, bool qc_dirty); | ||||||
|  |  | ||||||
|  |     bool HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1, | ||||||
|  |                                                VideoCommon::LookupData object_2, bool qc_dirty, bool equal_check); | ||||||
|  |  | ||||||
|  |     VideoCommon::StreamerInterface* GetStreamerInterface(VideoCommon::QueryType query_type); | ||||||
|  |  | ||||||
|  |     void Bind3DEngine(Tegra::Engines::Maxwell3D* maxwell3d); | ||||||
|  |  | ||||||
|  |     template <typename Func> | ||||||
|  |     void View3DRegs(Func&& func); | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     static constexpr std::size_t GROW_STEP = 512; |     void HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object, bool is_equal); | ||||||
|  |     void HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal); | ||||||
|     const Device& device; |     friend struct QueryCacheRuntimeImpl; | ||||||
|     const VideoCore::QueryType type; |     std::unique_ptr<QueryCacheRuntimeImpl> impl; | ||||||
|  |  | ||||||
|     std::vector<vk::QueryPool> pools; |  | ||||||
|     std::vector<bool> usage; |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
| class QueryCache final | struct QueryCacheParams { | ||||||
|     : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { |     using RuntimeType = Vulkan::QueryCacheRuntime; | ||||||
| public: |  | ||||||
|     explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, |  | ||||||
|                         Core::Memory::Memory& cpu_memory_, const Device& device_, |  | ||||||
|                         Scheduler& scheduler_); |  | ||||||
|     ~QueryCache(); |  | ||||||
|  |  | ||||||
|     std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type); |  | ||||||
|  |  | ||||||
|     void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query); |  | ||||||
|  |  | ||||||
|     const Device& GetDevice() const noexcept { |  | ||||||
|         return device; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     Scheduler& GetScheduler() const noexcept { |  | ||||||
|         return scheduler; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
| private: |  | ||||||
|     const Device& device; |  | ||||||
|     Scheduler& scheduler; |  | ||||||
|     std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
| class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> { | using QueryCache = VideoCommon::QueryCacheBase<QueryCacheParams>; | ||||||
| public: |  | ||||||
|     explicit HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_, |  | ||||||
|                          VideoCore::QueryType type_); |  | ||||||
|     ~HostCounter(); |  | ||||||
|  |  | ||||||
|     void EndQuery(); |  | ||||||
|  |  | ||||||
| private: |  | ||||||
|     u64 BlockingQuery(bool async = false) const override; |  | ||||||
|  |  | ||||||
|     QueryCache& cache; |  | ||||||
|     const VideoCore::QueryType type; |  | ||||||
|     const std::pair<VkQueryPool, u32> query; |  | ||||||
|     const u64 tick; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> { |  | ||||||
| public: |  | ||||||
|     explicit CachedQuery(QueryCache&, VideoCore::QueryType, VAddr cpu_addr_, u8* host_ptr_) |  | ||||||
|         : CachedQueryBase{cpu_addr_, host_ptr_} {} |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| } // namespace Vulkan | } // namespace Vulkan | ||||||
|   | |||||||
| @@ -24,6 +24,7 @@ | |||||||
| #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | ||||||
| #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||||||
| #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||||||
|  | #include "video_core/renderer_vulkan/vk_query_cache.h" | ||||||
| #include "video_core/renderer_vulkan/vk_rasterizer.h" | #include "video_core/renderer_vulkan/vk_rasterizer.h" | ||||||
| #include "video_core/renderer_vulkan/vk_scheduler.h" | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||||||
| #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||||||
| @@ -170,9 +171,11 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||||||
|       buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, |       buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, | ||||||
|                            guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool), |                            guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool), | ||||||
|       buffer_cache(*this, cpu_memory_, buffer_cache_runtime), |       buffer_cache(*this, cpu_memory_, buffer_cache_runtime), | ||||||
|  |       query_cache_runtime(this, cpu_memory_, buffer_cache, device, memory_allocator, scheduler, | ||||||
|  |                           staging_pool, compute_pass_descriptor_queue, descriptor_pool), | ||||||
|  |       query_cache(gpu, *this, cpu_memory_, query_cache_runtime), | ||||||
|       pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue, |       pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue, | ||||||
|                      render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), |                      render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), | ||||||
|       query_cache{*this, cpu_memory_, device, scheduler}, |  | ||||||
|       accelerate_dma(buffer_cache, texture_cache, scheduler), |       accelerate_dma(buffer_cache, texture_cache, scheduler), | ||||||
|       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), |       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), | ||||||
|       wfi_event(device.GetLogical().CreateEvent()) { |       wfi_event(device.GetLogical().CreateEvent()) { | ||||||
| @@ -189,13 +192,15 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { | |||||||
|     FlushWork(); |     FlushWork(); | ||||||
|     gpu_memory->FlushCaching(); |     gpu_memory->FlushCaching(); | ||||||
|  |  | ||||||
|  |     query_cache.NotifySegment(true); | ||||||
|  |  | ||||||
| #if ANDROID | #if ANDROID | ||||||
|     if (Settings::IsGPULevelHigh()) { |     if (Settings::IsGPULevelHigh()) { | ||||||
|         // This is problematic on Android, disable on GPU Normal. |         // This is problematic on Android, disable on GPU Normal. | ||||||
|         query_cache.UpdateCounters(); |         // query_cache.UpdateCounters(); | ||||||
|     } |     } | ||||||
| #else | #else | ||||||
|     query_cache.UpdateCounters(); |     // query_cache.UpdateCounters(); | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|     GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; |     GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; | ||||||
| @@ -207,13 +212,12 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { | |||||||
|     pipeline->SetEngine(maxwell3d, gpu_memory); |     pipeline->SetEngine(maxwell3d, gpu_memory); | ||||||
|     pipeline->Configure(is_indexed); |     pipeline->Configure(is_indexed); | ||||||
|  |  | ||||||
|     BeginTransformFeedback(); |  | ||||||
|  |  | ||||||
|     UpdateDynamicStates(); |     UpdateDynamicStates(); | ||||||
|  |  | ||||||
|  |     HandleTransformFeedback(); | ||||||
|  |     query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, | ||||||
|  |                               maxwell3d->regs.zpass_pixel_count_enable); | ||||||
|     draw_func(); |     draw_func(); | ||||||
|  |  | ||||||
|     EndTransformFeedback(); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { | void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { | ||||||
| @@ -241,6 +245,14 @@ void RasterizerVulkan::DrawIndirect() { | |||||||
|         const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer(); |         const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer(); | ||||||
|         const auto& buffer = indirect_buffer.first; |         const auto& buffer = indirect_buffer.first; | ||||||
|         const auto& offset = indirect_buffer.second; |         const auto& offset = indirect_buffer.second; | ||||||
|  |         if (params.is_byte_count) { | ||||||
|  |             scheduler.Record([buffer_obj = buffer->Handle(), offset, | ||||||
|  |                               stride = params.stride](vk::CommandBuffer cmdbuf) { | ||||||
|  |                 cmdbuf.DrawIndirectByteCountEXT(1, 0, buffer_obj, offset, 0, | ||||||
|  |                                                 static_cast<u32>(stride)); | ||||||
|  |             }); | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|         if (params.include_count) { |         if (params.include_count) { | ||||||
|             const auto count = buffer_cache.GetDrawIndirectCount(); |             const auto count = buffer_cache.GetDrawIndirectCount(); | ||||||
|             const auto& draw_buffer = count.first; |             const auto& draw_buffer = count.first; | ||||||
| @@ -280,13 +292,15 @@ void RasterizerVulkan::DrawTexture() { | |||||||
|     SCOPE_EXIT({ gpu.TickWork(); }); |     SCOPE_EXIT({ gpu.TickWork(); }); | ||||||
|     FlushWork(); |     FlushWork(); | ||||||
|  |  | ||||||
|  |     query_cache.NotifySegment(true); | ||||||
|  |  | ||||||
| #if ANDROID | #if ANDROID | ||||||
|     if (Settings::IsGPULevelHigh()) { |     if (Settings::IsGPULevelHigh()) { | ||||||
|         // This is problematic on Android, disable on GPU Normal. |         // This is problematic on Android, disable on GPU Normal. | ||||||
|         query_cache.UpdateCounters(); |         // query_cache.UpdateCounters(); | ||||||
|     } |     } | ||||||
| #else | #else | ||||||
|     query_cache.UpdateCounters(); |     // query_cache.UpdateCounters(); | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|     texture_cache.SynchronizeGraphicsDescriptors(); |     texture_cache.SynchronizeGraphicsDescriptors(); | ||||||
| @@ -294,6 +308,8 @@ void RasterizerVulkan::DrawTexture() { | |||||||
|  |  | ||||||
|     UpdateDynamicStates(); |     UpdateDynamicStates(); | ||||||
|  |  | ||||||
|  |     query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, | ||||||
|  |                               maxwell3d->regs.zpass_pixel_count_enable); | ||||||
|     const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); |     const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); | ||||||
|     const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); |     const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); | ||||||
|     const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); |     const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); | ||||||
| @@ -319,12 +335,16 @@ void RasterizerVulkan::Clear(u32 layer_count) { | |||||||
| #if ANDROID | #if ANDROID | ||||||
|     if (Settings::IsGPULevelHigh()) { |     if (Settings::IsGPULevelHigh()) { | ||||||
|         // This is problematic on Android, disable on GPU Normal. |         // This is problematic on Android, disable on GPU Normal. | ||||||
|         query_cache.UpdateCounters(); |         // query_cache.UpdateCounters(); | ||||||
|     } |     } | ||||||
| #else | #else | ||||||
|     query_cache.UpdateCounters(); |     // query_cache.UpdateCounters(); | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  |     query_cache.NotifySegment(true); | ||||||
|  |     query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, | ||||||
|  |                               maxwell3d->regs.zpass_pixel_count_enable); | ||||||
|  |  | ||||||
|     auto& regs = maxwell3d->regs; |     auto& regs = maxwell3d->regs; | ||||||
|     const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B || |     const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B || | ||||||
|                            regs.clear_surface.A; |                            regs.clear_surface.A; | ||||||
| @@ -482,13 +502,13 @@ void RasterizerVulkan::DispatchCompute() { | |||||||
|     scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); |     scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { | void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) { | ||||||
|     query_cache.ResetCounter(type); |     query_cache.CounterReset(type); | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, | ||||||
|                              std::optional<u64> timestamp) { |                              VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { | ||||||
|     query_cache.Query(gpu_addr, type, timestamp); |     query_cache.CounterReport(gpu_addr, type, flags, payload, subreport); | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | ||||||
| @@ -669,8 +689,8 @@ void RasterizerVulkan::SignalReference() { | |||||||
|     fence_manager.SignalReference(); |     fence_manager.SignalReference(); | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerVulkan::ReleaseFences() { | void RasterizerVulkan::ReleaseFences(bool force) { | ||||||
|     fence_manager.WaitPendingFences(); |     fence_manager.WaitPendingFences(force); | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size, | void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size, | ||||||
| @@ -694,6 +714,8 @@ void RasterizerVulkan::WaitForIdle() { | |||||||
|         flags |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT; |         flags |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     query_cache.NotifyWFI(); | ||||||
|  |  | ||||||
|     scheduler.RequestOutsideRenderPassOperationContext(); |     scheduler.RequestOutsideRenderPassOperationContext(); | ||||||
|     scheduler.Record([event = *wfi_event, flags](vk::CommandBuffer cmdbuf) { |     scheduler.Record([event = *wfi_event, flags](vk::CommandBuffer cmdbuf) { | ||||||
|         cmdbuf.SetEvent(event, flags); |         cmdbuf.SetEvent(event, flags); | ||||||
| @@ -737,19 +759,7 @@ void RasterizerVulkan::TickFrame() { | |||||||
|  |  | ||||||
| bool RasterizerVulkan::AccelerateConditionalRendering() { | bool RasterizerVulkan::AccelerateConditionalRendering() { | ||||||
|     gpu_memory->FlushCaching(); |     gpu_memory->FlushCaching(); | ||||||
|     if (Settings::IsGPULevelHigh()) { |     return query_cache.AccelerateHostConditionalRendering(); | ||||||
|         // TODO(Blinkhawk): Reimplement Host conditional rendering. |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
|     // Medium / Low Hack: stub any checks on queries written into the buffer cache. |  | ||||||
|     const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()}; |  | ||||||
|     Maxwell::ReportSemaphore::Compare cmp; |  | ||||||
|     if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp), |  | ||||||
|                                   VideoCommon::CacheType::BufferCache | |  | ||||||
|                                       VideoCommon::CacheType::QueryCache)) { |  | ||||||
|         return true; |  | ||||||
|     } |  | ||||||
|     return false; |  | ||||||
| } | } | ||||||
|  |  | ||||||
| bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, | bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, | ||||||
| @@ -795,6 +805,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||||||
|     if (!image_view) { |     if (!image_view) { | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
|  |     query_cache.NotifySegment(false); | ||||||
|     screen_info.image = image_view->ImageHandle(); |     screen_info.image = image_view->ImageHandle(); | ||||||
|     screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D); |     screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D); | ||||||
|     screen_info.width = image_view->size.width; |     screen_info.width = image_view->size.width; | ||||||
| @@ -933,31 +944,18 @@ void RasterizerVulkan::UpdateDynamicStates() { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerVulkan::BeginTransformFeedback() { | void RasterizerVulkan::HandleTransformFeedback() { | ||||||
|     const auto& regs = maxwell3d->regs; |     const auto& regs = maxwell3d->regs; | ||||||
|     if (regs.transform_feedback_enabled == 0) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     if (!device.IsExtTransformFeedbackSupported()) { |     if (!device.IsExtTransformFeedbackSupported()) { | ||||||
|         LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); |         LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|     UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) || |     query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, | ||||||
|                      regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation)); |                               regs.transform_feedback_enabled); | ||||||
|     scheduler.Record( |     if (regs.transform_feedback_enabled != 0) { | ||||||
|         [](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); }); |         UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) || | ||||||
| } |                          regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation)); | ||||||
|  |  | ||||||
| void RasterizerVulkan::EndTransformFeedback() { |  | ||||||
|     const auto& regs = maxwell3d->regs; |  | ||||||
|     if (regs.transform_feedback_enabled == 0) { |  | ||||||
|         return; |  | ||||||
|     } |     } | ||||||
|     if (!device.IsExtTransformFeedbackSupported()) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     scheduler.Record( |  | ||||||
|         [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { | ||||||
|   | |||||||
| @@ -84,8 +84,8 @@ public: | |||||||
|     void DrawTexture() override; |     void DrawTexture() override; | ||||||
|     void Clear(u32 layer_count) override; |     void Clear(u32 layer_count) override; | ||||||
|     void DispatchCompute() override; |     void DispatchCompute() override; | ||||||
|     void ResetCounter(VideoCore::QueryType type) override; |     void ResetCounter(VideoCommon::QueryType type) override; | ||||||
|     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |     void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; | ||||||
|     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||||||
|     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; |     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | ||||||
|     void FlushAll() override; |     void FlushAll() override; | ||||||
| @@ -106,7 +106,7 @@ public: | |||||||
|     void SyncOperation(std::function<void()>&& func) override; |     void SyncOperation(std::function<void()>&& func) override; | ||||||
|     void SignalSyncPoint(u32 value) override; |     void SignalSyncPoint(u32 value) override; | ||||||
|     void SignalReference() override; |     void SignalReference() override; | ||||||
|     void ReleaseFences() override; |     void ReleaseFences(bool force = true) override; | ||||||
|     void FlushAndInvalidateRegion( |     void FlushAndInvalidateRegion( | ||||||
|         VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |         VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|     void WaitForIdle() override; |     void WaitForIdle() override; | ||||||
| @@ -146,9 +146,7 @@ private: | |||||||
|  |  | ||||||
|     void UpdateDynamicStates(); |     void UpdateDynamicStates(); | ||||||
|  |  | ||||||
|     void BeginTransformFeedback(); |     void HandleTransformFeedback(); | ||||||
|  |  | ||||||
|     void EndTransformFeedback(); |  | ||||||
|  |  | ||||||
|     void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); |     void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); | ||||||
|     void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); |     void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); | ||||||
| @@ -195,8 +193,9 @@ private: | |||||||
|     TextureCache texture_cache; |     TextureCache texture_cache; | ||||||
|     BufferCacheRuntime buffer_cache_runtime; |     BufferCacheRuntime buffer_cache_runtime; | ||||||
|     BufferCache buffer_cache; |     BufferCache buffer_cache; | ||||||
|     PipelineCache pipeline_cache; |     QueryCacheRuntime query_cache_runtime; | ||||||
|     QueryCache query_cache; |     QueryCache query_cache; | ||||||
|  |     PipelineCache pipeline_cache; | ||||||
|     AccelerateDMA accelerate_dma; |     AccelerateDMA accelerate_dma; | ||||||
|     FenceManager fence_manager; |     FenceManager fence_manager; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -243,10 +243,10 @@ void Scheduler::AllocateNewContext() { | |||||||
| #if ANDROID | #if ANDROID | ||||||
|         if (Settings::IsGPULevelHigh()) { |         if (Settings::IsGPULevelHigh()) { | ||||||
|             // This is problematic on Android, disable on GPU Normal. |             // This is problematic on Android, disable on GPU Normal. | ||||||
|             query_cache->UpdateCounters(); |             query_cache->NotifySegment(true); | ||||||
|         } |         } | ||||||
| #else | #else | ||||||
|         query_cache->UpdateCounters(); |         query_cache->NotifySegment(true); | ||||||
| #endif | #endif | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -261,11 +261,12 @@ void Scheduler::EndPendingOperations() { | |||||||
| #if ANDROID | #if ANDROID | ||||||
|     if (Settings::IsGPULevelHigh()) { |     if (Settings::IsGPULevelHigh()) { | ||||||
|         // This is problematic on Android, disable on GPU Normal. |         // This is problematic on Android, disable on GPU Normal. | ||||||
|         query_cache->DisableStreams(); |         // query_cache->DisableStreams(); | ||||||
|     } |     } | ||||||
| #else | #else | ||||||
|     query_cache->DisableStreams(); |     // query_cache->DisableStreams(); | ||||||
| #endif | #endif | ||||||
|  |     query_cache->NotifySegment(false); | ||||||
|     EndRenderPass(); |     EndRenderPass(); | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -15,6 +15,7 @@ | |||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/polyfill_thread.h" | #include "common/polyfill_thread.h" | ||||||
| #include "video_core/renderer_vulkan/vk_master_semaphore.h" | #include "video_core/renderer_vulkan/vk_master_semaphore.h" | ||||||
|  | #include "video_core/renderer_vulkan/vk_query_cache.h" | ||||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||||
|  |  | ||||||
| namespace Vulkan { | namespace Vulkan { | ||||||
| @@ -24,7 +25,6 @@ class Device; | |||||||
| class Framebuffer; | class Framebuffer; | ||||||
| class GraphicsPipeline; | class GraphicsPipeline; | ||||||
| class StateTracker; | class StateTracker; | ||||||
| class QueryCache; |  | ||||||
|  |  | ||||||
| /// The scheduler abstracts command buffer and fence management with an interface that's able to do | /// The scheduler abstracts command buffer and fence management with an interface that's able to do | ||||||
| /// OpenGL-like operations on Vulkan command buffers. | /// OpenGL-like operations on Vulkan command buffers. | ||||||
|   | |||||||
| @@ -61,6 +61,7 @@ VK_DEFINE_HANDLE(VmaAllocator) | |||||||
|  |  | ||||||
| // Define miscellaneous extensions which may be used by the implementation here. | // Define miscellaneous extensions which may be used by the implementation here. | ||||||
| #define FOR_EACH_VK_EXTENSION(EXTENSION)                                                           \ | #define FOR_EACH_VK_EXTENSION(EXTENSION)                                                           \ | ||||||
|  |     EXTENSION(EXT, CONDITIONAL_RENDERING, conditional_rendering)                                   \ | ||||||
|     EXTENSION(EXT, CONSERVATIVE_RASTERIZATION, conservative_rasterization)                         \ |     EXTENSION(EXT, CONSERVATIVE_RASTERIZATION, conservative_rasterization)                         \ | ||||||
|     EXTENSION(EXT, DEPTH_RANGE_UNRESTRICTED, depth_range_unrestricted)                             \ |     EXTENSION(EXT, DEPTH_RANGE_UNRESTRICTED, depth_range_unrestricted)                             \ | ||||||
|     EXTENSION(EXT, MEMORY_BUDGET, memory_budget)                                                   \ |     EXTENSION(EXT, MEMORY_BUDGET, memory_budget)                                                   \ | ||||||
| @@ -93,6 +94,7 @@ VK_DEFINE_HANDLE(VmaAllocator) | |||||||
|  |  | ||||||
| // Define extensions where the absence of the extension may result in a degraded experience. | // Define extensions where the absence of the extension may result in a degraded experience. | ||||||
| #define FOR_EACH_VK_RECOMMENDED_EXTENSION(EXTENSION_NAME)                                          \ | #define FOR_EACH_VK_RECOMMENDED_EXTENSION(EXTENSION_NAME)                                          \ | ||||||
|  |     EXTENSION_NAME(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME)                                    \ | ||||||
|     EXTENSION_NAME(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME)                               \ |     EXTENSION_NAME(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME)                               \ | ||||||
|     EXTENSION_NAME(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME)                                 \ |     EXTENSION_NAME(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME)                                 \ | ||||||
|     EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME)                                   \ |     EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME)                                   \ | ||||||
| @@ -536,6 +538,10 @@ public: | |||||||
|         return extensions.shader_atomic_int64; |         return extensions.shader_atomic_int64; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     bool IsExtConditionalRendering() const { | ||||||
|  |         return extensions.conditional_rendering; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     bool HasTimelineSemaphore() const; |     bool HasTimelineSemaphore() const; | ||||||
|  |  | ||||||
|     /// Returns the minimum supported version of SPIR-V. |     /// Returns the minimum supported version of SPIR-V. | ||||||
|   | |||||||
| @@ -75,6 +75,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||||||
|     X(vkBeginCommandBuffer); |     X(vkBeginCommandBuffer); | ||||||
|     X(vkBindBufferMemory); |     X(vkBindBufferMemory); | ||||||
|     X(vkBindImageMemory); |     X(vkBindImageMemory); | ||||||
|  |     X(vkCmdBeginConditionalRenderingEXT); | ||||||
|     X(vkCmdBeginQuery); |     X(vkCmdBeginQuery); | ||||||
|     X(vkCmdBeginRenderPass); |     X(vkCmdBeginRenderPass); | ||||||
|     X(vkCmdBeginTransformFeedbackEXT); |     X(vkCmdBeginTransformFeedbackEXT); | ||||||
| @@ -91,6 +92,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||||||
|     X(vkCmdCopyBufferToImage); |     X(vkCmdCopyBufferToImage); | ||||||
|     X(vkCmdCopyImage); |     X(vkCmdCopyImage); | ||||||
|     X(vkCmdCopyImageToBuffer); |     X(vkCmdCopyImageToBuffer); | ||||||
|  |     X(vkCmdCopyQueryPoolResults); | ||||||
|     X(vkCmdDispatch); |     X(vkCmdDispatch); | ||||||
|     X(vkCmdDispatchIndirect); |     X(vkCmdDispatchIndirect); | ||||||
|     X(vkCmdDraw); |     X(vkCmdDraw); | ||||||
| @@ -99,6 +101,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||||||
|     X(vkCmdDrawIndexedIndirect); |     X(vkCmdDrawIndexedIndirect); | ||||||
|     X(vkCmdDrawIndirectCount); |     X(vkCmdDrawIndirectCount); | ||||||
|     X(vkCmdDrawIndexedIndirectCount); |     X(vkCmdDrawIndexedIndirectCount); | ||||||
|  |     X(vkCmdEndConditionalRenderingEXT); | ||||||
|     X(vkCmdEndQuery); |     X(vkCmdEndQuery); | ||||||
|     X(vkCmdEndRenderPass); |     X(vkCmdEndRenderPass); | ||||||
|     X(vkCmdEndTransformFeedbackEXT); |     X(vkCmdEndTransformFeedbackEXT); | ||||||
|   | |||||||
| @@ -185,6 +185,7 @@ struct DeviceDispatch : InstanceDispatch { | |||||||
|     PFN_vkBeginCommandBuffer vkBeginCommandBuffer{}; |     PFN_vkBeginCommandBuffer vkBeginCommandBuffer{}; | ||||||
|     PFN_vkBindBufferMemory vkBindBufferMemory{}; |     PFN_vkBindBufferMemory vkBindBufferMemory{}; | ||||||
|     PFN_vkBindImageMemory vkBindImageMemory{}; |     PFN_vkBindImageMemory vkBindImageMemory{}; | ||||||
|  |     PFN_vkCmdBeginConditionalRenderingEXT vkCmdBeginConditionalRenderingEXT{}; | ||||||
|     PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; |     PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; | ||||||
|     PFN_vkCmdBeginQuery vkCmdBeginQuery{}; |     PFN_vkCmdBeginQuery vkCmdBeginQuery{}; | ||||||
|     PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{}; |     PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{}; | ||||||
| @@ -202,6 +203,7 @@ struct DeviceDispatch : InstanceDispatch { | |||||||
|     PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage{}; |     PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage{}; | ||||||
|     PFN_vkCmdCopyImage vkCmdCopyImage{}; |     PFN_vkCmdCopyImage vkCmdCopyImage{}; | ||||||
|     PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer{}; |     PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer{}; | ||||||
|  |     PFN_vkCmdCopyQueryPoolResults vkCmdCopyQueryPoolResults{}; | ||||||
|     PFN_vkCmdDispatch vkCmdDispatch{}; |     PFN_vkCmdDispatch vkCmdDispatch{}; | ||||||
|     PFN_vkCmdDispatchIndirect vkCmdDispatchIndirect{}; |     PFN_vkCmdDispatchIndirect vkCmdDispatchIndirect{}; | ||||||
|     PFN_vkCmdDraw vkCmdDraw{}; |     PFN_vkCmdDraw vkCmdDraw{}; | ||||||
| @@ -210,6 +212,7 @@ struct DeviceDispatch : InstanceDispatch { | |||||||
|     PFN_vkCmdDrawIndexedIndirect vkCmdDrawIndexedIndirect{}; |     PFN_vkCmdDrawIndexedIndirect vkCmdDrawIndexedIndirect{}; | ||||||
|     PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{}; |     PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{}; | ||||||
|     PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{}; |     PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{}; | ||||||
|  |     PFN_vkCmdEndConditionalRenderingEXT vkCmdEndConditionalRenderingEXT{}; | ||||||
|     PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; |     PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; | ||||||
|     PFN_vkCmdEndQuery vkCmdEndQuery{}; |     PFN_vkCmdEndQuery vkCmdEndQuery{}; | ||||||
|     PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; |     PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; | ||||||
| @@ -1270,6 +1273,13 @@ public: | |||||||
|                                     regions.data()); |                                     regions.data()); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     void CopyQueryPoolResults(VkQueryPool query_pool, u32 first_query, u32 query_count, | ||||||
|  |                               VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize stride, | ||||||
|  |                               VkQueryResultFlags flags) const noexcept { | ||||||
|  |         dld->vkCmdCopyQueryPoolResults(handle, query_pool, first_query, query_count, dst_buffer, | ||||||
|  |                                        dst_offset, stride, flags); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size, |     void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size, | ||||||
|                     u32 data) const noexcept { |                     u32 data) const noexcept { | ||||||
|         dld->vkCmdFillBuffer(handle, dst_buffer, dst_offset, size, data); |         dld->vkCmdFillBuffer(handle, dst_buffer, dst_offset, size, data); | ||||||
| @@ -1448,6 +1458,15 @@ public: | |||||||
|                                           counter_buffers, counter_buffer_offsets); |                                           counter_buffers, counter_buffer_offsets); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     void BeginConditionalRenderingEXT( | ||||||
|  |         const VkConditionalRenderingBeginInfoEXT& info) const noexcept { | ||||||
|  |         dld->vkCmdBeginConditionalRenderingEXT(handle, &info); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     void EndConditionalRenderingEXT() const noexcept { | ||||||
|  |         dld->vkCmdEndConditionalRenderingEXT(handle); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     void BeginDebugUtilsLabelEXT(const char* label, std::span<float, 4> color) const noexcept { |     void BeginDebugUtilsLabelEXT(const char* label, std::span<float, 4> color) const noexcept { | ||||||
|         const VkDebugUtilsLabelEXT label_info{ |         const VkDebugUtilsLabelEXT label_info{ | ||||||
|             .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, |             .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Fernando Sahmkow
					Fernando Sahmkow