Merge pull request #10942 from FernandoS27/android-is-a-pain-in-the-a--
Memory Tracking: Add mechanism to register small writes when a page is contested by the GPU
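Editor's summary: this change gives each host CPU core its own Core::GPUDirtyMemoryManager. Memory::Impl::HandleRasterizerWrite records small CPU writes to GPU-contested pages into the per-core manager instead of invalidating GPU caches on the spot, and GPU::Impl::InvalidateGPUCache later drains every manager through System::GatherGPUDirtyMemory. Assuming YUZU_PAGEBITS is 12 as in yuzu's memory.h, each tracked entry covers a 2 KiB half-page: a page index plus a 32-bit mask whose bits each cover 64 bytes. The standalone sketch below (editorial, not part of the commit; the mask value is made up) shows how Gather's bit-walk decodes such a mask into contiguous byte ranges:

    #include <bit>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Editorial sketch of Gather's bit-walk (see the loop in the new header
    // below): decode one dirty mask into contiguous (offset, size) byte
    // ranges, where each bit covers a 64-byte chunk (align_bits == 6).
    int main() {
        constexpr std::size_t align_bits = 6;
        std::uint64_t mask = 0b0111'0000'0110ULL; // chunks 1-2 and 8-10 dirty
        std::size_t offset = 0;
        while (mask != 0) {
            const std::size_t zeros = std::countr_zero(mask);
            offset += zeros << align_bits; // skip the clean chunks
            mask >>= zeros;
            const std::size_t ones = std::countr_one(mask);
            std::printf("dirty range: offset=%zu size=%zu\n", offset,
                        ones << align_bits);
            mask >>= ones; // safe: ones < 64 for any mask built from a u32
            offset += ones << align_bits;
        }
        // Prints: offset=64 size=128, then offset=512 size=192.
    }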
		| @@ -27,6 +27,7 @@ | ||||
| #include "core/file_sys/savedata_factory.h" | ||||
| #include "core/file_sys/vfs_concat.h" | ||||
| #include "core/file_sys/vfs_real.h" | ||||
| #include "core/gpu_dirty_memory_manager.h" | ||||
| #include "core/hid/hid_core.h" | ||||
| #include "core/hle/kernel/k_memory_manager.h" | ||||
| #include "core/hle/kernel/k_process.h" | ||||
| @@ -130,7 +131,10 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, | ||||
| struct System::Impl { | ||||
|     explicit Impl(System& system) | ||||
|         : kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{}, | ||||
|           cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system} {} | ||||
|           cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system}, | ||||
|           gpu_dirty_memory_write_manager{} { | ||||
|         memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager); | ||||
|     } | ||||
|  | ||||
|     void Initialize(System& system) { | ||||
|         device_memory = std::make_unique<Core::DeviceMemory>(); | ||||
| @@ -234,6 +238,8 @@ struct System::Impl { | ||||
|         // Setting changes may require a full system reinitialization (e.g., disabling multicore). | ||||
|         ReinitializeIfNecessary(system); | ||||
|  | ||||
|         memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager); | ||||
|  | ||||
|         kernel.Initialize(); | ||||
|         cpu_manager.Initialize(); | ||||
|  | ||||
| @@ -540,6 +546,9 @@ struct System::Impl { | ||||
|  | ||||
|     std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{}; | ||||
|     std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{}; | ||||
|  | ||||
|     std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES> | ||||
|         gpu_dirty_memory_write_manager{}; | ||||
| }; | ||||
|  | ||||
| System::System() : impl{std::make_unique<Impl>(*this)} {} | ||||
| @@ -629,10 +638,31 @@ void System::PrepareReschedule(const u32 core_index) { | ||||
|     impl->kernel.PrepareReschedule(core_index); | ||||
| } | ||||
|  | ||||
| Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() { | ||||
|     const std::size_t core = impl->kernel.GetCurrentHostThreadID(); | ||||
|     return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES | ||||
|                                                     ? core | ||||
|                                                     : Core::Hardware::NUM_CPU_CORES - 1]; | ||||
| } | ||||
|  | ||||
| /// Provides a constant reference to the current GPU dirty memory manager. | ||||
| const Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() const { | ||||
|     const std::size_t core = impl->kernel.GetCurrentHostThreadID(); | ||||
|     return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES | ||||
|                                                     ? core | ||||
|                                                     : Core::Hardware::NUM_CPU_CORES - 1]; | ||||
| } | ||||
|  | ||||
| size_t System::GetCurrentHostThreadID() const { | ||||
|     return impl->kernel.GetCurrentHostThreadID(); | ||||
| } | ||||
|  | ||||
| void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) { | ||||
|     for (auto& manager : impl->gpu_dirty_memory_write_manager) { | ||||
|         manager.Gather(callback); | ||||
|     } | ||||
| } | ||||
|  | ||||
| PerfStatsResults System::GetAndResetPerfStats() { | ||||
|     return impl->GetAndResetPerfStats(); | ||||
| } | ||||
|   | ||||
| @@ -108,9 +108,10 @@ class CpuManager; | ||||
| class Debugger; | ||||
| class DeviceMemory; | ||||
| class ExclusiveMonitor; | ||||
| class SpeedLimiter; | ||||
| class GPUDirtyMemoryManager; | ||||
| class PerfStats; | ||||
| class Reporter; | ||||
| class SpeedLimiter; | ||||
| class TelemetrySession; | ||||
|  | ||||
| struct PerfStatsResults; | ||||
| @@ -225,6 +226,14 @@ public: | ||||
|     /// Prepare the core emulation for a reschedule | ||||
|     void PrepareReschedule(u32 core_index); | ||||
|  | ||||
|     /// Provides a reference to the current GPU dirty memory manager. | ||||
|     [[nodiscard]] Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager(); | ||||
|  | ||||
|     /// Provides a constant reference to the current GPU dirty memory manager. | ||||
|     [[nodiscard]] const Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager() const; | ||||
|  | ||||
|     void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback); | ||||
|  | ||||
|     [[nodiscard]] size_t GetCurrentHostThreadID() const; | ||||
|  | ||||
|     /// Gets and resets core performance statistics | ||||
|   | ||||
src/core/gpu_dirty_memory_manager.h (new file, +122 lines)
							| @@ -0,0 +1,122 @@ | ||||
| // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||||
| // SPDX-License-Identifier: GPL-3.0-or-later | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include <atomic> | ||||
| #include <bit> | ||||
| #include <functional> | ||||
| #include <mutex> | ||||
| #include <utility> | ||||
| #include <vector> | ||||
|  | ||||
| #include "core/memory.h" | ||||
|  | ||||
| namespace Core { | ||||
|  | ||||
| class GPUDirtyMemoryManager { | ||||
| public: | ||||
|     GPUDirtyMemoryManager() : current{default_transform} { | ||||
|         back_buffer.reserve(256); | ||||
|         front_buffer.reserve(256); | ||||
|     } | ||||
|  | ||||
|     ~GPUDirtyMemoryManager() = default; | ||||
|  | ||||
|     void Collect(VAddr address, size_t size) { | ||||
|         TransformAddress t = BuildTransform(address, size); | ||||
|         TransformAddress tmp, original; | ||||
|         do { | ||||
|             tmp = current.load(std::memory_order_acquire); | ||||
|             original = tmp; | ||||
|             if (tmp.address != t.address) { | ||||
|                 if (IsValid(tmp.address)) { | ||||
|                     std::scoped_lock lk(guard); | ||||
|                     back_buffer.emplace_back(tmp); | ||||
|                     current.exchange(t, std::memory_order_relaxed); | ||||
|                     return; | ||||
|                 } | ||||
|                 tmp.address = t.address; | ||||
|                 tmp.mask = 0; | ||||
|             } | ||||
|             if ((tmp.mask | t.mask) == tmp.mask) { | ||||
|                 return; | ||||
|             } | ||||
|             tmp.mask |= t.mask; | ||||
|         } while (!current.compare_exchange_weak(original, tmp, std::memory_order_release, | ||||
|                                                 std::memory_order_relaxed)); | ||||
|     } | ||||
|  | ||||
|     void Gather(std::function<void(VAddr, size_t)>& callback) { | ||||
|         { | ||||
|             std::scoped_lock lk(guard); | ||||
|             TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed); | ||||
|             front_buffer.swap(back_buffer); | ||||
|             if (IsValid(t.address)) { | ||||
|                 front_buffer.emplace_back(t); | ||||
|             } | ||||
|         } | ||||
|         for (auto& transform : front_buffer) { | ||||
|             size_t offset = 0; | ||||
|             u64 mask = transform.mask; | ||||
|             while (mask != 0) { | ||||
|                 const size_t empty_bits = std::countr_zero(mask); | ||||
|                 offset += empty_bits << align_bits; | ||||
|                 mask = mask >> empty_bits; | ||||
|  | ||||
|                 const size_t continuous_bits = std::countr_one(mask); | ||||
|                 callback((static_cast<VAddr>(transform.address) << page_bits) + offset, | ||||
|                          continuous_bits << align_bits); | ||||
|                 mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; | ||||
|                 offset += continuous_bits << align_bits; | ||||
|             } | ||||
|         } | ||||
|         front_buffer.clear(); | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     struct alignas(8) TransformAddress { | ||||
|         u32 address; | ||||
|         u32 mask; | ||||
|     }; | ||||
|  | ||||
|     constexpr static size_t page_bits = Memory::YUZU_PAGEBITS - 1; | ||||
|     constexpr static size_t page_size = 1ULL << page_bits; | ||||
|     constexpr static size_t page_mask = page_size - 1; | ||||
|  | ||||
|     constexpr static size_t align_bits = 6U; | ||||
|     constexpr static size_t align_size = 1U << align_bits; | ||||
|     constexpr static size_t align_mask = align_size - 1; | ||||
|     constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U}; | ||||
|  | ||||
|     bool IsValid(VAddr address) { | ||||
|         return address < (1ULL << 39); | ||||
|     } | ||||
|  | ||||
|     template <typename T> | ||||
|     T CreateMask(size_t top_bit, size_t minor_bit) { | ||||
|         T mask = ~T(0); | ||||
|         mask <<= (sizeof(T) * 8 - top_bit); | ||||
|         mask >>= (sizeof(T) * 8 - top_bit); | ||||
|         mask >>= minor_bit; | ||||
|         mask <<= minor_bit; | ||||
|         return mask; | ||||
|     } | ||||
|  | ||||
|     TransformAddress BuildTransform(VAddr address, size_t size) { | ||||
|         const size_t minor_address = address & page_mask; | ||||
|         const size_t minor_bit = minor_address >> align_bits; | ||||
|         const size_t top_bit = (minor_address + size + align_mask) >> align_bits; | ||||
|         TransformAddress result{}; | ||||
|         result.address = static_cast<u32>(address >> page_bits); | ||||
|         result.mask = CreateMask<u32>(top_bit, minor_bit); | ||||
|         return result; | ||||
|     } | ||||
|  | ||||
|     std::atomic<TransformAddress> current{}; | ||||
|     std::mutex guard; | ||||
|     std::vector<TransformAddress> back_buffer; | ||||
|     std::vector<TransformAddress> front_buffer; | ||||
| }; | ||||
|  | ||||
| } // namespace Core | ||||
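An editorial aside: a minimal usage sketch of the class above, assuming it is compiled inside the yuzu tree (the addresses are made up):

    #include <functional>

    #include "core/gpu_dirty_memory_manager.h"

    void Example() {
        Core::GPUDirtyMemoryManager manager;

        // Producer side, e.g. a CPU core that just wrote a contested page:
        manager.Collect(0x8004000, 16);  // dirties one 64-byte chunk
        manager.Collect(0x8004040, 128); // dirties the next two chunks

        // Consumer side, e.g. GPU::Impl::InvalidateGPUCache draining the
        // managers: adjacent dirty chunks arrive coalesced as one range.
        std::function<void(VAddr, size_t)> callback = [](VAddr addr, size_t size) {
            // Receives (0x8004000, 192) for the two Collect calls above.
        };
        manager.Gather(callback);
    }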
| @@ -3,6 +3,7 @@ | ||||
|  | ||||
| #include <algorithm> | ||||
| #include <cstring> | ||||
| #include <span> | ||||
|  | ||||
| #include "common/assert.h" | ||||
| #include "common/atomic_ops.h" | ||||
| @@ -13,6 +14,7 @@ | ||||
| #include "common/swap.h" | ||||
| #include "core/core.h" | ||||
| #include "core/device_memory.h" | ||||
| #include "core/gpu_dirty_memory_manager.h" | ||||
| #include "core/hardware_properties.h" | ||||
| #include "core/hle/kernel/k_page_table.h" | ||||
| #include "core/hle/kernel/k_process.h" | ||||
| @@ -678,7 +680,7 @@ struct Memory::Impl { | ||||
|                 LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, | ||||
|                           GetInteger(vaddr), static_cast<u64>(data)); | ||||
|             }, | ||||
|             [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(T)); }); | ||||
|             [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); }); | ||||
|         if (ptr) { | ||||
|             std::memcpy(ptr, &data, sizeof(T)); | ||||
|         } | ||||
| @@ -692,7 +694,7 @@ struct Memory::Impl { | ||||
|                 LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}", | ||||
|                           sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data)); | ||||
|             }, | ||||
|             [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(T)); }); | ||||
|             [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); }); | ||||
|         if (ptr) { | ||||
|             const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr); | ||||
|             return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); | ||||
| @@ -707,7 +709,7 @@ struct Memory::Impl { | ||||
|                 LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}", | ||||
|                           GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0])); | ||||
|             }, | ||||
|             [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(u128)); }); | ||||
|             [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(u128)); }); | ||||
|         if (ptr) { | ||||
|             const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr); | ||||
|             return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); | ||||
| @@ -717,7 +719,7 @@ struct Memory::Impl { | ||||
|  | ||||
|     void HandleRasterizerDownload(VAddr address, size_t size) { | ||||
|         const size_t core = system.GetCurrentHostThreadID(); | ||||
|         auto& current_area = rasterizer_areas[core]; | ||||
|         auto& current_area = rasterizer_read_areas[core]; | ||||
|         const VAddr end_address = address + size; | ||||
|         if (current_area.start_address <= address && end_address <= current_area.end_address) | ||||
|             [[likely]] { | ||||
| @@ -726,9 +728,31 @@ struct Memory::Impl { | ||||
|         current_area = system.GPU().OnCPURead(address, size); | ||||
|     } | ||||
|  | ||||
|     Common::PageTable* current_page_table = nullptr; | ||||
|     std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> rasterizer_areas{}; | ||||
|     void HandleRasterizerWrite(VAddr address, size_t size) { | ||||
|         const size_t core = system.GetCurrentHostThreadID(); | ||||
|         auto& current_area = rasterizer_write_areas[core]; | ||||
|         VAddr subaddress = address >> YUZU_PAGEBITS; | ||||
|         bool do_collection = current_area.last_address == subaddress; | ||||
|         if (!do_collection) [[unlikely]] { | ||||
|             do_collection = system.GPU().OnCPUWrite(address, size); | ||||
|             if (!do_collection) { | ||||
|                 return; | ||||
|             } | ||||
|             current_area.last_address = subaddress; | ||||
|         } | ||||
|         gpu_dirty_managers[core].Collect(address, size); | ||||
|     } | ||||
|  | ||||
|     struct GPUDirtyState { | ||||
|         VAddr last_address; | ||||
|     }; | ||||
|  | ||||
|     Core::System& system; | ||||
|     Common::PageTable* current_page_table = nullptr; | ||||
|     std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> | ||||
|         rasterizer_read_areas{}; | ||||
|     std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{}; | ||||
|     std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers; | ||||
| }; | ||||
|  | ||||
| Memory::Memory(Core::System& system_) : system{system_} { | ||||
| @@ -876,6 +900,10 @@ void Memory::ZeroBlock(Common::ProcessAddress dest_addr, const std::size_t size) | ||||
|     impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size); | ||||
| } | ||||
|  | ||||
| void Memory::SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers) { | ||||
|     impl->gpu_dirty_managers = managers; | ||||
| } | ||||
|  | ||||
| Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) { | ||||
|     return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size); | ||||
| } | ||||
|   | ||||
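Editorial note on the memory.cpp hunk above: rasterizer_write_areas caches, per host core, the last guest page the rasterizer reported as contested, so repeated small writes to that page call Collect directly instead of re-querying the GPU across threads every time. A simplified, hypothetical simulation of that control flow (names and types are stand-ins, not part of the commit):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    constexpr std::uint64_t kPageBits = 12; // stands in for YUZU_PAGEBITS

    struct GPUDirtyState {
        std::uint64_t last_address = ~0ULL; // no contested page cached yet
    };

    // Stand-in for system.GPU().OnCPUWrite(): pretend every page is contested.
    bool QueryRasterizer(std::uint64_t, std::size_t) {
        std::puts("slow path: rasterizer queried across threads");
        return true;
    }

    void HandleWrite(GPUDirtyState& area, std::uint64_t address, std::size_t size) {
        const std::uint64_t page = address >> kPageBits;
        if (area.last_address != page) {
            if (!QueryRasterizer(address, size)) {
                return; // page not contested: nothing to track
            }
            area.last_address = page; // cache the contested page
        }
        std::puts("fast path: Collect()"); // real code: gpu_dirty_managers[core].Collect
    }

    int main() {
        GPUDirtyState area;
        HandleWrite(area, 0x1000, 4); // slow path, then Collect
        HandleWrite(area, 0x1008, 4); // same page: fast path only
        HandleWrite(area, 0x1010, 8); // same page: fast path only
    }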
| @@ -5,6 +5,7 @@ | ||||
|  | ||||
| #include <cstddef> | ||||
| #include <memory> | ||||
| #include <span> | ||||
| #include <string> | ||||
| #include "common/typed_address.h" | ||||
| #include "core/hle/result.h" | ||||
| @@ -15,7 +16,8 @@ struct PageTable; | ||||
|  | ||||
| namespace Core { | ||||
| class System; | ||||
| } | ||||
| class GPUDirtyMemoryManager; | ||||
| } // namespace Core | ||||
|  | ||||
| namespace Kernel { | ||||
| class PhysicalMemory; | ||||
| @@ -458,6 +460,8 @@ public: | ||||
|      */ | ||||
|     void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); | ||||
|  | ||||
|     void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); | ||||
|  | ||||
| private: | ||||
|     Core::System& system; | ||||
|  | ||||
|   | ||||
| @@ -115,7 +115,34 @@ void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { | ||||
|  | ||||
| template <class P> | ||||
| void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { | ||||
|     memory_tracker.CachedCpuWrite(cpu_addr, size); | ||||
|     const bool is_dirty = IsRegionRegistered(cpu_addr, size); | ||||
|     if (!is_dirty) { | ||||
|         return; | ||||
|     } | ||||
|     VAddr aligned_start = Common::AlignDown(cpu_addr, YUZU_PAGESIZE); | ||||
|     VAddr aligned_end = Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE); | ||||
|     if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { | ||||
|         WriteMemory(cpu_addr, size); | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     tmp_buffer.resize_destructive(size); | ||||
|     cpu_memory.ReadBlockUnsafe(cpu_addr, tmp_buffer.data(), size); | ||||
|  | ||||
|     InlineMemoryImplementation(cpu_addr, size, tmp_buffer); | ||||
| } | ||||
|  | ||||
| template <class P> | ||||
| bool BufferCache<P>::OnCPUWrite(VAddr cpu_addr, u64 size) { | ||||
|     const bool is_dirty = IsRegionRegistered(cpu_addr, size); | ||||
|     if (!is_dirty) { | ||||
|         return false; | ||||
|     } | ||||
|     if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) { | ||||
|         return true; | ||||
|     } | ||||
|     WriteMemory(cpu_addr, size); | ||||
|     return false; | ||||
| } | ||||
|  | ||||
| template <class P> | ||||
| @@ -1553,6 +1580,14 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, | ||||
|         return false; | ||||
|     } | ||||
|  | ||||
|     InlineMemoryImplementation(dest_address, copy_size, inlined_buffer); | ||||
|  | ||||
|     return true; | ||||
| } | ||||
|  | ||||
| template <class P> | ||||
| void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size, | ||||
|                                                 std::span<const u8> inlined_buffer) { | ||||
|     const IntervalType subtract_interval{dest_address, dest_address + copy_size}; | ||||
|     ClearDownload(subtract_interval); | ||||
|     common_ranges.subtract(subtract_interval); | ||||
| @@ -1574,8 +1609,6 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, | ||||
|     } else { | ||||
|         buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size)); | ||||
|     } | ||||
|  | ||||
|     return true; | ||||
| } | ||||
|  | ||||
| template <class P> | ||||
|   | ||||
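Editorial note, inferred from the BufferCache<P>::OnCPUWrite implementation above: the boolean return encodes three cases for the caller (the rasterizer's OnCPUWrite):

    region registered?   GPU-modified?   action taken               returns
    no                   n/a             nothing                    false
    yes                  no              WriteMemory() right away   false
    yes                  yes             defer to dirty tracking    true

Only the true case makes Memory::Impl::HandleRasterizerWrite start collecting writes for the page; in the other two cases the write needs no deferred GPU-side invalidation.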
| @@ -245,6 +245,8 @@ public: | ||||
|  | ||||
|     void CachedWriteMemory(VAddr cpu_addr, u64 size); | ||||
|  | ||||
|     bool OnCPUWrite(VAddr cpu_addr, u64 size); | ||||
|  | ||||
|     void DownloadMemory(VAddr cpu_addr, u64 size); | ||||
|  | ||||
|     std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); | ||||
| @@ -543,6 +545,9 @@ private: | ||||
|  | ||||
|     void ClearDownload(IntervalType subtract_interval); | ||||
|  | ||||
|     void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, | ||||
|                                     std::span<const u8> inlined_buffer); | ||||
|  | ||||
|     VideoCore::RasterizerInterface& rasterizer; | ||||
|     Core::Memory::Memory& cpu_memory; | ||||
|  | ||||
|   | ||||
| @@ -69,7 +69,6 @@ public: | ||||
|     } | ||||
|  | ||||
|     void SignalFence(std::function<void()>&& func) { | ||||
|         rasterizer.InvalidateGPUCache(); | ||||
|         bool delay_fence = Settings::IsGPULevelHigh(); | ||||
|         if constexpr (!can_async_check) { | ||||
|             TryReleasePendingFences<false>(); | ||||
| @@ -96,6 +95,7 @@ public: | ||||
|             guard.unlock(); | ||||
|             cv.notify_all(); | ||||
|         } | ||||
|         rasterizer.InvalidateGPUCache(); | ||||
|     } | ||||
|  | ||||
|     void SignalSyncPoint(u32 value) { | ||||
|   | ||||
| @@ -95,7 +95,9 @@ struct GPU::Impl { | ||||
|  | ||||
|     /// Synchronizes CPU writes with Host GPU memory. | ||||
|     void InvalidateGPUCache() { | ||||
|         rasterizer->InvalidateGPUCache(); | ||||
|         std::function<void(VAddr, size_t)> callback_writes( | ||||
|             [this](VAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); }); | ||||
|         system.GatherGPUDirtyMemory(callback_writes); | ||||
|     } | ||||
|  | ||||
|     /// Signal the ending of command list. | ||||
| @@ -299,6 +301,10 @@ struct GPU::Impl { | ||||
|         gpu_thread.InvalidateRegion(addr, size); | ||||
|     } | ||||
|  | ||||
|     bool OnCPUWrite(VAddr addr, u64 size) { | ||||
|         return rasterizer->OnCPUWrite(addr, size); | ||||
|     } | ||||
|  | ||||
|     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | ||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||||
|         gpu_thread.FlushAndInvalidateRegion(addr, size); | ||||
| @@ -561,6 +567,10 @@ void GPU::InvalidateRegion(VAddr addr, u64 size) { | ||||
|     impl->InvalidateRegion(addr, size); | ||||
| } | ||||
|  | ||||
| bool GPU::OnCPUWrite(VAddr addr, u64 size) { | ||||
|     return impl->OnCPUWrite(addr, size); | ||||
| } | ||||
|  | ||||
| void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||||
|     impl->FlushAndInvalidateRegion(addr, size); | ||||
| } | ||||
|   | ||||
| @@ -250,6 +250,10 @@ public: | ||||
|     /// Notify rasterizer that any caches of the specified region should be invalidated | ||||
|     void InvalidateRegion(VAddr addr, u64 size); | ||||
|  | ||||
|     /// Notify rasterizer that the CPU is trying to write this area. Returns true if the area | ||||
|     /// must be tracked as dirty (the GPU has modified it), false otherwise | ||||
|     bool OnCPUWrite(VAddr addr, u64 size); | ||||
|  | ||||
|     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | ||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size); | ||||
|  | ||||
|   | ||||
| @@ -47,7 +47,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system, | ||||
|         } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { | ||||
|             rasterizer->FlushRegion(flush->addr, flush->size); | ||||
|         } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { | ||||
|             rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); | ||||
|             rasterizer->OnCacheInvalidation(invalidate->addr, invalidate->size); | ||||
|         } else { | ||||
|             ASSERT(false); | ||||
|         } | ||||
| @@ -102,12 +102,12 @@ void ThreadManager::TickGPU() { | ||||
| } | ||||
|  | ||||
| void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | ||||
|     rasterizer->OnCPUWrite(addr, size); | ||||
|     rasterizer->OnCacheInvalidation(addr, size); | ||||
| } | ||||
|  | ||||
| void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||||
|         // Skip flush on async mode, as FlushAndInvalidateRegion is not used for anything critical | ||||
|     rasterizer->OnCPUWrite(addr, size); | ||||
|     rasterizer->OnCacheInvalidation(addr, size); | ||||
| } | ||||
|  | ||||
| u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { | ||||
|   | ||||
| @@ -109,7 +109,9 @@ public: | ||||
|     } | ||||
|  | ||||
|     /// Notify rasterizer that any caches of the specified region are out of sync with the guest | ||||
|     virtual void OnCPUWrite(VAddr addr, u64 size) = 0; | ||||
|     virtual void OnCacheInvalidation(VAddr addr, u64 size) = 0; | ||||
|  | ||||
|     virtual bool OnCPUWrite(VAddr addr, u64 size) = 0; | ||||
|  | ||||
|     /// Sync memory between guest and host. | ||||
|     virtual void InvalidateGPUCache() = 0; | ||||
|   | ||||
| @@ -47,7 +47,10 @@ bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheTyp | ||||
|     return false; | ||||
| } | ||||
| void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | ||||
| void RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {} | ||||
| bool RasterizerNull::OnCPUWrite(VAddr addr, u64 size) { | ||||
|     return false; | ||||
| } | ||||
| void RasterizerNull::OnCacheInvalidation(VAddr addr, u64 size) {} | ||||
| VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) { | ||||
|     VideoCore::RasterizerDownloadArea new_area{ | ||||
|         .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), | ||||
|   | ||||
| @@ -53,7 +53,8 @@ public: | ||||
|                          VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|     void InvalidateRegion(VAddr addr, u64 size, | ||||
|                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|     void OnCPUWrite(VAddr addr, u64 size) override; | ||||
|     void OnCacheInvalidation(VAddr addr, u64 size) override; | ||||
|     bool OnCPUWrite(VAddr addr, u64 size) override; | ||||
|     VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; | ||||
|     void InvalidateGPUCache() override; | ||||
|     void UnmapMemory(VAddr addr, u64 size) override; | ||||
|   | ||||
| @@ -485,12 +485,33 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache | ||||
|     } | ||||
| } | ||||
|  | ||||
| void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | ||||
| bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||
|     if (addr == 0 || size == 0) { | ||||
|         return false; | ||||
|     } | ||||
|  | ||||
|     { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         if (buffer_cache.OnCPUWrite(addr, size)) { | ||||
|             return true; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     { | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         texture_cache.WriteMemory(addr, size); | ||||
|     } | ||||
|  | ||||
|     shader_cache.InvalidateRegion(addr, size); | ||||
|     return false; | ||||
| } | ||||
|  | ||||
| void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||
|     if (addr == 0 || size == 0) { | ||||
|         return; | ||||
|     } | ||||
|     shader_cache.OnCPUWrite(addr, size); | ||||
|     { | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         texture_cache.WriteMemory(addr, size); | ||||
| @@ -499,15 +520,11 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         buffer_cache.CachedWriteMemory(addr, size); | ||||
|     } | ||||
|     shader_cache.InvalidateRegion(addr, size); | ||||
| } | ||||
|  | ||||
| void RasterizerOpenGL::InvalidateGPUCache() { | ||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||
|     shader_cache.SyncGuestHost(); | ||||
|     { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         buffer_cache.FlushCachedWrites(); | ||||
|     } | ||||
|     gpu.InvalidateGPUCache(); | ||||
| } | ||||
|  | ||||
| void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { | ||||
| @@ -519,7 +536,7 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         buffer_cache.WriteMemory(addr, size); | ||||
|     } | ||||
|     shader_cache.OnCPUWrite(addr, size); | ||||
|     shader_cache.OnCacheInvalidation(addr, size); | ||||
| } | ||||
|  | ||||
| void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { | ||||
|   | ||||
| @@ -98,7 +98,8 @@ public: | ||||
|     VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; | ||||
|     void InvalidateRegion(VAddr addr, u64 size, | ||||
|                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|     void OnCPUWrite(VAddr addr, u64 size) override; | ||||
|     void OnCacheInvalidation(VAddr addr, u64 size) override; | ||||
|     bool OnCPUWrite(VAddr addr, u64 size) override; | ||||
|     void InvalidateGPUCache() override; | ||||
|     void UnmapMemory(VAddr addr, u64 size) override; | ||||
|     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | ||||
|   | ||||
| @@ -566,11 +566,32 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::s | ||||
|     } | ||||
| } | ||||
|  | ||||
| void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | ||||
| bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | ||||
|     if (addr == 0 || size == 0) { | ||||
|         return false; | ||||
|     } | ||||
|  | ||||
|     { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         if (buffer_cache.OnCPUWrite(addr, size)) { | ||||
|             return true; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     { | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         texture_cache.WriteMemory(addr, size); | ||||
|     } | ||||
|  | ||||
|     pipeline_cache.InvalidateRegion(addr, size); | ||||
|     return false; | ||||
| } | ||||
|  | ||||
| void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) { | ||||
|     if (addr == 0 || size == 0) { | ||||
|         return; | ||||
|     } | ||||
|     pipeline_cache.OnCPUWrite(addr, size); | ||||
|  | ||||
|     { | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         texture_cache.WriteMemory(addr, size); | ||||
| @@ -579,14 +600,11 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         buffer_cache.CachedWriteMemory(addr, size); | ||||
|     } | ||||
|     pipeline_cache.InvalidateRegion(addr, size); | ||||
| } | ||||
|  | ||||
| void RasterizerVulkan::InvalidateGPUCache() { | ||||
|     pipeline_cache.SyncGuestHost(); | ||||
|     { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         buffer_cache.FlushCachedWrites(); | ||||
|     } | ||||
|     gpu.InvalidateGPUCache(); | ||||
| } | ||||
|  | ||||
| void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { | ||||
| @@ -598,7 +616,7 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         buffer_cache.WriteMemory(addr, size); | ||||
|     } | ||||
|     pipeline_cache.OnCPUWrite(addr, size); | ||||
|     pipeline_cache.OnCacheInvalidation(addr, size); | ||||
| } | ||||
|  | ||||
| void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { | ||||
|   | ||||
| @@ -96,7 +96,8 @@ public: | ||||
|     void InvalidateRegion(VAddr addr, u64 size, | ||||
|                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|     void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; | ||||
|     void OnCPUWrite(VAddr addr, u64 size) override; | ||||
|     void OnCacheInvalidation(VAddr addr, u64 size) override; | ||||
|     bool OnCPUWrite(VAddr addr, u64 size) override; | ||||
|     void InvalidateGPUCache() override; | ||||
|     void UnmapMemory(VAddr addr, u64 size) override; | ||||
|     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | ||||
|   | ||||
| @@ -24,7 +24,7 @@ void ShaderCache::InvalidateRegion(VAddr addr, size_t size) { | ||||
|     RemovePendingShaders(); | ||||
| } | ||||
|  | ||||
| void ShaderCache::OnCPUWrite(VAddr addr, size_t size) { | ||||
| void ShaderCache::OnCacheInvalidation(VAddr addr, size_t size) { | ||||
|     std::scoped_lock lock{invalidation_mutex}; | ||||
|     InvalidatePagesInRegion(addr, size); | ||||
| } | ||||
|   | ||||
| @@ -62,7 +62,7 @@ public: | ||||
|     /// @brief Unmarks a memory region as cached and marks it for removal | ||||
|     /// @param addr Start address of the CPU write operation | ||||
|     /// @param size Number of bytes of the CPU write operation | ||||
|     void OnCPUWrite(VAddr addr, size_t size); | ||||
|     void OnCacheInvalidation(VAddr addr, size_t size); | ||||
|  | ||||
|     /// @brief Flushes delayed removal operations | ||||
|     void SyncGuestHost(); | ||||
|   | ||||