From d86f9cd70910d4b96ec301e7d532b11d18a290a4 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 7 May 2019 17:30:36 -0400
Subject: [PATCH] Change texture_cache caching from GPUAddr to CacheAddr

This also reverts the changes that made invalidation and flushing go through
the GPU address.
---
 src/video_core/memory_manager.cpp             |   2 +-
 src/video_core/rasterizer_interface.h         |   4 -
 .../renderer_opengl/gl_rasterizer.cpp         |  16 ---
 .../renderer_opengl/gl_rasterizer.h           |   2 -
 src/video_core/texture_cache/surface_base.cpp |   5 +-
 src/video_core/texture_cache/surface_base.h   |  30 +++---
 src/video_core/texture_cache/texture_cache.h  | 102 +++++++-----------
 7 files changed, 60 insertions(+), 101 deletions(-)

diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 74a1441e39..5d8d126c18 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -69,7 +69,7 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
     const u64 aligned_size{Common::AlignUp(size, page_size)};
     const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
 
-    rasterizer.FlushAndInvalidateRegionEx(gpu_addr, cache_addr, aligned_size);
+    rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
     UnmapRange(gpu_addr, aligned_size);
 
     return gpu_addr;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index d5505ef9c8..3c18d3b1f8 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -49,10 +49,6 @@ public:
     /// and invalidated
     virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
 
-    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    /// and invalidated
-    virtual void FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) = 0;
-
     /// Attempt to use a faster method to perform a surface copy
     virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                        const Tegra::Engines::Fermi2D::Regs::Surface& dst,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 482d0428c9..77ac963b40 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -737,27 +737,11 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
     buffer_cache.InvalidateRegion(addr, size);
 }
 
-void RasterizerOpenGL::InvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) {
-    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-    if (!addr || !size) {
-        return;
-    }
-    texture_cache.InvalidateRegionEx(gpu_addr, size);
-    shader_cache.InvalidateRegion(addr, size);
-    global_cache.InvalidateRegion(addr, size);
-    buffer_cache.InvalidateRegion(addr, size);
-}
-
 void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
     FlushRegion(addr, size);
     InvalidateRegion(addr, size);
 }
 
-void RasterizerOpenGL::FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) {
-    FlushRegion(addr, size);
-    InvalidateRegionEx(gpu_addr, addr, size);
-}
-
 bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                              const Tegra::Engines::Fermi2D::Regs::Surface& dst,
                                              const Common::Rectangle<u32>& src_rect,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 971a38ab7d..5c37d3bfa6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -64,9 +64,7 @@ public:
     void FlushAll() override;
     void FlushRegion(CacheAddr addr, u64 size) override;
     void InvalidateRegion(CacheAddr addr, u64 size) override;
-    void InvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size);
     void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
-    void FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) override;
     bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                const Tegra::Engines::Fermi2D::Regs::Surface& dst,
                                const Common::Rectangle<u32>& src_rect,
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 5273fcb444..0de0bc6561 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -25,7 +25,6 @@ SurfaceBaseImpl::SurfaceBaseImpl(const GPUVAddr gpu_vaddr, const SurfaceParams&
     u32 offset = 0;
     mipmap_offsets.resize(params.num_levels);
     mipmap_sizes.resize(params.num_levels);
-    gpu_addr_end = gpu_addr + memory_size;
     for (u32 i = 0; i < params.num_levels; i++) {
         mipmap_offsets[i] = offset;
         mipmap_sizes[i] = params.GetGuestMipmapSize(i);
@@ -99,8 +98,10 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
     }
 }
 
-void SurfaceBaseImpl::FlushBuffer(std::vector<u8>& staging_buffer) {
+void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
+                                  std::vector<u8>& staging_buffer) {
     MICROPROFILE_SCOPE(GPU_Flush_Texture);
+    auto host_ptr = memory_manager.GetPointer(gpu_addr);
     if (params.is_tiled) {
         ASSERT_MSG(params.block_width == 1, "Block width is defined as {}", params.block_width);
         for (u32 level = 0; level < params.num_levels; ++level) {
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index 9c048eb881..74be3237d2 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -45,40 +45,40 @@ class SurfaceBaseImpl {
 public:
     void LoadBuffer(Tegra::MemoryManager& memory_manager, std::vector<u8>& staging_buffer);
 
-    void FlushBuffer(std::vector<u8>& staging_buffer);
+    void FlushBuffer(Tegra::MemoryManager& memory_manager, std::vector<u8>& staging_buffer);
 
     GPUVAddr GetGpuAddr() const {
         return gpu_addr;
     }
 
-    GPUVAddr GetGpuAddrEnd() const {
-        return gpu_addr_end;
-    }
-
-    bool Overlaps(const GPUVAddr start, const GPUVAddr end) const {
-        return (gpu_addr < end) && (gpu_addr_end > start);
+    bool Overlaps(const CacheAddr start, const CacheAddr end) const {
+        return (cache_addr < end) && (cache_addr_end > start);
     }
 
     // Use only when recycling a surface
     void SetGpuAddr(const GPUVAddr new_addr) {
         gpu_addr = new_addr;
-        gpu_addr_end = new_addr + memory_size;
     }
 
     VAddr GetCpuAddr() const {
-        return gpu_addr;
+        return cpu_addr;
     }
 
     void SetCpuAddr(const VAddr new_addr) {
         cpu_addr = new_addr;
     }
 
-    u8* GetHostPtr() const {
-        return host_ptr;
+    CacheAddr GetCacheAddr() const {
+        return cache_addr;
     }
 
-    void SetHostPtr(u8* new_addr) {
-        host_ptr = new_addr;
+    CacheAddr GetCacheAddrEnd() const {
+        return cache_addr_end;
+    }
+
+    void SetCacheAddr(const CacheAddr new_addr) {
+        cache_addr = new_addr;
+        cache_addr_end = new_addr + memory_size;
     }
 
     const SurfaceParams& GetSurfaceParams() const {
@@ -201,13 +201,13 @@ protected:
 
     const SurfaceParams params;
     GPUVAddr gpu_addr{};
-    GPUVAddr gpu_addr_end{};
     std::vector<u32> mipmap_sizes;
     std::vector<u32> mipmap_offsets;
     const std::size_t layer_size;
     const std::size_t memory_size;
     const std::size_t host_memory_size;
-    u8* host_ptr;
+    CacheAddr cache_addr;
+    CacheAddr cache_addr_end{};
     VAddr cpu_addr;
 
 private:
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index f3b28453a7..43aaec0112 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -60,12 +60,6 @@ public:
         }
     }
 
-    void InvalidateRegionEx(GPUVAddr addr, std::size_t size) {
-        for (const auto& surface : GetSurfacesInRegionInner(addr, size)) {
-            Unregister(surface);
-        }
-    }
-
     TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
                             const VideoCommon::Shader::Sampler& entry) {
         const auto gpu_addr{config.tic.Address()};
@@ -154,9 +148,19 @@ public:
         return GetSurface(gpu_addr, params, true).second;
     }
 
-    TSurface TryFindFramebufferSurface(const u8* host_ptr) const {
-        const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))};
-        return it != registered_surfaces.end() ? *it->second.begin() : nullptr;
+    TSurface TryFindFramebufferSurface(const u8* host_ptr) {
+        const CacheAddr cache_addr = ToCacheAddr(host_ptr);
+        if (!cache_addr) {
+            return nullptr;
+        }
+        const CacheAddr page = cache_addr >> registry_page_bits;
+        std::list<TSurface>& list = registry[page];
+        for (auto& s : list) {
+            if (s->GetCacheAddr() == cache_addr) {
+                return s;
+            }
+        }
+        return nullptr;
     }
 
     u64 Tick() {
@@ -181,30 +185,28 @@ protected:
 
     void Register(TSurface surface) {
         const GPUVAddr gpu_addr = surface->GetGpuAddr();
-        u8* host_ptr = memory_manager->GetPointer(gpu_addr);
+        const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr));
         const std::size_t size = surface->GetSizeInBytes();
         const std::optional<VAddr> cpu_addr = memory_manager->GpuToCpuAddress(gpu_addr);
-        if (!host_ptr || !cpu_addr) {
+        if (!cache_ptr || !cpu_addr) {
             LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
                          gpu_addr);
             return;
         }
-        surface->SetHostPtr(host_ptr);
+        surface->SetCacheAddr(cache_ptr);
         surface->SetCpuAddr(*cpu_addr);
-        registered_surfaces.add({GetInterval(host_ptr, size), {surface}});
-        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
         RegisterInnerCache(surface);
         surface->MarkAsRegistered(true);
+        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
     }
 
     void Unregister(TSurface surface) {
         if (surface->IsProtected())
             return;
         const GPUVAddr gpu_addr = surface->GetGpuAddr();
-        const void* host_ptr = surface->GetHostPtr();
+        const CacheAddr cache_ptr = surface->GetCacheAddr();
         const std::size_t size = surface->GetSizeInBytes();
         const VAddr cpu_addr = surface->GetCpuAddr();
-        registered_surfaces.erase(GetInterval(host_ptr, size));
         rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
         UnregisterInnerCache(surface);
         surface->MarkAsRegistered(false);
@@ -280,7 +282,7 @@ private:
         }
     }
 
-    std::pair<TSurface, TView> RebuildMirage(TSurface current_surface,
+    std::pair<TSurface, TView> RebuildSurface(TSurface current_surface,
                                              const SurfaceParams& params) {
         const auto gpu_addr = current_surface->GetGpuAddr();
         TSurface new_surface = GetUncachedSurface(gpu_addr, params);
@@ -297,7 +299,7 @@ private:
                                                      const SurfaceParams& params) {
         const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
         if (is_mirage) {
-            return RebuildMirage(current_surface, params);
+            return RebuildSurface(current_surface, params);
         }
         const bool matches_target = current_surface->MatchTarget(params.target);
         if (matches_target) {
@@ -356,7 +358,7 @@ private:
         const auto host_ptr{memory_manager->GetPointer(gpu_addr)};
         const auto cache_addr{ToCacheAddr(host_ptr)};
         const std::size_t candidate_size = params.GetGuestSizeInBytes();
-        auto overlaps{GetSurfacesInRegionInner(gpu_addr, candidate_size)};
+        auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
         if (overlaps.empty()) {
             return InitializeSurface(gpu_addr, params, preserve_contents);
         }
@@ -378,7 +380,7 @@ private:
                 if (s_result == MatchStructureResult::FullMatch) {
                     return ManageStructuralMatch(current_surface, params);
                 } else {
-                    return RebuildMirage(current_surface, params);
+                    return RebuildSurface(current_surface, params);
                 }
             }
             if (current_surface->GetSizeInBytes() <= candidate_size) {
@@ -429,58 +431,40 @@ private:
         }
         staging_buffer.resize(surface->GetHostSizeInBytes());
         surface->DownloadTexture(staging_buffer);
-        surface->FlushBuffer(staging_buffer);
+        surface->FlushBuffer(*memory_manager, staging_buffer);
         surface->MarkAsModified(false, Tick());
     }
 
-    std::vector<TSurface> GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const {
-        if (size == 0) {
-            return {};
-        }
-        const IntervalType interval{cache_addr, cache_addr + size};
-
-        std::vector<TSurface> surfaces;
-        for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) {
-            for (auto& s : pair.second) {
-                if (!s || !s->IsRegistered()) {
-                    continue;
-                }
-                surfaces.push_back(s);
-            }
-        }
-        return surfaces;
-    }
-
     void RegisterInnerCache(TSurface& surface) {
-        GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits;
-        const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits;
+        CacheAddr start = surface->GetCacheAddr() >> registry_page_bits;
+        const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
         while (start <= end) {
-            inner_cache[start].push_back(surface);
+            registry[start].push_back(surface);
             start++;
         }
     }
 
     void UnregisterInnerCache(TSurface& surface) {
-        GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits;
-        const GPUVAddr end = (surface->GetGpuAddrEnd() - 1)  >> inner_cache_page_bits;
+        CacheAddr start = surface->GetCacheAddr() >> registry_page_bits;
+        const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
         while (start <= end) {
-            inner_cache[start].remove(surface);
+            registry[start].remove(surface);
             start++;
         }
     }
 
-    std::vector<TSurface> GetSurfacesInRegionInner(const GPUVAddr gpu_addr, const std::size_t size) {
+    std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) {
         if (size == 0) {
             return {};
         }
-        const GPUVAddr gpu_addr_end = gpu_addr + size;
-        GPUVAddr start = gpu_addr >> inner_cache_page_bits;
-        const GPUVAddr end = (gpu_addr_end - 1) >> inner_cache_page_bits;
+        const CacheAddr cache_addr_end = cache_addr + size;
+        CacheAddr start = cache_addr >> registry_page_bits;
+        const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits;
         std::vector<TSurface> surfaces;
         while (start <= end) {
-            std::list<TSurface>& list = inner_cache[start];
+            std::list<TSurface>& list = registry[start];
             for (auto& s : list) {
-                if (!s->IsPicked() && s->Overlaps(gpu_addr, gpu_addr_end)) {
+                if (!s->IsPicked() && s->Overlaps(cache_addr, cache_addr_end)) {
                     s->MarkAsPicked(true);
                     surfaces.push_back(s);
                 }
@@ -510,11 +494,6 @@ private:
         return {};
     }
 
-    IntervalType GetInterval(const void* host_ptr, const std::size_t size) const {
-        const CacheAddr addr = ToCacheAddr(host_ptr);
-        return IntervalType::right_open(addr, addr + size);
-    }
-
     struct RenderInfo {
         RenderTargetConfig config;
         TSurface target;
@@ -531,11 +510,12 @@ private:
 
     u64 ticks{};
 
-    IntervalMap registered_surfaces;
-
-    static constexpr u64 inner_cache_page_bits{20};
-    static constexpr u64 inner_cache_page_size{1 << inner_cache_page_bits};
-    std::unordered_map<GPUVAddr, std::list<TSurface>> inner_cache;
+    // The Texture Cache uses its own internal registry, based on buckets of 1MB.
+    // This fits the purpose of this cache better, as textures are normally large
+    // in size.
+    static constexpr u64 registry_page_bits{20};
+    static constexpr u64 registry_page_size{1 << registry_page_bits};
+    std::unordered_map<CacheAddr, std::list<TSurface>> registry;
 
     /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
     /// previously been used. This is to prevent surfaces from being constantly created and