gl_rasterizer_cache: Implement caching for texture and framebuffer surfaces.
gl_rasterizer_cache: Improved cache management based on Citra's implementation. gl_surface_cache: Add some docstrings.
This commit is contained in:
		| @@ -435,22 +435,35 @@ void RasterizerOpenGL::DrawArrays() { | ||||
|  | ||||
|     // Mark framebuffer surfaces as dirty | ||||
|     if (color_surface != nullptr && write_color_fb) { | ||||
|         res_cache.FlushSurface(color_surface); | ||||
|         res_cache.MarkSurfaceAsDirty(color_surface); | ||||
|     } | ||||
|     if (depth_surface != nullptr && write_depth_fb) { | ||||
|         res_cache.FlushSurface(depth_surface); | ||||
|         res_cache.MarkSurfaceAsDirty(depth_surface); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {} | ||||
|  | ||||
| void RasterizerOpenGL::FlushAll() {} | ||||
| /// Requests a flush of the whole range from address 0 with size Kernel::VMManager::MAX_ADDRESS. | ||||
| /// NOTE(review): MAX_ADDRESS comes from the kernel VM manager while FlushRegion() takes a GPU | ||||
| /// address -- confirm the two address spaces are meant to be interchangeable here. | ||||
| void RasterizerOpenGL::FlushAll() { | ||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||
|     res_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); | ||||
| } | ||||
|  | ||||
| void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {} | ||||
| /// Forwards a flush request for the region described by addr and size to the surface cache. | ||||
| void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||
|     res_cache.FlushRegion(addr, size); | ||||
| } | ||||
|  | ||||
| void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {} | ||||
| /// Forwards an invalidation request for the region described by addr and size to the surface | ||||
| /// cache. | ||||
| void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||
|     res_cache.InvalidateRegion(addr, size); | ||||
| } | ||||
|  | ||||
| void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {} | ||||
| /// Flushes and then invalidates the region described by addr and size in the surface cache. | ||||
| void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||
|     // Flush is requested first, before the invalidation is issued for the same region | ||||
|     res_cache.FlushRegion(addr, size); | ||||
|     res_cache.InvalidateRegion(addr, size); | ||||
| } | ||||
|  | ||||
| bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_Blits); | ||||
|   | ||||
| @@ -12,6 +12,7 @@ | ||||
| #include "core/core.h" | ||||
| #include "core/hle/kernel/process.h" | ||||
| #include "core/memory.h" | ||||
| #include "core/settings.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | ||||
| #include "video_core/textures/astc.h" | ||||
| @@ -215,7 +216,7 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup | ||||
|     cur_state.Apply(); | ||||
| } | ||||
|  | ||||
| CachedSurface::CachedSurface(const SurfaceParams& params) : params(params), gl_buffer_size(0) { | ||||
| CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) { | ||||
|     texture.Create(); | ||||
|     const auto& rect{params.GetRect()}; | ||||
|     AllocateSurfaceTexture(texture.handle, | ||||
| @@ -370,6 +371,12 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() { | ||||
|     draw_framebuffer.Create(); | ||||
| } | ||||
|  | ||||
| /// Unregisters every surface still held by the cache before the cache itself is destroyed. | ||||
| RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { | ||||
|     while (!surface_cache.empty()) { | ||||
|         // Copy the shared pointer first: UnregisterSurface() erases the map entry, so passing a | ||||
|         // reference to the stored element would leave its argument dangling after the erase. | ||||
|         const Surface surface{surface_cache.begin()->second}; | ||||
|         UnregisterSurface(surface); | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Returns the surface that GetSurface() yields for parameters derived from the given texture | ||||
| /// configuration. | ||||
| Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { | ||||
|     const SurfaceParams texture_params = SurfaceParams::CreateForTexture(config); | ||||
|     return GetSurface(texture_params); | ||||
| } | ||||
| @@ -425,9 +432,17 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { | ||||
|     surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); | ||||
| } | ||||
|  | ||||
| void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) { | ||||
|     surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); | ||||
|     surface->FlushGLBuffer(); | ||||
| /// Handles a surface that has been written to; the action taken depends on the | ||||
| /// use_accurate_framebuffers setting. | ||||
| void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) { | ||||
|     if (!Settings::values.use_accurate_framebuffers) { | ||||
|         // Don't mark surfaces that we write to as cached, because the resulting loads and | ||||
|         // flushes are very slow and do not seem to improve accuracy | ||||
|         const auto& region = surface->GetSurfaceParams(); | ||||
|         Memory::RasterizerMarkRegionCached(region.addr, region.size_in_bytes, false); | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     // Accurate framebuffers requested: always flush dirty surfaces | ||||
|     surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); | ||||
|     surface->FlushGLBuffer(); | ||||
| } | ||||
|  | ||||
| Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { | ||||
| @@ -441,13 +456,16 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { | ||||
|     Surface surface; | ||||
|     if (search != surface_cache.end()) { | ||||
|         surface = search->second; | ||||
|         if (Settings::values.use_accurate_framebuffers) { | ||||
|             // Reload the surface from Switch memory | ||||
|             LoadSurface(surface); | ||||
|         } | ||||
|     } else { | ||||
|         surface = std::make_shared<CachedSurface>(params); | ||||
|         surface_cache[surface_key] = surface; | ||||
|         RegisterSurface(surface); | ||||
|         LoadSurface(surface); | ||||
|     } | ||||
|  | ||||
|     LoadSurface(surface); | ||||
|  | ||||
|     return surface; | ||||
| } | ||||
|  | ||||
| @@ -476,3 +494,87 @@ Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const { | ||||
|  | ||||
|     return surfaces[0]; | ||||
| } | ||||
|  | ||||
| /// Placeholder: the body is empty and both (unnamed) parameters are ignored. | ||||
| void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size*/) { | ||||
|     // TODO(bunnei): This is unused in the current implementation of the rasterizer cache. We should | ||||
|     // probably implement this in the future, but for now, the `use_accurate_framebuffers` setting | ||||
|     // can be used to always flush. | ||||
| } | ||||
|  | ||||
| /// Unregisters every cached surface whose memory overlaps the region described by addr and size. | ||||
| void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) { | ||||
|     // UnregisterSurface() erases the matching entry from surface_cache, which invalidates any | ||||
|     // iterator pointing at it. Advance the iterator *before* unregistering and hold the surface | ||||
|     // by value so neither the loop nor the call argument refers to the erased element. | ||||
|     for (auto iter = surface_cache.begin(); iter != surface_cache.end();) { | ||||
|         const Surface surface{iter->second}; | ||||
|         ++iter; | ||||
|  | ||||
|         if (surface->GetSurfaceParams().IsOverlappingRegion(addr, size)) { | ||||
|             UnregisterSurface(surface); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Adds the surface to surface_cache and increments the cached-page counts for its memory | ||||
| /// range. Does nothing when an entry with the same key is already present. | ||||
| void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { | ||||
|     const auto& info = surface->GetSurfaceParams(); | ||||
|  | ||||
|     // emplace() leaves the map untouched and reports `false` if the key already exists | ||||
|     const bool inserted = surface_cache.emplace(SurfaceKey::Create(info), surface).second; | ||||
|     if (!inserted) { | ||||
|         // Registered already | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     UpdatePagesCachedCount(info.addr, info.size_in_bytes, 1); | ||||
| } | ||||
|  | ||||
| /// Removes the surface's entry from surface_cache and decrements the cached-page counts for | ||||
| /// its memory range. Does nothing when no entry with the surface's key exists. | ||||
| void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { | ||||
|     const auto& info = surface->GetSurfaceParams(); | ||||
|     const auto entry = surface_cache.find(SurfaceKey::Create(info)); | ||||
|  | ||||
|     if (entry != surface_cache.end()) { | ||||
|         // Update the page counts before erasing, while the surface parameters are still in use | ||||
|         UpdatePagesCachedCount(info.addr, info.size_in_bytes, -1); | ||||
|         surface_cache.erase(entry); | ||||
|     } | ||||
|     // Otherwise: unregistered already | ||||
| } | ||||
|  | ||||
| /// Wraps the result of map.equal_range(interval) in a boost iterator range, so the matching | ||||
| /// entries can be walked with a range-based for loop. | ||||
| template <typename Map, typename Interval> | ||||
| constexpr auto RangeFromInterval(Map& map, const Interval& interval) { | ||||
|     return boost::make_iterator_range(map.equal_range(interval)); | ||||
| } | ||||
|  | ||||
| /// Adds `delta` to the per-page counters in cached_pages for every page touched by the range | ||||
| /// starting at addr with the given size, and calls Memory::RasterizerMarkRegionCached() for the | ||||
| /// segments whose counter has just left zero (delta > 0) or is about to return to zero | ||||
| /// (delta < 0). | ||||
| /// NOTE(review): with size == 0 the expression `addr + size - 1` refers to the byte before addr | ||||
| /// (and wraps when addr is 0) -- confirm callers never pass an empty range. | ||||
| void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { | ||||
|     // Number of pages between the page of the first byte and the page of the last byte, inclusive | ||||
|     const u64 num_pages = ((addr + size - 1) >> Tegra::MemoryManager::PAGE_BITS) - | ||||
|                           (addr >> Tegra::MemoryManager::PAGE_BITS) + 1; | ||||
|     const u64 page_start = addr >> Tegra::MemoryManager::PAGE_BITS; | ||||
|     const u64 page_end = page_start + num_pages; | ||||
|  | ||||
|     // Interval maps will erase segments if count reaches 0, so if delta is negative we have to | ||||
|     // subtract after iterating | ||||
|     const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); | ||||
|     if (delta > 0) | ||||
|         cached_pages.add({pages_interval, delta}); | ||||
|  | ||||
|     for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { | ||||
|         // Clip the stored segment to the pages this call is responsible for | ||||
|         const auto interval = pair.first & pages_interval; | ||||
|         const int count = pair.second; | ||||
|  | ||||
|         // Convert the page indices of the clipped segment back into byte addresses | ||||
|         const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval) | ||||
|                                                     << Tegra::MemoryManager::PAGE_BITS; | ||||
|         const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval) | ||||
|                                                   << Tegra::MemoryManager::PAGE_BITS; | ||||
|         const u64 interval_size = interval_end_addr - interval_start_addr; | ||||
|  | ||||
|         // delta > 0 was already added above, so count == delta means the count was 0 before. | ||||
|         // delta < 0 is added below, so count == -delta means the count becomes 0 afterwards. | ||||
|         if (delta > 0 && count == delta) | ||||
|             Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); | ||||
|         else if (delta < 0 && count == -delta) | ||||
|             Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); | ||||
|         else | ||||
|             ASSERT(count >= 0); | ||||
|     } | ||||
|  | ||||
|     if (delta < 0) | ||||
|         cached_pages.add({pages_interval, delta}); | ||||
| } | ||||
|   | ||||
| @@ -8,7 +8,7 @@ | ||||
| #include <map> | ||||
| #include <memory> | ||||
| #include <vector> | ||||
|  | ||||
| #include <boost/icl/interval_map.hpp> | ||||
| #include "common/common_types.h" | ||||
| #include "common/hash.h" | ||||
| #include "common/math_util.h" | ||||
| @@ -19,6 +19,7 @@ | ||||
| class CachedSurface; | ||||
| using Surface = std::shared_ptr<CachedSurface>; | ||||
| using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; | ||||
| using PageMap = boost::icl::interval_map<u64, int>; | ||||
|  | ||||
| struct SurfaceParams { | ||||
|     enum class PixelFormat { | ||||
| @@ -243,8 +244,10 @@ struct SurfaceParams { | ||||
|         return SurfaceType::Invalid; | ||||
|     } | ||||
|  | ||||
|     /// Returns the rectangle corresponding to this surface | ||||
|     MathUtil::Rectangle<u32> GetRect() const; | ||||
|  | ||||
|     /// Returns the size of this surface in bytes, adjusted for compression | ||||
|     size_t SizeInBytes() const { | ||||
|         const u32 compression_factor{GetCompressionFactor(pixel_format)}; | ||||
|         ASSERT(width % compression_factor == 0); | ||||
| @@ -253,10 +256,18 @@ struct SurfaceParams { | ||||
|                GetFormatBpp(pixel_format) / CHAR_BIT; | ||||
|     } | ||||
|  | ||||
|     /// Returns the CPU virtual address for this surface | ||||
|     VAddr GetCpuAddr() const; | ||||
|  | ||||
|     /// Returns true if the specified region overlaps with this surface's region in Switch memory. | ||||
|     /// Both ranges are treated as half-open ([start, start + size)), so ranges that merely touch | ||||
|     /// at a boundary share no byte and are not reported as overlapping. | ||||
|     bool IsOverlappingRegion(Tegra::GPUVAddr region_addr, size_t region_size) const { | ||||
|         return addr < (region_addr + region_size) && region_addr < (addr + size_in_bytes); | ||||
|     } | ||||
|  | ||||
|     /// Creates SurfaceParams from a texture configuration | ||||
|     static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); | ||||
|  | ||||
|     /// Creates SurfaceParams from a framebuffer configuration | ||||
|     static SurfaceParams CreateForFramebuffer( | ||||
|         const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config); | ||||
|  | ||||
| @@ -272,6 +283,7 @@ struct SurfaceParams { | ||||
|     size_t size_in_bytes; | ||||
| }; | ||||
|  | ||||
| /// Hashable variation of SurfaceParams, used for a key in the surface cache | ||||
| struct SurfaceKey : Common::HashableStruct<SurfaceParams> { | ||||
|     static SurfaceKey Create(const SurfaceParams& params) { | ||||
|         SurfaceKey res; | ||||
| @@ -325,18 +337,43 @@ private: | ||||
| class RasterizerCacheOpenGL final : NonCopyable { | ||||
| public: | ||||
|     RasterizerCacheOpenGL(); | ||||
|     ~RasterizerCacheOpenGL(); | ||||
|  | ||||
|     /// Get a surface based on the texture configuration | ||||
|     Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); | ||||
|  | ||||
|     /// Get the color and depth surfaces based on the framebuffer configuration | ||||
|     SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, | ||||
|                                                     const MathUtil::Rectangle<s32>& viewport); | ||||
|     void LoadSurface(const Surface& surface); | ||||
|     void FlushSurface(const Surface& surface); | ||||
|  | ||||
|     /// Marks the specified surface as "dirty", in that it is out of sync with Switch memory | ||||
|     void MarkSurfaceAsDirty(const Surface& surface); | ||||
|  | ||||
|     /// Tries to find a framebuffer GPU address based on the provided CPU address | ||||
|     Surface TryFindFramebufferSurface(VAddr cpu_addr) const; | ||||
|  | ||||
|     /// Write any cached resources overlapping the region back to memory (if dirty) | ||||
|     void FlushRegion(Tegra::GPUVAddr addr, size_t size); | ||||
|  | ||||
|     /// Mark the specified region as being invalidated | ||||
|     void InvalidateRegion(Tegra::GPUVAddr addr, size_t size); | ||||
|  | ||||
| private: | ||||
|     void LoadSurface(const Surface& surface); | ||||
|     Surface GetSurface(const SurfaceParams& params); | ||||
|  | ||||
|     /// Register surface into the cache | ||||
|     void RegisterSurface(const Surface& surface); | ||||
|  | ||||
|     /// Remove surface from the cache | ||||
|     void UnregisterSurface(const Surface& surface); | ||||
|  | ||||
|     /// Increase/decrease the number of surfaces in pages touching the specified region | ||||
|     void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta); | ||||
|  | ||||
|     std::unordered_map<SurfaceKey, Surface> surface_cache; | ||||
|     PageMap cached_pages; | ||||
|  | ||||
|     OGLFramebuffer read_framebuffer; | ||||
|     OGLFramebuffer draw_framebuffer; | ||||
| }; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 bunnei
					bunnei