From e958b2c82875cf50c26854dd8fe651fa246dff9b Mon Sep 17 00:00:00 2001 From: Phantom Date: Mon, 25 Sep 2017 00:00:22 +0200 Subject: [PATCH 1/5] texcache upgrade --- src/core/hle/kernel/vm_manager.cpp | 1 - src/core/hle/service/gsp_gpu.cpp | 5 +- src/core/hw/gpu.cpp | 23 +- src/core/memory.cpp | 42 +- src/core/memory.h | 22 +- src/tests/core/arm/arm_test_common.cpp | 1 - src/video_core/rasterizer_interface.h | 3 + .../renderer_opengl/gl_rasterizer.cpp | 374 ++-- .../renderer_opengl/gl_rasterizer.h | 1 + .../renderer_opengl/gl_rasterizer_cache.cpp | 1567 ++++++++++------- .../renderer_opengl/gl_rasterizer_cache.h | 190 +- src/video_core/swrasterizer/swrasterizer.h | 1 + 12 files changed, 1287 insertions(+), 943 deletions(-) diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index 7a007c065..2d6af0cbb 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp @@ -58,7 +58,6 @@ void VMManager::Reset() { page_table.pointers.fill(nullptr); page_table.attributes.fill(Memory::PageType::Unmapped); - page_table.cached_res_count.fill(0); UpdatePageTableForVMA(initial_vma); } diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp index 88684b82d..be95718e9 100644 --- a/src/core/hle/service/gsp_gpu.cpp +++ b/src/core/hle/service/gsp_gpu.cpp @@ -476,10 +476,11 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { // TODO: Consider attempting rasterizer-accelerated surface blit if that usage is ever // possible/likely Memory::RasterizerFlushVirtualRegion(command.dma_request.source_address, - command.dma_request.size, Memory::FlushMode::Flush); + command.dma_request.size, + Memory::FlushMode::Flush); Memory::RasterizerFlushVirtualRegion(command.dma_request.dest_address, command.dma_request.size, - Memory::FlushMode::FlushAndInvalidate); + Memory::FlushMode::Invalidate); // TODO(Subv): These memory accesses should not go through the application's memory mapping. 
// They should go through the GSP module's memory mapping. diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 83ad9d898..9a6458d8e 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -96,20 +96,11 @@ static void MemoryFill(const Regs::MemoryFillConfig& config) { u8* start = Memory::GetPhysicalPointer(start_addr); u8* end = Memory::GetPhysicalPointer(end_addr); - // TODO: Consider always accelerating and returning vector of - // regions that the accelerated fill did not cover to - // reduce/eliminate the fill that the cpu has to do. - // This would also mean that the flush below is not needed. - // Fill should first flush all surfaces that touch but are - // not completely within the fill range. - // Then fill all completely covered surfaces, and return the - // regions that were between surfaces or within the touching - // ones for cpu to manually fill here. if (VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) return; - Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), - config.GetEndAddress() - config.GetStartAddress()); + Memory::RasterizerInvalidateRegion(config.GetStartAddress(), + config.GetEndAddress() - config.GetStartAddress()); if (config.fill_24bit) { // fill with 24-bit values @@ -199,7 +190,7 @@ static void DisplayTransfer(const Regs::DisplayTransferConfig& config) { u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size); - Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size); + Memory::RasterizerInvalidateRegion(config.GetPhysicalOutputAddress(), output_size); for (u32 y = 0; y < output_height; ++y) { for (u32 x = 0; x < output_width; ++x) { @@ -363,8 +354,12 @@ static void TextureCopy(const Regs::DisplayTransferConfig& config) { size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); - 
Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), - static_cast(contiguous_output_size)); + // Only need to flush output if it has a gap + const auto FlushInvalidate_fn = (output_gap != 0) ? + Memory::RasterizerFlushAndInvalidateRegion : + Memory::RasterizerInvalidateRegion; + FlushInvalidate_fn(config.GetPhysicalOutputAddress(), + static_cast(contiguous_output_size)); u32 remaining_input = input_width; u32 remaining_output = output_width; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 9b394f84b..dde010d4d 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -50,7 +50,6 @@ static void MapPages(PageTable& page_table, u32 base, u32 size, u8* memory, Page page_table.attributes[base] = type; page_table.pointers[base] = memory; - page_table.cached_res_count[base] = 0; base += 1; if (memory != nullptr) @@ -187,7 +186,7 @@ void Write(const VAddr vaddr, const T data) { ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); break; case PageType::RasterizerCachedMemory: { - RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::FlushAndInvalidate); + RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate); std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T)); break; } @@ -195,7 +194,7 @@ void Write(const VAddr vaddr, const T data) { WriteMMIO(GetMMIOHandler(vaddr), vaddr, data); break; case PageType::RasterizerCachedSpecial: { - RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::FlushAndInvalidate); + RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate); WriteMMIO(GetMMIOHandler(vaddr), vaddr, data); break; } @@ -315,7 +314,7 @@ u8* GetPhysicalPointer(PAddr address) { return target_pointer; } -void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { +void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached) { if (start == 0) { return; } @@ -336,14 +335,10 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { } 
VAddr vaddr = *maybe_vaddr; - u8& res_count = current_page_table->cached_res_count[vaddr >> PAGE_BITS]; - ASSERT_MSG(count_delta <= UINT8_MAX - res_count, - "Rasterizer resource cache counter overflow!"); - ASSERT_MSG(count_delta >= -res_count, "Rasterizer resource cache counter underflow!"); + PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; - // Switch page type to cached if now cached - if (res_count == 0) { - PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; + if (cached) { + // Switch page type to cached switch (page_type) { case PageType::Unmapped: // It is not necessary for a process to have this region mapped into its address @@ -360,12 +355,8 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { UNREACHABLE(); } } - - res_count += count_delta; - - // Switch page type to uncached if now uncached - if (res_count == 0) { - PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; + else { + // Switch page type to uncached switch (page_type) { case PageType::Unmapped: // It is not necessary for a process to have this region mapped into its address @@ -400,6 +391,12 @@ void RasterizerFlushRegion(PAddr start, u32 size) { } } +void RasterizerInvalidateRegion(PAddr start, u32 size) { + if (VideoCore::g_renderer != nullptr) { + VideoCore::g_renderer->Rasterizer()->InvalidateRegion(start, size); + } +} + void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size) { // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be // null here @@ -431,6 +428,9 @@ void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode) { case FlushMode::Flush: rasterizer->FlushRegion(physical_start, overlap_size); break; + case FlushMode::Invalidate: + rasterizer->InvalidateRegion(physical_start, overlap_size); + break; case FlushMode::FlushAndInvalidate: rasterizer->FlushAndInvalidateRegion(physical_start, overlap_size); break; @@ -556,13 +556,13 @@ void 
WriteBlock(const VAddr dest_addr, const void* src_buffer, const size_t size break; } case PageType::RasterizerCachedMemory: { - RasterizerFlushVirtualRegion(current_vaddr, copy_amount, FlushMode::FlushAndInvalidate); + RasterizerFlushVirtualRegion(current_vaddr, copy_amount, FlushMode::Invalidate); std::memcpy(GetPointerFromVMA(current_vaddr), src_buffer, copy_amount); break; } case PageType::RasterizerCachedSpecial: { DEBUG_ASSERT(GetMMIOHandler(current_vaddr)); - RasterizerFlushVirtualRegion(current_vaddr, copy_amount, FlushMode::FlushAndInvalidate); + RasterizerFlushVirtualRegion(current_vaddr, copy_amount, FlushMode::Invalidate); GetMMIOHandler(current_vaddr)->WriteBlock(current_vaddr, src_buffer, copy_amount); break; } @@ -608,13 +608,13 @@ void ZeroBlock(const VAddr dest_addr, const size_t size) { break; } case PageType::RasterizerCachedMemory: { - RasterizerFlushVirtualRegion(current_vaddr, copy_amount, FlushMode::FlushAndInvalidate); + RasterizerFlushVirtualRegion(current_vaddr, copy_amount, FlushMode::Invalidate); std::memset(GetPointerFromVMA(current_vaddr), 0, copy_amount); break; } case PageType::RasterizerCachedSpecial: { DEBUG_ASSERT(GetMMIOHandler(current_vaddr)); - RasterizerFlushVirtualRegion(current_vaddr, copy_amount, FlushMode::FlushAndInvalidate); + RasterizerFlushVirtualRegion(current_vaddr, copy_amount, FlushMode::Invalidate); GetMMIOHandler(current_vaddr)->WriteBlock(current_vaddr, zeros.data(), copy_amount); break; } diff --git a/src/core/memory.h b/src/core/memory.h index 1865bfea0..052935f74 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -68,12 +68,6 @@ struct PageTable { * the corresponding entry in `pointers` MUST be set to null. 
*/ std::array attributes; - - /** - * Indicates the number of externally cached resources touching a page that should be - * flushed before the memory is accessed - */ - std::array cached_res_count; }; /// Physical memory regions as seen from the ARM11 @@ -232,16 +226,20 @@ boost::optional PhysicalToVirtualAddress(PAddr addr); u8* GetPhysicalPointer(PAddr address); /** - * Adds the supplied value to the rasterizer resource cache counter of each - * page touching the region. + * Mark each page touching the region as cached. */ -void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta); +void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached); /** - * Flushes any externally cached rasterizer resources touching the given region. - */ +* Flushes any externally cached rasterizer resources touching the given region. +*/ void RasterizerFlushRegion(PAddr start, u32 size); +/** +* Invalidates any externally cached rasterizer resources touching the given region. +*/ +void RasterizerInvalidateRegion(PAddr start, u32 size); + /** * Flushes and invalidates any externally cached rasterizer resources touching the given region. 
*/ @@ -250,6 +248,8 @@ void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size); enum class FlushMode { /// Write back modified surfaces to RAM Flush, + /// Remove region from the cache + Invalidate, /// Write back modified surfaces to RAM, and also remove them from the cache FlushAndInvalidate, }; diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp index cfe0d503a..a43cf892f 100644 --- a/src/tests/core/arm/arm_test_common.cpp +++ b/src/tests/core/arm/arm_test_common.cpp @@ -16,7 +16,6 @@ TestEnvironment::TestEnvironment(bool mutable_memory_) page_table.pointers.fill(nullptr); page_table.attributes.fill(Memory::PageType::Unmapped); - page_table.cached_res_count.fill(0); Memory::MapIoRegion(page_table, 0x00000000, 0x80000000, test_memory); Memory::MapIoRegion(page_table, 0x80000000, 0x80000000, test_memory); diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 8ef7e74c7..1d4c98189 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -38,6 +38,9 @@ public: /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory virtual void FlushRegion(PAddr addr, u32 size) = 0; + /// Notify rasterizer that any caches of the specified region should be invalidated + virtual void InvalidateRegion(PAddr addr, u32 size) = 0; + /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory /// and invalidated virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7e09e4712..b1adc156e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -8,7 +8,6 @@ #include #include #include "common/assert.h" -#include "common/color.h" #include "common/logging/log.h" #include "common/math_util.h" #include 
"common/microprofile.h" @@ -23,6 +22,9 @@ #include "video_core/renderer_opengl/pica_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" +using PixelFormat = SurfaceParams::PixelFormat; +using SurfaceType = SurfaceParams::SurfaceType; + MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); @@ -225,12 +227,27 @@ void RasterizerOpenGL::DrawTriangles() { MICROPROFILE_SCOPE(OpenGL_Drawing); const auto& regs = Pica::g_state.regs; + const bool has_stencil = regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; + + const bool write_color_fb = state.color_mask.red_enabled == GL_TRUE || + state.color_mask.green_enabled == GL_TRUE || + state.color_mask.blue_enabled == GL_TRUE || + state.color_mask.alpha_enabled == GL_TRUE; + + const bool write_depth_fb = state.depth.write_mask == GL_TRUE || + (has_stencil && state.stencil.write_mask != 0); + + const bool using_color_fb = regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && + write_color_fb; + const bool using_depth_fb = regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 && + (state.depth.test_enabled || write_depth_fb); + // Sync and bind the framebuffer surfaces - CachedSurface* color_surface; - CachedSurface* depth_surface; + Surface color_surface; + Surface depth_surface; MathUtil::Rectangle rect; std::tie(color_surface, depth_surface, rect) = - res_cache.GetFramebufferSurfaces(regs.framebuffer.framebuffer); + res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb); state.draw.draw_framebuffer = framebuffer.handle; state.Apply(); @@ -238,8 +255,7 @@ void RasterizerOpenGL::DrawTriangles() { glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color_surface != nullptr ? 
color_surface->texture.handle : 0, 0); if (depth_surface != nullptr) { - if (regs.framebuffer.framebuffer.depth_format == - Pica::FramebufferRegs::DepthFormat::D24S8) { + if (has_stencil) { // attach both depth and stencil glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, depth_surface->texture.handle, 0); @@ -258,37 +274,42 @@ void RasterizerOpenGL::DrawTriangles() { // Sync the viewport // These registers hold half-width and half-height, so must be multiplied by 2 - GLsizei viewport_width = - (GLsizei)Pica::float24::FromRaw(regs.rasterizer.viewport_size_x).ToFloat32() * 2; - GLsizei viewport_height = - (GLsizei)Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() * 2; + const GLsizei viewport_width = + static_cast(Pica::float24::FromRaw(regs.rasterizer.viewport_size_x).ToFloat32() * 2); + const GLsizei viewport_height = + static_cast(Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() * 2); + + const float res_scale_width = color_surface != nullptr ? color_surface->res_scale_width : + (depth_surface == nullptr ? 1.0f : depth_surface->res_scale_width); + const float res_scale_height = color_surface != nullptr ? color_surface->res_scale_height : + (depth_surface == nullptr ? 
1.0f : depth_surface->res_scale_height); glViewport( - (GLint)(rect.left + regs.rasterizer.viewport_corner.x * color_surface->res_scale_width), - (GLint)(rect.bottom + regs.rasterizer.viewport_corner.y * color_surface->res_scale_height), - (GLsizei)(viewport_width * color_surface->res_scale_width), - (GLsizei)(viewport_height * color_surface->res_scale_height)); + static_cast(rect.left + regs.rasterizer.viewport_corner.x * res_scale_width), + static_cast(rect.bottom + regs.rasterizer.viewport_corner.y * res_scale_height), + static_cast(viewport_width * res_scale_width), + static_cast(viewport_height * res_scale_height)); - if (uniform_block_data.data.framebuffer_scale[0] != color_surface->res_scale_width || - uniform_block_data.data.framebuffer_scale[1] != color_surface->res_scale_height) { + if (uniform_block_data.data.framebuffer_scale[0] != res_scale_width || + uniform_block_data.data.framebuffer_scale[1] != res_scale_height) { - uniform_block_data.data.framebuffer_scale[0] = color_surface->res_scale_width; - uniform_block_data.data.framebuffer_scale[1] = color_surface->res_scale_height; + uniform_block_data.data.framebuffer_scale[0] = res_scale_width; + uniform_block_data.data.framebuffer_scale[1] = res_scale_height; uniform_block_data.dirty = true; } // Scissor checks are window-, not viewport-relative, which means that if the cached texture // sub-rect changes, the scissor bounds also need to be updated. GLint scissor_x1 = static_cast( - rect.left + regs.rasterizer.scissor_test.x1 * color_surface->res_scale_width); + rect.left + regs.rasterizer.scissor_test.x1 * res_scale_width); GLint scissor_y1 = static_cast( - rect.bottom + regs.rasterizer.scissor_test.y1 * color_surface->res_scale_height); + rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale_height); // x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when // scaling or doing multisampling. 
GLint scissor_x2 = static_cast( - rect.left + (regs.rasterizer.scissor_test.x2 + 1) * color_surface->res_scale_width); + rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale_width); GLint scissor_y2 = static_cast( - rect.bottom + (regs.rasterizer.scissor_test.y2 + 1) * color_surface->res_scale_height); + rect.bottom + (regs.rasterizer.scissor_test.y2 + 1) * res_scale_height); if (uniform_block_data.data.scissor_x1 != scissor_x1 || uniform_block_data.data.scissor_x2 != scissor_x2 || @@ -309,7 +330,7 @@ void RasterizerOpenGL::DrawTriangles() { if (texture.enabled) { texture_samplers[texture_index].SyncWithConfig(texture.config); - CachedSurface* surface = res_cache.GetTextureSurface(texture); + Surface surface = res_cache.GetTextureSurface(texture); if (surface != nullptr) { state.texture_units[texture_index].texture_2d = surface->texture.handle; } else { @@ -386,14 +407,27 @@ void RasterizerOpenGL::DrawTriangles() { glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); // Mark framebuffer surfaces as dirty - // TODO: Restrict invalidation area to the viewport - if (color_surface != nullptr) { - color_surface->dirty = true; - res_cache.FlushRegion(color_surface->addr, color_surface->size, color_surface, true); + const u32 viewport_offset = + ((regs.framebuffer.framebuffer.GetHeight() - regs.rasterizer.viewport_corner.y - viewport_height) + * regs.framebuffer.framebuffer.GetWidth()) + + regs.rasterizer.viewport_corner.x; + + const u32 viewport_size = ((viewport_height - 1) * regs.framebuffer.framebuffer.GetWidth()) + + viewport_width; + + if (color_surface != nullptr && write_color_fb) { + res_cache.InvalidateRegion( + regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() + + (viewport_offset * color_surface->bytes_per_pixel), + viewport_size * color_surface->bytes_per_pixel, + color_surface); } - if (depth_surface != nullptr) { - depth_surface->dirty = true; - res_cache.FlushRegion(depth_surface->addr, depth_surface->size, depth_surface, true); 
+ if (depth_surface != nullptr && write_depth_fb) { + res_cache.InvalidateRegion( + regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() + + (viewport_offset * depth_surface->bytes_per_pixel), + viewport_size * depth_surface->bytes_per_pixel, + depth_surface); } vertex_batch.clear(); @@ -891,227 +925,119 @@ void RasterizerOpenGL::FlushAll() { void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.FlushRegion(addr, size, nullptr, false); + res_cache.FlushRegion(addr, size); +} + +void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.InvalidateRegion(addr, size, nullptr); } void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.FlushRegion(addr, size, nullptr, true); + res_cache.FlushRegion(addr, size); + res_cache.InvalidateRegion(addr, size, nullptr); } bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { MICROPROFILE_SCOPE(OpenGL_Blits); - CachedSurface src_params; + SurfaceParams src_params; src_params.addr = config.GetPhysicalInputAddress(); - // It's important to use the correct source input width to properly skip over parts of the input - // image which will be cropped from the output but still affect the stride of the input image. - src_params.width = config.input_width; - // Using the output's height is fine because we don't read or skip over the remaining part of - // the image, and it allows for smaller texture cache lookup rectangles. 
+ src_params.width = config.output_width; + src_params.stride = config.input_width; src_params.height = config.output_height; src_params.is_tiled = !config.input_linear; - src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.input_format); + src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.input_format); + src_params.UpdateParams(); - CachedSurface dst_params; + SurfaceParams dst_params; dst_params.addr = config.GetPhysicalOutputAddress(); - dst_params.width = - config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value(); - dst_params.height = - config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value(); + dst_params.width = config.scaling != config.NoScale ? config.output_width.Value() / 2 : config.output_width.Value(); + dst_params.height = config.scaling == config.ScaleXY ? config.output_height.Value() / 2 : config.output_height.Value(); dst_params.is_tiled = config.input_linear != config.dont_swizzle; - dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format); + dst_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.output_format); + dst_params.UpdateParams(); MathUtil::Rectangle src_rect; - CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect); - - if (src_surface == nullptr) { + Surface src_surface; + std::tie(src_surface, src_rect) = res_cache.GetSurfaceSubRect(src_params, false, true); + if (src_surface == nullptr) return false; - } - - // Adjust the source rectangle to take into account parts of the input lines being cropped - if (config.input_width > config.output_width) { - src_rect.right -= static_cast((config.input_width - config.output_width) * - src_surface->res_scale_width); - } - - // Require destination surface to have same resolution scale as source to preserve scaling - dst_params.res_scale_width = src_surface->res_scale_width; - 
dst_params.res_scale_height = src_surface->res_scale_height; MathUtil::Rectangle dst_rect; - CachedSurface* dst_surface = res_cache.GetSurfaceRect(dst_params, true, false, dst_rect); - - if (dst_surface == nullptr) { + Surface dst_surface; + std::tie(dst_surface, dst_rect) = res_cache.GetSurfaceSubRect(dst_params, false, false); + if (dst_surface == nullptr) return false; - } - // Don't accelerate if the src and dst surfaces are the same - if (src_surface == dst_surface) { + if (config.flip_vertically) + std::swap(src_rect.top, src_rect.bottom); + + if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) return false; - } - if (config.flip_vertically) { - std::swap(dst_rect.top, dst_rect.bottom); - } - - if (!res_cache.TryBlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) { - return false; - } - - u32 dst_size = dst_params.width * dst_params.height * - CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8; - dst_surface->dirty = true; - res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true); + res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface); return true; } bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { - // TODO(tfarley): Try to hardware accelerate this - return false; + const u32 input_width = config.texture_copy.input_width * 16; + const u32 input_gap = config.texture_copy.input_gap * 16; + const u32 output_width = config.texture_copy.output_width * 16; + const u32 output_gap = config.texture_copy.output_gap * 16; + + if (config.texture_copy.size == 0) + return true; + + if (input_width != output_width || config.texture_copy.size % input_width != 0) + return false; + + SurfaceParams src_params; + src_params.addr = config.GetPhysicalInputAddress(); + src_params.stride = input_width + input_gap; // stride in bytes + src_params.width = input_width; // width in bytes + src_params.height = config.texture_copy.size / input_width; + 
src_params.size = ((src_params.height - 1) * src_params.stride) + src_params.width; + src_params.end = src_params.addr + src_params.size; + + MathUtil::Rectangle src_rect; + Surface src_surface; + std::tie(src_surface, src_rect) = res_cache.GetTexCopySurface(src_params); + if (src_surface == nullptr) + return false; + + if ((output_gap * 8) % SurfaceParams::GetFormatBpp(src_surface->pixel_format) != 0 || + (src_surface->is_tiled && src_surface->PixelsInBytes(output_gap) % 64 != 0)) + return false; + + SurfaceParams dst_params = *src_surface; + dst_params.addr = config.GetPhysicalOutputAddress(); + dst_params.stride = (output_width + output_gap) * src_surface->stride / src_params.stride; + dst_params.width = output_width * src_surface->stride / src_params.stride; + dst_params.height = src_surface->is_tiled ? src_params.height * 8 : src_params.height; + dst_params.UpdateParams(); + + const bool load_gap = output_gap != 0; // Since we are going to invalidate the gap if there is one, we will have to load it first + MathUtil::Rectangle dst_rect; + Surface dst_surface; + std::tie(dst_surface, dst_rect) = res_cache.GetSurfaceSubRect(dst_params, false, load_gap); + if (src_surface == nullptr) + return false; + + if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) + return false; + + res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface); + return true; } bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { - MICROPROFILE_SCOPE(OpenGL_Blits); - using PixelFormat = CachedSurface::PixelFormat; - using SurfaceType = CachedSurface::SurfaceType; - - CachedSurface* dst_surface = res_cache.TryGetFillSurface(config); - - if (dst_surface == nullptr) { + Surface dst_surface = res_cache.GetFillSurface(config); + if (dst_surface == nullptr) return false; - } - OpenGLState cur_state = OpenGLState::GetCurState(); - - SurfaceType dst_type = CachedSurface::GetFormatType(dst_surface->pixel_format); - - GLuint old_fb = 
cur_state.draw.draw_framebuffer; - cur_state.draw.draw_framebuffer = framebuffer.handle; - // TODO: When scissor test is implemented, need to disable scissor test in cur_state here so - // Clear call isn't affected - cur_state.Apply(); - - if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - dst_surface->texture.handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - - GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - - // TODO: Handle additional pixel format and fill value size combinations to accelerate more - // cases - // For instance, checking if fill value's bytes/bits repeat to allow filling - // I8/A8/I4/A4/... - // Currently only handles formats that are multiples of the fill value size - - if (config.fill_24bit) { - switch (dst_surface->pixel_format) { - case PixelFormat::RGB8: - color_values[0] = config.value_24bit_r / 255.0f; - color_values[1] = config.value_24bit_g / 255.0f; - color_values[2] = config.value_24bit_b / 255.0f; - break; - default: - return false; - } - } else if (config.fill_32bit) { - u32 value = config.value_32bit; - - switch (dst_surface->pixel_format) { - case PixelFormat::RGBA8: - color_values[0] = (value >> 24) / 255.0f; - color_values[1] = ((value >> 16) & 0xFF) / 255.0f; - color_values[2] = ((value >> 8) & 0xFF) / 255.0f; - color_values[3] = (value & 0xFF) / 255.0f; - break; - default: - return false; - } - } else { - u16 value_16bit = config.value_16bit.Value(); - Math::Vec4 color; - - switch (dst_surface->pixel_format) { - case PixelFormat::RGBA8: - color_values[0] = (value_16bit >> 8) / 255.0f; - color_values[1] = (value_16bit & 0xFF) / 255.0f; - color_values[2] = color_values[0]; - color_values[3] = color_values[1]; - break; - case PixelFormat::RGB5A1: - color = Color::DecodeRGB5A1((const u8*)&value_16bit); - color_values[0] = color[0] / 31.0f; - color_values[1] 
= color[1] / 31.0f; - color_values[2] = color[2] / 31.0f; - color_values[3] = color[3]; - break; - case PixelFormat::RGB565: - color = Color::DecodeRGB565((const u8*)&value_16bit); - color_values[0] = color[0] / 31.0f; - color_values[1] = color[1] / 63.0f; - color_values[2] = color[2] / 31.0f; - break; - case PixelFormat::RGBA4: - color = Color::DecodeRGBA4((const u8*)&value_16bit); - color_values[0] = color[0] / 15.0f; - color_values[1] = color[1] / 15.0f; - color_values[2] = color[2] / 15.0f; - color_values[3] = color[3] / 15.0f; - break; - case PixelFormat::IA8: - case PixelFormat::RG8: - color_values[0] = (value_16bit >> 8) / 255.0f; - color_values[1] = (value_16bit & 0xFF) / 255.0f; - break; - default: - return false; - } - } - - cur_state.color_mask.red_enabled = GL_TRUE; - cur_state.color_mask.green_enabled = GL_TRUE; - cur_state.color_mask.blue_enabled = GL_TRUE; - cur_state.color_mask.alpha_enabled = GL_TRUE; - cur_state.Apply(); - glClearBufferfv(GL_COLOR, 0, color_values); - } else if (dst_type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - dst_surface->texture.handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - GLfloat value_float; - if (dst_surface->pixel_format == CachedSurface::PixelFormat::D16) { - value_float = config.value_32bit / 65535.0f; // 2^16 - 1 - } else if (dst_surface->pixel_format == CachedSurface::PixelFormat::D24) { - value_float = config.value_32bit / 16777215.0f; // 2^24 - 1 - } - - cur_state.depth.write_mask = GL_TRUE; - cur_state.Apply(); - glClearBufferfv(GL_DEPTH, 0, &value_float); - } else if (dst_type == SurfaceType::DepthStencil) { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 
dst_surface->texture.handle, 0); - - GLfloat value_float = (config.value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1 - GLint value_int = (config.value_32bit >> 24); - - cur_state.depth.write_mask = GL_TRUE; - cur_state.stencil.write_mask = 0xFF; - cur_state.Apply(); - glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int); - } - - cur_state.draw.draw_framebuffer = old_fb; - // TODO: Return scissor test to previous value when scissor test is implemented - cur_state.Apply(); - - dst_surface->dirty = true; - res_cache.FlushRegion(dst_surface->addr, dst_surface->size, dst_surface, true); + res_cache.InvalidateRegion(dst_surface->addr, dst_surface->size, dst_surface); return true; } @@ -1123,16 +1049,18 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con } MICROPROFILE_SCOPE(OpenGL_CacheManagement); - CachedSurface src_params; + SurfaceParams src_params; src_params.addr = framebuffer_addr; - src_params.width = config.width; + src_params.width = std::min(config.width.Value(), pixel_stride); src_params.height = config.height; - src_params.pixel_stride = pixel_stride; + src_params.stride = pixel_stride; src_params.is_tiled = false; - src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.color_format); + src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.color_format); + src_params.UpdateParams(); MathUtil::Rectangle src_rect; - CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect); + Surface src_surface; + std::tie(src_surface, src_rect) = res_cache.GetSurfaceSubRect(src_params, false, true); if (src_surface == nullptr) { return false; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 46c62961c..e83cb48fc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -43,6 +43,7 @@ public: void NotifyPicaRegisterChanged(u32 id) 
override; void FlushAll() override; void FlushRegion(PAddr addr, u32 size) override; + void InvalidateRegion(PAddr addr, u32 size) override; void FlushAndInvalidateRegion(PAddr addr, u32 size) override; bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index f37894e7a..87edca9d3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -9,11 +9,14 @@ #include #include #include +#include #include #include "common/bit_field.h" +#include "common/color.h" #include "common/logging/log.h" #include "common/math_util.h" #include "common/microprofile.h" +#include "common/scope_exit.h" #include "common/vector_math.h" #include "core/frontend/emu_window.h" #include "core/memory.h" @@ -25,13 +28,18 @@ #include "video_core/utils.h" #include "video_core/video_core.h" +using SurfaceType = SurfaceParams::SurfaceType; +using PixelFormat = SurfaceParams::PixelFormat; + +static std::array transfer_framebuffers; + struct FormatTuple { GLint internal_format; GLenum format; GLenum type; }; -static const std::array fb_format_tuples = {{ +static constexpr std::array fb_format_tuples = {{ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, // RGBA8 {GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE}, // RGB8 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1 @@ -39,86 +47,152 @@ static const std::array fb_format_tuples = {{ {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4 }}; -static const std::array depth_format_tuples = {{ +static constexpr std::array depth_format_tuples = {{ {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16 {}, {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, 
// D24S8 }}; -RasterizerCacheOpenGL::RasterizerCacheOpenGL() { - transfer_framebuffers[0].Create(); - transfer_framebuffers[1].Create(); -} +static constexpr FormatTuple tex_tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE }; -RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { - FlushAll(); -} - -static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height, - u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data, - u8* gl_data, bool morton_to_gl) { - using PixelFormat = CachedSurface::PixelFormat; - - u8* data_ptrs[2]; - u32 depth_stencil_shifts[2] = {24, 8}; - - if (morton_to_gl) { - std::swap(depth_stencil_shifts[0], depth_stencil_shifts[1]); +static const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { + const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); + if (type == SurfaceType::Color) { + ASSERT((size_t)pixel_format < fb_format_tuples.size()); + return fb_format_tuples[(unsigned int)pixel_format]; } + else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { + size_t tuple_idx = (size_t)pixel_format - 14; + ASSERT(tuple_idx < depth_format_tuples.size()); + return depth_format_tuples[tuple_idx]; + } + else { + return tex_tuple; + } +} - if (pixel_format == PixelFormat::D24S8) { - for (unsigned y = 0; y < height; ++y) { - for (unsigned x = 0; x < width; ++x) { - const u32 coarse_y = y & ~7; - u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + - coarse_y * width * bytes_per_pixel; - u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; +template +constexpr auto RangeFromInterval(Map& map, const Interval& interval) { + return boost::make_iterator_range(map.equal_range(interval)); +} - data_ptrs[morton_to_gl] = morton_data + morton_offset; - data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; +enum MortonCopyFlags : int { + MortonToGl = (1 << 0), + CheckRange = (1 << 1), + D24S8Format = (1 << 2), + BytesPerPixelBits = 3, // bits 3-4 + 
GLBytesPerPixelBits = 5, // bits 5-6 + MaxValue = (1 << 7) - 1, +}; +template +static void MortonCopyPixels(u32 width, u32 height, const u8* in_data, u8* out_data, PAddr base, PAddr start, PAddr end) { + constexpr bool check_range = (flags & MortonCopyFlags::CheckRange) ? true : false; + constexpr bool morton_to_gl = (flags & MortonCopyFlags::MortonToGl) ? true : false; + + constexpr bool D24S8format = (flags & MortonCopyFlags::D24S8Format) ? true : false; + + constexpr u32 bytes_per_pixel = u32(((flags) >> MortonCopyFlags::BytesPerPixelBits) & 0x3) + 1; // 2bits, starting with value 1 + constexpr u32 gl_bytes_per_pixel = u32(((flags) >> MortonCopyFlags::GLBytesPerPixelBits) & 0x3) + 1; // 2bits, starting with value 1 + + if (check_range) + ASSERT(start >= base && end <= (base + (width * height * bytes_per_pixel))); + const u32 start_offset = start - base; + const u32 end_offset = end - base; + + for (u32 x = 0; x < width; ++x) { + for (u32 y = 0; y < height; ++y) { + const u32 coarse_x = x & ~7; + const u32 coarse_y = y & ~7; + u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; + u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; + + if (check_range) { + if (morton_offset >= end_offset && coarse_x == 0 && coarse_y == 0) // Out of range and new tile + return; + if (morton_offset < start_offset || morton_offset >= end_offset) // Out of range + continue; + } + + const size_t copy_bytes = check_range ? std::min(end_offset - morton_offset, bytes_per_pixel) : bytes_per_pixel; + + const u8* const in_ptr = &in_data[morton_to_gl ? morton_offset : gl_pixel_index]; + u8* const out_ptr = &out_data[morton_to_gl ? 
gl_pixel_index : morton_offset]; + + if (D24S8format) { // Swap depth and stencil value ordering since 3DS does not match OpenGL - u32 depth_stencil; - memcpy(&depth_stencil, data_ptrs[1], sizeof(u32)); - depth_stencil = (depth_stencil << depth_stencil_shifts[0]) | - (depth_stencil >> depth_stencil_shifts[1]); - - memcpy(data_ptrs[0], &depth_stencil, sizeof(u32)); + constexpr size_t swap_offset = morton_to_gl ? 3 : 1; + std::array swap_buf; + std::memcpy(&swap_buf[4 - swap_offset], &in_ptr[0], swap_offset); + std::memcpy(&swap_buf[0], &in_ptr[swap_offset], 4 - swap_offset); + std::memcpy(out_ptr, &swap_buf[0], copy_bytes); } - } - } else { - for (unsigned y = 0; y < height; ++y) { - for (unsigned x = 0; x < width; ++x) { - const u32 coarse_y = y & ~7; - u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + - coarse_y * width * bytes_per_pixel; - u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; - - data_ptrs[morton_to_gl] = morton_data + morton_offset; - data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; - - memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); + else { + std::memcpy(out_ptr, in_ptr, copy_bytes); } } } } -void RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, - CachedSurface::SurfaceType type, - const MathUtil::Rectangle& src_rect, - const MathUtil::Rectangle& dst_rect) { - using SurfaceType = CachedSurface::SurfaceType; +template +class FunctionTable { +public: + FunctionTable() { + FillArray(); + } + const auto& operator [](size_t pos) const { + return table[pos]; + } +private: + template + void FillArray() { + table[P - 1] = &MortonCopyPixels

; + FillArray

(); + } + template <> + void FillArray<0>() {} + std::array), size> table; +}; +static const FunctionTable MortonCopyFnTable; +// Allocate an uninitialized texture of appropriate size and format for the surface +static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tuple, u32 width, u32 height) { OpenGLState cur_state = OpenGLState::GetCurState(); + // Keep track of previous texture bindings + GLuint old_tex = cur_state.texture_units[0].texture_2d; + cur_state.texture_units[0].texture_2d = texture; + cur_state.Apply(); + glActiveTexture(GL_TEXTURE0); + + glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0, + format_tuple.format, format_tuple.type, nullptr); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + + // Restore previous texture bindings + cur_state.texture_units[0].texture_2d = old_tex; + cur_state.Apply(); +} + +static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rect, + GLuint dst_tex, const MathUtil::Rectangle& dst_rect, + SurfaceType type) { + OpenGLState cur_state = OpenGLState::GetCurState(); + + OpenGLState prev_state = cur_state; + SCOPE_EXIT({ prev_state.Apply(); }); + // Make sure textures aren't bound to texture units, since going to bind them to framebuffer // components OpenGLState::ResetTexture(src_tex); OpenGLState::ResetTexture(dst_tex); // Keep track of previous framebuffer bindings - GLuint old_fbs[2] = {cur_state.draw.read_framebuffer, cur_state.draw.draw_framebuffer}; cur_state.draw.read_framebuffer = transfer_framebuffers[0].handle; cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle; cur_state.Apply(); @@ -163,637 +237,890 @@ void RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, dst_rect.top, dst_rect.right, 
dst_rect.bottom, buffers, buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); - // Restore previous framebuffer bindings - cur_state.draw.read_framebuffer = old_fbs[0]; - cur_state.draw.draw_framebuffer = old_fbs[1]; - cur_state.Apply(); -} - -bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, - const MathUtil::Rectangle& src_rect, - CachedSurface* dst_surface, - const MathUtil::Rectangle& dst_rect) { - - if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, - dst_surface->pixel_format)) { - return false; - } - - BlitTextures(src_surface->texture.handle, dst_surface->texture.handle, - CachedSurface::GetFormatType(src_surface->pixel_format), src_rect, dst_rect); return true; } -static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format, - u32 width, u32 height) { - // Allocate an uninitialized texture of appropriate size and format for the surface - using SurfaceType = CachedSurface::SurfaceType; - +static bool FillSurface(const Surface& surface, const u8* fill_data) { OpenGLState cur_state = OpenGLState::GetCurState(); - // Keep track of previous texture bindings - GLuint old_tex = cur_state.texture_units[0].texture_2d; - cur_state.texture_units[0].texture_2d = texture; + OpenGLState prev_state = cur_state; + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState::ResetTexture(surface->texture.handle); + + cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle; cur_state.Apply(); - glActiveTexture(GL_TEXTURE0); - SurfaceType type = CachedSurface::GetFormatType(pixel_format); + if (surface->type == SurfaceType::Color || surface->type == SurfaceType::Texture) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, surface->texture.handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - FormatTuple tuple; - if (type == SurfaceType::Color) { - ASSERT((size_t)pixel_format < fb_format_tuples.size()); - 
tuple = fb_format_tuples[(unsigned int)pixel_format]; - } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { - size_t tuple_idx = (size_t)pixel_format - 14; - ASSERT(tuple_idx < depth_format_tuples.size()); - tuple = depth_format_tuples[tuple_idx]; - } else { - tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; - } + Pica::Texture::TextureInfo tex_info{}; + tex_info.format = static_cast(surface->pixel_format); + Math::Vec4 color = Pica::Texture::LookupTexture(fill_data, 0, 0, tex_info); - glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0, tuple.format, - tuple.type, nullptr); + std::array color_values = { + color.x / 255.f, + color.y / 255.f, + color.z / 255.f, + color.w / 255.f + }; - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - - // Restore previous texture bindings - cur_state.texture_units[0].texture_2d = old_tex; - cur_state.Apply(); -} - -MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192)); -CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale, - bool load_if_create) { - using PixelFormat = CachedSurface::PixelFormat; - using SurfaceType = CachedSurface::SurfaceType; - - if (params.addr == 0) { - return nullptr; - } - - u32 params_size = - params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8; - - // Check for an exact match in existing surfaces - CachedSurface* best_exact_surface = nullptr; - float exact_surface_goodness = -1.f; - - auto surface_interval = - boost::icl::interval::right_open(params.addr, params.addr + params_size); - auto range = surface_cache.equal_range(surface_interval); - for (auto it = range.first; it != range.second; ++it) { - for (auto it2 = it->second.begin(); 
it2 != it->second.end(); ++it2) { - CachedSurface* surface = it2->get(); - - // Check if the request matches the surface exactly - if (params.addr == surface->addr && params.width == surface->width && - params.height == surface->height && params.pixel_format == surface->pixel_format) { - // Make sure optional param-matching criteria are fulfilled - bool tiling_match = (params.is_tiled == surface->is_tiled); - bool res_scale_match = (params.res_scale_width == surface->res_scale_width && - params.res_scale_height == surface->res_scale_height); - if (!match_res_scale || res_scale_match) { - // Prioritize same-tiling and highest resolution surfaces - float match_goodness = - (float)tiling_match + surface->res_scale_width * surface->res_scale_height; - if (match_goodness > exact_surface_goodness || surface->dirty) { - exact_surface_goodness = match_goodness; - best_exact_surface = surface; - } - } - } - } - } - - // Return the best exact surface if found - if (best_exact_surface != nullptr) { - return best_exact_surface; - } - - // No matching surfaces found, so create a new one - u8* texture_src_data = Memory::GetPhysicalPointer(params.addr); - if (texture_src_data == nullptr) { - return nullptr; - } - - MICROPROFILE_SCOPE(OpenGL_SurfaceUpload); - - // Stride only applies to linear images. 
- ASSERT(params.pixel_stride == 0 || !params.is_tiled); - - std::shared_ptr new_surface = std::make_shared(); - - new_surface->addr = params.addr; - new_surface->size = params_size; - - new_surface->texture.Create(); - new_surface->width = params.width; - new_surface->height = params.height; - new_surface->pixel_stride = params.pixel_stride; - new_surface->res_scale_width = params.res_scale_width; - new_surface->res_scale_height = params.res_scale_height; - - new_surface->is_tiled = params.is_tiled; - new_surface->pixel_format = params.pixel_format; - new_surface->dirty = false; - - if (!load_if_create) { - // Don't load any data; just allocate the surface's texture - AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format, - new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); - } else { - // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead - // of memory upload below if that's a common scenario in some game - - Memory::RasterizerFlushRegion(params.addr, params_size); - - // Load data from memory to the new surface - OpenGLState cur_state = OpenGLState::GetCurState(); - - GLuint old_tex = cur_state.texture_units[0].texture_2d; - cur_state.texture_units[0].texture_2d = new_surface->texture.handle; + cur_state.color_mask.red_enabled = GL_TRUE; + cur_state.color_mask.green_enabled = GL_TRUE; + cur_state.color_mask.blue_enabled = GL_TRUE; + cur_state.color_mask.alpha_enabled = GL_TRUE; cur_state.Apply(); - glActiveTexture(GL_TEXTURE0); + glClearBufferfv(GL_COLOR, 0, &color_values[0]); + } + else if (surface->type == SurfaceType::Depth) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, surface->texture.handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - if (!new_surface->is_tiled) { - // TODO: Ensure this will always be a 
color format, not a depth or other format - ASSERT((size_t)new_surface->pixel_format < fb_format_tuples.size()); - const FormatTuple& tuple = fb_format_tuples[(unsigned int)params.pixel_format]; + u32 value_32bit = 0; + GLfloat value_float; - glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)new_surface->pixel_stride); - glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, - tuple.format, tuple.type, texture_src_data); - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - } else { - SurfaceType type = CachedSurface::GetFormatType(new_surface->pixel_format); - if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) { - FormatTuple tuple; - if ((size_t)params.pixel_format < fb_format_tuples.size()) { - tuple = fb_format_tuples[(unsigned int)params.pixel_format]; - } else { - // Texture - tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; - } - - std::vector> tex_buffer(params.width * params.height); - - Pica::Texture::TextureInfo tex_info; - tex_info.width = params.width; - tex_info.height = params.height; - tex_info.format = (Pica::TexturingRegs::TextureFormat)params.pixel_format; - tex_info.SetDefaultStride(); - tex_info.physical_address = params.addr; - - for (unsigned y = 0; y < params.height; ++y) { - for (unsigned x = 0; x < params.width; ++x) { - tex_buffer[x + params.width * y] = Pica::Texture::LookupTexture( - texture_src_data, x, params.height - 1 - y, tex_info); - } - } - - glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, - 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data()); - } else { - // Depth/Stencil formats need special treatment since they aren't sampleable using - // LookupTexture and can't use RGBA format - size_t tuple_idx = (size_t)params.pixel_format - 14; - ASSERT(tuple_idx < depth_format_tuples.size()); - const FormatTuple& tuple = depth_format_tuples[tuple_idx]; - - u32 bytes_per_pixel = CachedSurface::GetFormatBpp(params.pixel_format) / 8; - - // OpenGL needs 4 bpp alignment for D24 
since using GL_UNSIGNED_INT as type - bool use_4bpp = (params.pixel_format == PixelFormat::D24); - - u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel; - - std::vector temp_fb_depth_buffer(params.width * params.height * - gl_bytes_per_pixel); - - u8* temp_fb_depth_buffer_ptr = - use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data(); - - MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel, - gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr, - true); - - glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, - 0, tuple.format, tuple.type, temp_fb_depth_buffer.data()); - } + if (surface->pixel_format == SurfaceParams::PixelFormat::D16) { + std::memcpy(&value_32bit, fill_data, 2); + value_float = value_32bit / 65535.0f; // 2^16 - 1 + } + else if (surface->pixel_format == SurfaceParams::PixelFormat::D24) { + std::memcpy(&value_32bit, fill_data, 3); + value_float = value_32bit / 16777215.0f; // 2^24 - 1 } - // If not 1x scale, blit 1x texture to a new scaled texture and replace texture in surface - if (new_surface->res_scale_width != 1.f || new_surface->res_scale_height != 1.f) { - OGLTexture scaled_texture; - scaled_texture.Create(); - - AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format, - new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); - BlitTextures(new_surface->texture.handle, scaled_texture.handle, - CachedSurface::GetFormatType(new_surface->pixel_format), - MathUtil::Rectangle(0, 0, new_surface->width, new_surface->height), - MathUtil::Rectangle(0, 0, new_surface->GetScaledWidth(), - new_surface->GetScaledHeight())); - - new_surface->texture.Release(); - new_surface->texture.handle = scaled_texture.handle; - scaled_texture.handle = 0; - cur_state.texture_units[0].texture_2d = new_surface->texture.handle; - cur_state.Apply(); - } - - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, 
GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - - cur_state.texture_units[0].texture_2d = old_tex; + cur_state.depth.write_mask = GL_TRUE; cur_state.Apply(); + glClearBufferfv(GL_DEPTH, 0, &value_float); } + else if (surface->type == SurfaceType::DepthStencil) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, surface->texture.handle, 0); - Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1); - surface_cache.add(std::make_pair(boost::icl::interval::right_open( - new_surface->addr, new_surface->addr + new_surface->size), - std::set>({new_surface}))); - return new_surface.get(); + u32 value_32bit; + std::memcpy(&value_32bit, fill_data, 4); + + GLfloat value_float = (value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1 + GLint value_int = (value_32bit >> 24); + + cur_state.depth.write_mask = GL_TRUE; + cur_state.stencil.write_mask = -1; + cur_state.Apply(); + glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int); + } + return true; } -CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params, - bool match_res_scale, bool load_if_create, - MathUtil::Rectangle& out_rect) { - if (params.addr == 0) { - return nullptr; - } - - u32 total_pixels = params.width * params.height; - u32 params_size = total_pixels * CachedSurface::GetFormatBpp(params.pixel_format) / 8; - - // Attempt to find encompassing surfaces - CachedSurface* best_subrect_surface = nullptr; - float subrect_surface_goodness = -1.f; - - auto surface_interval = - boost::icl::interval::right_open(params.addr, params.addr + params_size); - auto cache_upper_bound = surface_cache.upper_bound(surface_interval); - for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) { - 
for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { - CachedSurface* surface = it2->get(); - - // Check if the request is contained in the surface - if (params.addr >= surface->addr && - params.addr + params_size - 1 <= surface->addr + surface->size - 1 && - params.pixel_format == surface->pixel_format) { - // Make sure optional param-matching criteria are fulfilled - bool tiling_match = (params.is_tiled == surface->is_tiled); - bool res_scale_match = (params.res_scale_width == surface->res_scale_width && - params.res_scale_height == surface->res_scale_height); - if (!match_res_scale || res_scale_match) { - // Prioritize same-tiling and highest resolution surfaces - float match_goodness = - (float)tiling_match + surface->res_scale_width * surface->res_scale_height; - if (match_goodness > subrect_surface_goodness || surface->dirty) { - subrect_surface_goodness = match_goodness; - best_subrect_surface = surface; - } - } - } - } - } - - // Return the best subrect surface if found - if (best_subrect_surface != nullptr) { - unsigned int bytes_per_pixel = - (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8); - - int x0, y0; - - if (!params.is_tiled) { - u32 begin_pixel_index = (params.addr - best_subrect_surface->addr) / bytes_per_pixel; - x0 = begin_pixel_index % best_subrect_surface->width; - y0 = begin_pixel_index / best_subrect_surface->width; - - out_rect = MathUtil::Rectangle(x0, y0, x0 + params.width, y0 + params.height); - } else { - u32 bytes_per_tile = 8 * 8 * bytes_per_pixel; - u32 tiles_per_row = best_subrect_surface->width / 8; - - u32 begin_tile_index = (params.addr - best_subrect_surface->addr) / bytes_per_tile; - x0 = begin_tile_index % tiles_per_row * 8; - y0 = begin_tile_index / tiles_per_row * 8; - - // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory. 
- out_rect = - MathUtil::Rectangle(x0, best_subrect_surface->height - y0, x0 + params.width, - best_subrect_surface->height - (y0 + params.height)); - } - - out_rect.left = (int)(out_rect.left * best_subrect_surface->res_scale_width); - out_rect.right = (int)(out_rect.right * best_subrect_surface->res_scale_width); - out_rect.top = (int)(out_rect.top * best_subrect_surface->res_scale_height); - out_rect.bottom = (int)(out_rect.bottom * best_subrect_surface->res_scale_height); - - return best_subrect_surface; - } - - // No subrect found - create and return a new surface - if (!params.is_tiled) { - out_rect = MathUtil::Rectangle(0, 0, (int)(params.width * params.res_scale_width), - (int)(params.height * params.res_scale_height)); - } else { - out_rect = MathUtil::Rectangle(0, (int)(params.height * params.res_scale_height), - (int)(params.width * params.res_scale_width), 0); - } - - return GetSurface(params, match_res_scale, load_if_create); -} - -CachedSurface* RasterizerCacheOpenGL::GetTextureSurface( - const Pica::TexturingRegs::FullTextureConfig& config) { - - Pica::Texture::TextureInfo info = - Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format); - - CachedSurface params; - params.addr = info.physical_address; - params.width = info.width; - params.height = info.height; - params.is_tiled = true; - params.pixel_format = CachedSurface::PixelFormatFromTextureFormat(info.format); - return GetSurface(params, false, true); -} - -std::tuple> -RasterizerCacheOpenGL::GetFramebufferSurfaces( - const Pica::FramebufferRegs::FramebufferConfig& config) { - - const auto& regs = Pica::g_state.regs; - - // Make sur that framebuffers don't overlap if both color and depth are being used - u32 fb_area = config.GetWidth() * config.GetHeight(); - bool framebuffers_overlap = - config.GetColorBufferPhysicalAddress() != 0 && - config.GetDepthBufferPhysicalAddress() != 0 && - MathUtil::IntervalsIntersect( - config.GetColorBufferPhysicalAddress(), - fb_area * 
GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())), - config.GetDepthBufferPhysicalAddress(), - fb_area * Pica::FramebufferRegs::BytesPerDepthPixel(config.depth_format)); - bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0; - bool depth_write_enable = regs.framebuffer.output_merger.depth_write_enable && - regs.framebuffer.framebuffer.allow_depth_stencil_write; - bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 && - (regs.framebuffer.output_merger.depth_test_enable || depth_write_enable || - !framebuffers_overlap); - - if (framebuffers_overlap && using_color_fb && using_depth_fb) { - LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " - "overlapping framebuffers not supported!"); - using_depth_fb = false; - } - - // get color and depth surfaces - CachedSurface color_params; - CachedSurface depth_params; - color_params.width = depth_params.width = config.GetWidth(); - color_params.height = depth_params.height = config.GetHeight(); - color_params.is_tiled = depth_params.is_tiled = true; - +SurfaceParams::SurfaceParams() { // Set the internal resolution, assume the same scaling factor for top and bottom screens float resolution_scale_factor = Settings::values.resolution_factor; if (resolution_scale_factor == 0.0f) { // Auto - scale resolution to the window size resolution_scale_factor = VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio(); } - // Scale the resolution by the specified factor - color_params.res_scale_width = resolution_scale_factor; - depth_params.res_scale_width = resolution_scale_factor; - color_params.res_scale_height = resolution_scale_factor; - depth_params.res_scale_height = resolution_scale_factor; + res_scale_width = resolution_scale_factor; + res_scale_height = resolution_scale_factor; +} - color_params.addr = config.GetColorBufferPhysicalAddress(); - color_params.pixel_format = CachedSurface::PixelFormatFromColorFormat(config.color_format); 
+MathUtil::Rectangle CachedSurface::GetSubRect(const SurfaceParams& sub_surface) const { + const u32 begin_pixel_index = PixelsInBytes(sub_surface.addr - addr); + const int x0 = begin_pixel_index % width; + const int y0 = begin_pixel_index / width; - depth_params.addr = config.GetDepthBufferPhysicalAddress(); - depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format); + if (is_tiled) + return MathUtil::Rectangle(x0, height - y0 - sub_surface.height, x0 + sub_surface.width, height - y0); // Bottom to top - MathUtil::Rectangle color_rect; - CachedSurface* color_surface = - using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr; + return MathUtil::Rectangle(x0, y0, x0 + sub_surface.width, y0 + sub_surface.height); // Top to bottom +} - MathUtil::Rectangle depth_rect; - CachedSurface* depth_surface = - using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr; +MathUtil::Rectangle CachedSurface::GetScaledSubRect(const SurfaceParams& sub_surface) const { + auto rect = GetSubRect(sub_surface); + rect.left = static_cast(rect.left * res_scale_width); + rect.right = static_cast(rect.right * res_scale_width); + rect.top = static_cast(rect.top * res_scale_height); + rect.bottom = static_cast(rect.bottom * res_scale_height); + return rect; +} - // Sanity check to make sure found surfaces aren't the same - if (using_depth_fb && using_color_fb && color_surface == depth_surface) { - LOG_CRITICAL( - Render_OpenGL, - "Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!"); - using_depth_fb = false; - depth_surface = nullptr; +bool CachedSurface::ExactMatch(const SurfaceParams& other_surface) const { + return (other_surface.addr == addr && + other_surface.width == width && + other_surface.height == height && + other_surface.stride == stride && + other_surface.pixel_format == pixel_format && + other_surface.is_tiled == is_tiled); +} + +bool CachedSurface::CanSubRect(const 
SurfaceParams& sub_surface) const { + if (sub_surface.addr < addr || sub_surface.end > end || sub_surface.stride != stride || + sub_surface.pixel_format != pixel_format || sub_surface.is_tiled != is_tiled) + return false; + + auto rect = GetSubRect(sub_surface); + + if (rect.left + sub_surface.width > stride) + return false; + + if (is_tiled) + return ((height - rect.bottom) % 8 == 0 && rect.left % 8 == 0); + + return true; +} + +bool CachedSurface::CanCopy(const SurfaceParams& dest_surface) const { + if (type == SurfaceType::Fill && IsRegionValid(dest_surface.GetInterval()) && + dest_surface.addr >= addr && dest_surface.end <= end) { // dest_surface is within our fill range + if (fill_size != dest_surface.bytes_per_pixel) { + // Check if bits repeat for our fill_size + const u32 dest_bytes_per_pixel = std::max(dest_surface.bytes_per_pixel, 1u); // Take care of 4bpp formats + std::vector fill_test(fill_size * dest_bytes_per_pixel); + + for (u32 i = 0; i < dest_bytes_per_pixel; ++i) + std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); + + for (u32 i = 0; i < fill_size; ++i) + if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], dest_bytes_per_pixel) != 0) + return false; + + if (dest_surface.bytes_per_pixel == 0 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) // 4bpp compare + return false; + } + return true; + } + if (CanSubRect(dest_surface) && dest_surface.width == stride) + return true; + + return false; +} + +static void CopySurface(const Surface& src_surface, const Surface& dest_surface) { + if (src_surface == dest_surface) + return; + + // This is only called when CanCopy is true, no need to run checks here + if (src_surface->type == SurfaceType::Fill) { + // FillSurface needs a 4 bytes buffer + const u32 fill_offset = (dest_surface->addr - src_surface->addr) % src_surface->fill_size; + std::array fill_buffer; + + u32 fill_buff_pos = fill_offset; + for (int i : {0, 1, 2, 3}) + fill_buffer[i] = 
src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size]; + + FillSurface(dest_surface, &fill_buffer[0]); + } + if (src_surface->CanSubRect(*dest_surface)) { + BlitTextures(src_surface->texture.handle, src_surface->GetScaledSubRect(*dest_surface), + dest_surface->texture.handle, dest_surface->GetScaledRect(), + src_surface->type); + } + dest_surface->gl_buffer_dirty = true; +} + +MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); +void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { + ASSERT(type != SurfaceType::Fill); + + const u8* const texture_src_data = Memory::GetPhysicalPointer(addr); + if (texture_src_data == nullptr) + return; + + MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); + + ASSERT(load_start >= addr && load_end <= end); + const u32 start_offset = load_start - addr; + + if (!is_tiled) { + ASSERT(type == SurfaceType::Color); + std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, load_end - load_start); + } + else { + if (type == SurfaceType::Texture) { + Pica::Texture::TextureInfo tex_info{}; + tex_info.width = width; + tex_info.height = height; + tex_info.format = static_cast(pixel_format); + tex_info.SetDefaultStride(); + tex_info.physical_address = addr; + + for (unsigned y = 0; y < height; ++y) { + for (unsigned x = 0; x < width; ++x) { + auto vec4 = Pica::Texture::LookupTexture(texture_src_data, x, height - 1 - y, tex_info); + const size_t offset = (x + (width * y)) * 4; + std::memcpy(&gl_buffer[offset], vec4.AsArray(), 4); + } + } + } + else { + size_t copyfn_offset = MortonCopyFlags::MortonToGl; + copyfn_offset |= (bytes_per_pixel - 1) << MortonCopyFlags::BytesPerPixelBits; + copyfn_offset |= (gl_bytes_per_pixel - 1) << MortonCopyFlags::GLBytesPerPixelBits; + + if (load_start != addr || load_end != end) + copyfn_offset |= MortonCopyFlags::CheckRange; + if (pixel_format == PixelFormat::D24S8) + copyfn_offset |= MortonCopyFlags::D24S8Format; + + 
MortonCopyFnTable[copyfn_offset](width, height, + texture_src_data, &gl_buffer[gl_buffer_offset], addr, load_start, load_end); + } + } +} + +MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); +void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { + u8* const dst_buffer = Memory::GetPhysicalPointer(addr); + if (dst_buffer == nullptr) + return; + + MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); + + ASSERT(flush_start >= addr && flush_end <= end); + const u32 start_offset = flush_start - addr; + const u32 end_offset = flush_end - addr; + + if (type == SurfaceType::Fill) { + const u32 coarse_start_offset = start_offset - (start_offset % fill_size); + const u32 backup_bytes = start_offset % fill_size; + std::array backup_data; + if (backup_bytes) + std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes); + + for (u32 offset = coarse_start_offset; offset < end_offset; offset += fill_size) + std::memcpy(&dst_buffer[offset], &fill_data[0], std::min(fill_size, end_offset - offset)); + + if (backup_bytes) + std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); + } + else if (!is_tiled) { + ASSERT(type == SurfaceType::Color); + std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start); + } + else { + size_t copyfn_offset = (bytes_per_pixel - 1) << MortonCopyFlags::BytesPerPixelBits; + copyfn_offset |= (gl_bytes_per_pixel - 1) << MortonCopyFlags::GLBytesPerPixelBits; + + if (flush_start != addr || flush_end != end) + copyfn_offset |= MortonCopyFlags::CheckRange; + if (pixel_format == PixelFormat::D24S8) + copyfn_offset |= MortonCopyFlags::D24S8Format; + + MortonCopyFnTable[copyfn_offset](width, height, + &gl_buffer[gl_buffer_offset], dst_buffer, addr, flush_start, flush_end); + } +} + +void CachedSurface::UploadGLTexture() { + if (type == SurfaceType::Fill) + return; + + ASSERT(gl_buffer.size() == width * height * gl_bytes_per_pixel); + + const 
FormatTuple& tuple = GetFormatTuple(pixel_format); + + // Load data from memory to the surface + OpenGLState cur_state = OpenGLState::GetCurState(); + + GLuint old_tex = cur_state.texture_units[0].texture_2d; + cur_state.texture_units[0].texture_2d = texture.handle; + cur_state.Apply(); + + glActiveTexture(GL_TEXTURE0); + glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0, + tuple.format, tuple.type, &gl_buffer[0]); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + + cur_state.texture_units[0].texture_2d = old_tex; + cur_state.Apply(); + + // If not 1x scale, blit 1x texture to a new scaled texture and replace texture in surface + if (res_scale_width != 1.f || res_scale_height != 1.f) { + OGLTexture scaled_texture; + scaled_texture.Create(); + + AllocateSurfaceTexture(scaled_texture.handle, tuple, GetScaledWidth(), GetScaledHeight()); + BlitTextures(texture.handle, GetRect(), scaled_texture.handle, GetScaledRect(), type); + + std::swap(texture.handle, scaled_texture.handle); } - MathUtil::Rectangle rect; + gl_buffer_dirty = false; +} - if (color_surface != nullptr && depth_surface != nullptr && - (depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) { - // Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if - // they don't match - if (color_rect.left != 0 || color_rect.top != 0) { - color_surface = GetSurface(color_params, true, true); - } +void CachedSurface::DownloadGLTexture() { + if (gl_buffer.size() == 0) + gl_buffer.resize(width * height * gl_bytes_per_pixel); - if (depth_rect.left != 0 || depth_rect.top != 0) { - depth_surface = GetSurface(depth_params, true, true); - } + if (!gl_buffer_dirty || type == SurfaceType::Fill) + return; - if (!color_surface->is_tiled) { - 
rect = MathUtil::Rectangle( - 0, 0, (int)(color_params.width * color_params.res_scale_width), - (int)(color_params.height * color_params.res_scale_height)); - } else { - rect = MathUtil::Rectangle( - 0, (int)(color_params.height * color_params.res_scale_height), - (int)(color_params.width * color_params.res_scale_width), 0); + const FormatTuple& tuple = GetFormatTuple(pixel_format); + + OpenGLState cur_state = OpenGLState::GetCurState(); + GLuint old_tex = cur_state.texture_units[0].texture_2d; + + // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush + OGLTexture unscaled_tex; + if (res_scale_width != 1.f || res_scale_height != 1.f) { + unscaled_tex.Create(); + + AllocateSurfaceTexture(unscaled_tex.handle, tuple, width, height); + BlitTextures(texture.handle, GetScaledRect(), unscaled_tex.handle, GetRect(), type); + + cur_state.texture_units[0].texture_2d = unscaled_tex.handle; + } + else { + cur_state.texture_units[0].texture_2d = texture.handle; + } + cur_state.Apply(); + + glActiveTexture(GL_TEXTURE0); + glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[0]); + + cur_state.texture_units[0].texture_2d = old_tex; + cur_state.Apply(); + + gl_buffer_dirty = false; +} + +enum MatchType : int { + Exact = (1 << 0), // Surfaces perfectly match + SubRect = (1 << 1), // Surface encompasses params + Invalid = (1 << 2), // Flag that can be applied to other match types, invalid matches require validation before they can be used + Copy = (1 << 3), // Surface we can copy from + TexCopy = (1 << 4), // Surface that will match a display transfer "texture copy" parameters + All = (1 << 5) - 1, + None = 0, +}; + +constexpr MatchType operator | (MatchType lhs, MatchType rhs) { + return static_cast(static_cast(lhs) | static_cast(rhs)); +} + +/// Get the best surface match (and its match type) for the given flags, higher flag value meaning lower priority +template +std::tuple FindMatch(const SurfaceCache& surface_cache, const 
SurfaceParams& params, bool match_res_scale) { + constexpr float MATCH_GOODNESS_RESET = -1.f; + + Surface match_surface = nullptr; + MatchType match_type = MatchType::All; // Starting from lowest possible priority + float best_match_goodness = MATCH_GOODNESS_RESET; + + for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { + for (auto& surface : pair.second) { + const bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height); + const float match_goodness = surface->res_scale_width * surface->res_scale_height; + const MatchType invalid_mask = surface->IsRegionValid(params.GetInterval()) ? MatchType::None : MatchType::Invalid; + + if (!(find_flags & MatchType::Invalid) && invalid_mask == MatchType::Invalid) + continue; + + const auto IsMatch_Helper = [&](MatchType check_type, auto match_fn) { + if (!(find_flags & check_type)) + return false; + + check_type = check_type | invalid_mask; + + // Lower flag value means higher priority + if (match_type < check_type) // We already have a better match type + return true; // Return true to skip to the next surface + + if (!match_fn()) + return false; + + if (!match_res_scale || res_scale_match || surface->type == SurfaceType::Fill) { // Found a match + if (match_type > check_type) { + best_match_goodness = MATCH_GOODNESS_RESET; + match_type = check_type; + } + if (match_goodness > best_match_goodness) { + best_match_goodness = match_goodness; + match_surface = surface; + } + } + return false; + }; + if (IsMatch_Helper(MatchType::Exact, [&] { return surface->ExactMatch(params); })) + continue; + if (IsMatch_Helper(MatchType::SubRect, [&] { return surface->CanSubRect(params); })) + continue; + if (IsMatch_Helper(MatchType::Copy, [&] { return surface->CanCopy(params); })) + continue; + if (IsMatch_Helper(MatchType::TexCopy, [&] { + if (surface->pixel_format == PixelFormat::Invalid || + surface->addr > params.addr || surface->end < 
params.end || + ((params.addr - surface->addr) * 8) % SurfaceParams::GetFormatBpp(surface->pixel_format) != 0 || + (params.width * 8) % SurfaceParams::GetFormatBpp(surface->pixel_format) != 0 || + (params.stride * 8) % SurfaceParams::GetFormatBpp(surface->pixel_format) != 0) + return false; + + const u32 begin_pixel_index = surface->PixelsInBytes(params.addr - surface->addr); + const int x0 = begin_pixel_index % surface->width; + const int y0 = begin_pixel_index / surface->width; + + if (!surface->is_tiled) + return (surface->PixelsInBytes(params.stride) == surface->stride && + x0 + surface->PixelsInBytes(params.width) <= surface->stride); + + return (surface->PixelsInBytes(params.addr - surface->addr) % 64 == 0 && + surface->PixelsInBytes(params.width) % 64 == 0 && + surface->PixelsInBytes(params.stride) == surface->stride * 8 && + x0 + surface->PixelsInBytes(params.width / 8) <= surface->stride); + })) + continue; } - } else if (color_surface != nullptr) { - rect = color_rect; - } else if (depth_surface != nullptr) { - rect = depth_rect; - } else { - rect = MathUtil::Rectangle(0, 0, 0, 0); + } + return std::make_tuple(match_surface, (match_surface == nullptr) ? 
MatchType::None : match_type); +} + +RasterizerCacheOpenGL::RasterizerCacheOpenGL() { + transfer_framebuffers[0].Create(); + transfer_framebuffers[1].Create(); +} + +RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { + FlushAll(); + while (!surface_cache.empty()) + UnregisterSurface(*surface_cache.begin()->second.begin()); + transfer_framebuffers[0].Release(); + transfer_framebuffers[1].Release(); +} + +bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface, + const MathUtil::Rectangle& src_rect, + const Surface& dst_surface, + const MathUtil::Rectangle& dst_rect) { + if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, + dst_surface->pixel_format)) + return false; + + return BlitTextures(src_surface->texture.handle, src_rect, + dst_surface->texture.handle, dst_rect, + src_surface->type); +} + +Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool match_res_scale, bool load_if_create) { + if (params.addr == 0 || params.height * params.width == 0) { + return nullptr; + } + + // Check for an exact or subrect match in existing surfaces + Surface surface_match; + MatchType match_type; + std::tie(surface_match, match_type) = + FindMatch(surface_cache, params, match_res_scale); + + if (surface_match != nullptr) { + if (load_if_create && (match_type & MatchType::Invalid)) { + ValidateSurface(surface_match, params.addr, params.size); + } + return surface_match; + } + + ASSERT(params.width == params.stride); // Use GetSurfaceSubRect instead + + Surface new_surface = CreateSurface(params); + if (load_if_create) + ValidateSurface(new_surface, params.addr, params.size); + + RegisterSurface(new_surface); + + return new_surface; +} + +SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params, + bool match_res_scale, + bool load_if_create) { + MathUtil::Rectangle out_rect{}; + + if (params.addr == 0 || params.height * params.width == 0) { + return std::make_tuple(nullptr, out_rect); + } + + // Attempt 
to find encompassing surface + Surface subrect_match; + MatchType match_type; + std::tie(subrect_match, match_type) = FindMatch(surface_cache, params, match_res_scale); + + // Return the best subrect surface if found + if (subrect_match != nullptr) { + out_rect = subrect_match->GetScaledSubRect(params); + // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory. + if (params.is_tiled) + std::swap(out_rect.top, out_rect.bottom); + + if (load_if_create && (match_type & MatchType::Invalid)) + ValidateSurface(subrect_match, params.addr, params.size); + + return std::make_tuple(subrect_match, out_rect); + } + + // No subrect found - create and return a new surface + SurfaceParams new_params = params; + new_params.width = params.stride; // Can't have gaps in a surface + new_params.UpdateParams(); + + out_rect = new_params.GetScaledRect(); + if (new_params.is_tiled) + std::swap(out_rect.top, out_rect.bottom); + + // If stride was bigger than width we need to adjust our output rect + out_rect.right = static_cast(params.width * new_params.res_scale_width); + + Surface new_surface = CreateSurface(new_params); + if (load_if_create) + ValidateSurface(new_surface, new_params.addr, new_params.size); + + RegisterSurface(new_surface); + + return std::make_tuple(new_surface, out_rect); +} + +Surface RasterizerCacheOpenGL::GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config) { + Pica::Texture::TextureInfo info = Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format); + + SurfaceParams params; + params.addr = info.physical_address; + params.width = info.width; + params.height = info.height; + params.is_tiled = true; + params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(info.format); + params.UpdateParams(); + return GetSurface(params, false, true); +} + +SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool using_color_fb, + bool using_depth_fb) { + const auto& regs = Pica::g_state.regs; + 
const auto& config = regs.framebuffer.framebuffer; + + // Make sure that framebuffers don't overlap if both color and depth are being used + u32 fb_area = config.GetWidth() * config.GetHeight(); + bool framebuffers_overlap = config.GetColorBufferPhysicalAddress() != 0 && + config.GetDepthBufferPhysicalAddress() != 0 && + MathUtil::IntervalsIntersect( + config.GetColorBufferPhysicalAddress(), + fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())), + config.GetDepthBufferPhysicalAddress(), + fb_area * Pica::FramebufferRegs::BytesPerDepthPixel(config.depth_format)); + + if (framebuffers_overlap && using_color_fb && using_depth_fb) { + LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; overlapping framebuffers not supported!"); + using_depth_fb = false; + } + + // get color and depth surfaces + SurfaceParams color_params; + SurfaceParams depth_params; + color_params.is_tiled = depth_params.is_tiled = true; + + color_params.addr = config.GetColorBufferPhysicalAddress(); + color_params.width = depth_params.width = config.GetWidth(); + color_params.height = depth_params.height = config.GetHeight(); + color_params.pixel_format = SurfaceParams::PixelFormatFromColorFormat(config.color_format); + color_params.UpdateParams(); + + MathUtil::Rectangle rect{}; + Surface color_surface = nullptr; + if (using_color_fb) + std::tie(color_surface, rect) = GetSurfaceSubRect(color_params, true, true); + + depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); + depth_params.addr = config.GetDepthBufferPhysicalAddress(); + depth_params.UpdateParams(); + + Surface depth_surface = nullptr; + if (using_depth_fb && color_surface != nullptr) { + const PAddr validate_addr = depth_params.addr; + const u32 validate_size = depth_params.size; + + // Can't specify separate color and depth viewport offsets in OpenGL, so make sure depth_surface will have the same offsets + depth_params.addr -= 
color_surface->PixelsInBytes(color_params.addr - color_surface->addr) * depth_params.bytes_per_pixel; + depth_params.height = color_surface->height; + depth_params.UpdateParams(); + + depth_surface = GetSurface(depth_params, true, false); + ValidateSurface(depth_surface, validate_addr, validate_size); + } + else if (using_depth_fb) { + std::tie(depth_surface, rect) = GetSurfaceSubRect(depth_params, true, true); } return std::make_tuple(color_surface, depth_surface, rect); } -CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config) { - auto surface_interval = - boost::icl::interval::right_open(config.GetStartAddress(), config.GetEndAddress()); - auto range = surface_cache.equal_range(surface_interval); - for (auto it = range.first; it != range.second; ++it) { - for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { - int bits_per_value = 0; - if (config.fill_24bit) { - bits_per_value = 24; - } else if (config.fill_32bit) { - bits_per_value = 32; - } else { - bits_per_value = 16; - } +Surface RasterizerCacheOpenGL::GetFillSurface(const GPU::Regs::MemoryFillConfig& config) { + Surface new_surface = std::make_shared(); - CachedSurface* surface = it2->get(); + new_surface->addr = config.GetStartAddress(); + new_surface->end = config.GetEndAddress(); + new_surface->size = new_surface->end - new_surface->addr; + new_surface->type = SurfaceType::Fill; + std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4); + if (config.fill_32bit) + new_surface->fill_size = 4; + else if (config.fill_24bit) + new_surface->fill_size = 3; + else + new_surface->fill_size = 2; - if (surface->addr == config.GetStartAddress() && - CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value && - (surface->width * surface->height * - CachedSurface::GetFormatBpp(surface->pixel_format) / 8) == - (config.GetEndAddress() - config.GetStartAddress())) { - return surface; - } - } - } - - return nullptr; + 
RegisterSurface(new_surface); + return new_surface; } -MICROPROFILE_DEFINE(OpenGL_SurfaceDownload, "OpenGL", "Surface Download", MP_RGB(128, 192, 64)); -void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) { - using PixelFormat = CachedSurface::PixelFormat; - using SurfaceType = CachedSurface::SurfaceType; +SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) { + MathUtil::Rectangle rect{}; - if (!surface->dirty) { - return; - } + Surface match_surface; + MatchType match_type; + std::tie(match_surface, match_type) = FindMatch(surface_cache, params, false); - MICROPROFILE_SCOPE(OpenGL_SurfaceDownload); + if (match_type & MatchType::Invalid) + ValidateSurface(match_surface, params.addr, params.size); - u8* dst_buffer = Memory::GetPhysicalPointer(surface->addr); - if (dst_buffer == nullptr) { - return; - } + if (match_surface != nullptr) { + SurfaceParams match_subrect = params; + match_subrect.width = match_surface->PixelsInBytes(params.width); + match_subrect.stride = match_surface->PixelsInBytes(params.stride); - OpenGLState cur_state = OpenGLState::GetCurState(); - GLuint old_tex = cur_state.texture_units[0].texture_2d; - - OGLTexture unscaled_tex; - GLuint texture_to_flush = surface->texture.handle; - - // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush - if (surface->res_scale_width != 1.f || surface->res_scale_height != 1.f) { - unscaled_tex.Create(); - - AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width, - surface->height); - BlitTextures( - surface->texture.handle, unscaled_tex.handle, - CachedSurface::GetFormatType(surface->pixel_format), - MathUtil::Rectangle(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight()), - MathUtil::Rectangle(0, 0, surface->width, surface->height)); - - texture_to_flush = unscaled_tex.handle; - } - - cur_state.texture_units[0].texture_2d = texture_to_flush; - cur_state.Apply(); - glActiveTexture(GL_TEXTURE0); - - if 
(!surface->is_tiled) { - // TODO: Ensure this will always be a color format, not a depth or other format - ASSERT((size_t)surface->pixel_format < fb_format_tuples.size()); - const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format]; - - glPixelStorei(GL_PACK_ROW_LENGTH, (GLint)surface->pixel_stride); - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, dst_buffer); - glPixelStorei(GL_PACK_ROW_LENGTH, 0); - } else { - SurfaceType type = CachedSurface::GetFormatType(surface->pixel_format); - if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) { - ASSERT((size_t)surface->pixel_format < fb_format_tuples.size()); - const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format]; - - u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8; - - std::vector temp_gl_buffer(surface->width * surface->height * bytes_per_pixel); - - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data()); - - // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion - // is necessary. - MortonCopyPixels(surface->pixel_format, surface->width, surface->height, - bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(), - false); - } else { - // Depth/Stencil formats need special treatment since they aren't sampleable using - // LookupTexture and can't use RGBA format - size_t tuple_idx = (size_t)surface->pixel_format - 14; - ASSERT(tuple_idx < depth_format_tuples.size()); - const FormatTuple& tuple = depth_format_tuples[tuple_idx]; - - u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8; - - // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type - bool use_4bpp = (surface->pixel_format == PixelFormat::D24); - - u32 gl_bytes_per_pixel = use_4bpp ? 
4 : bytes_per_pixel; - - std::vector temp_gl_buffer(surface->width * surface->height * gl_bytes_per_pixel); - - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data()); - - u8* temp_gl_buffer_ptr = use_4bpp ? temp_gl_buffer.data() + 1 : temp_gl_buffer.data(); - - MortonCopyPixels(surface->pixel_format, surface->width, surface->height, - bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr, - false); + if (match_surface->is_tiled) { + match_subrect.width /= 8; + match_subrect.stride /= 8; + match_subrect.height *= 8; } + + rect = match_surface->GetScaledSubRect(match_subrect); + if (match_surface->is_tiled) + std::swap(rect.top, rect.bottom); } - surface->dirty = false; - - cur_state.texture_units[0].texture_2d = old_tex; - cur_state.Apply(); + return std::make_tuple(match_surface, rect); } -void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, - bool invalidate) { - if (size == 0) { +void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, u32 size) { + if (size == 0) + return; + + bool upload_texture = false; + const auto validate_interval = (surface->type != SurfaceType::Texture) ? 
+ SurfaceInterval::right_open(addr, addr + size) : + surface->GetInterval(); + + if (surface->type == SurfaceType::Fill) { + // Sanity check, fill surfaces will always be valid when used + ASSERT(surface->IsRegionValid(validate_interval)); return; } - // Gather up unique surfaces that touch the region - std::unordered_set> touching_surfaces; + for (;;) { + const auto it = surface->invalid_regions.find(validate_interval); + if (it == surface->invalid_regions.end()) + break; - auto surface_interval = boost::icl::interval::right_open(addr, addr + size); - auto cache_upper_bound = surface_cache.upper_bound(surface_interval); - for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) { - std::copy_if(it->second.begin(), it->second.end(), - std::inserter(touching_surfaces, touching_surfaces.end()), - [skip_surface](std::shared_ptr surface) { - return (surface.get() != skip_surface); - }); - } + const auto interval = *it & validate_interval; + const PAddr interval_start = boost::icl::first(interval); + const PAddr interval_end = boost::icl::last_next(interval); - // Flush and invalidate surfaces - for (auto surface : touching_surfaces) { - FlushSurface(surface.get()); - if (invalidate) { - Memory::RasterizerMarkRegionCached(surface->addr, surface->size, -1); - surface_cache.subtract( - std::make_pair(boost::icl::interval::right_open( - surface->addr, surface->addr + surface->size), - std::set>({surface}))); + // Look for a valid surface to blit + SurfaceParams params = *surface; + const u32 pixel_offset = params.PixelsInBytes(interval_start - params.addr); + if (!params.is_tiled) { + params.addr += (pixel_offset - (pixel_offset % params.width)) * + SurfaceParams::GetFormatBpp(params.pixel_format) / 8; // Start of the row + params.height = (params.PixelsInBytes(interval_end - params.addr - 1) / params.width) + 1; } + else { + params.addr += (pixel_offset - (pixel_offset % (params.width * 8))) * + 
SurfaceParams::GetFormatBpp(params.pixel_format) / 8; // Start of the tiled row + params.height = ((params.PixelsInBytes(interval_end - params.addr - 1) / (params.width * 8)) + 1) * 8; + } + params.UpdateParams(); + + Surface match_surface; + MatchType match_type; + std::tie(match_surface, match_type) = + FindMatch(surface_cache, params, true); + + if (match_type == MatchType::Copy) { + // Need to call CopySurface and possibly create a new one first, which GetSurface will do for us + if (params.GetInterval() == surface->GetInterval()) { + CopySurface(match_surface, surface); + surface->invalid_regions.clear(); + return; + } + Surface tmp_surface = GetSurface(params, true, false); + if (tmp_surface != nullptr) + CopySurface(match_surface, tmp_surface); + match_surface = tmp_surface; + } + + if (match_surface != nullptr) { + const auto src_rect = (match_type == MatchType::SubRect) ? + match_surface->GetScaledSubRect(params) : + match_surface->GetScaledRect(); + const auto dest_rect = surface->GetScaledSubRect(params); + + BlitSurfaces(match_surface, src_rect, surface, dest_rect); + surface->gl_buffer_dirty = true; + + surface->invalid_regions.erase(params.GetInterval()); + continue; + } + + // Load data from 3DS memory + FlushRegion(interval_start, interval_end - interval_start); + surface->DownloadGLTexture(); + surface->LoadGLBuffer(interval_start, interval_end); + upload_texture = true; + + surface->invalid_regions.erase(interval); } + + if (upload_texture) + surface->UploadGLTexture(); +} + +void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size) { + if (size == 0) + return; + + const auto flush_interval = SurfaceInterval::right_open(addr, addr + size); + for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { + const auto interval = pair.first & flush_interval; + auto& surface = pair.second; + + // Sanity check, this surface is the last one that marked this region dirty + ASSERT(surface->IsRegionValid(interval)); + 
surface->DownloadGLTexture(); + surface->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval)); + } + + // Reset dirty regions + dirty_regions.erase(flush_interval); } void RasterizerCacheOpenGL::FlushAll() { - for (auto& surfaces : surface_cache) { - for (auto& surface : surfaces.second) { - FlushSurface(surface.get()); + FlushRegion(0, 0xFFFFFFFF); +} + +void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner) { + if (size == 0) + return; + + const auto invalid_interval = SurfaceInterval::right_open(addr, addr + size); + + if (region_owner != nullptr) { + ASSERT(region_owner->type != SurfaceType::Texture); + ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); + ASSERT(region_owner->width == region_owner->stride); // Surfaces can't have a gap + region_owner->gl_buffer_dirty = true; + region_owner->invalid_regions.erase(invalid_interval); + } + + SurfaceSet remove_surfaces; + + for (auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { + for (auto& cached_surface : pair.second) { + if (cached_surface == region_owner) + continue; + + // If cpu is invalidating this region we want to remove it + // to (likely) mark the memory pages as uncached + // but before that we have to flush its region that is still valid + if (region_owner == nullptr) { + const auto flush_intervals = SurfaceRegions(cached_surface->GetInterval()) - invalid_interval; + for (const auto& interval : flush_intervals) { + FlushRegion(boost::icl::first(interval), boost::icl::length(interval)); + } + remove_surfaces.emplace(cached_surface); + continue; + } + + const auto interval = cached_surface->GetInterval() & invalid_interval; + + cached_surface->invalid_regions.insert(interval); + + // Remove only "empty" fill surfaces to avoid destroying and recreating OGL textures + if (cached_surface->type == SurfaceType::Fill && + !cached_surface->IsRegionPartiallyValid(cached_surface->GetInterval())) + 
remove_surfaces.emplace(cached_surface); } } + + if (region_owner != nullptr) + dirty_regions.set(std::make_pair(invalid_interval, region_owner)); + else + dirty_regions.erase(invalid_interval); + + for (auto& remove_surface : remove_surfaces) + UnregisterSurface(remove_surface); +} + +Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { + Surface surface = std::make_shared(); + static_cast(*surface) = params; + + surface->texture.Create(); + + // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type + surface->gl_bytes_per_pixel = + (surface->pixel_format == PixelFormat::D24 || surface->type == SurfaceType::Texture) ? + 4 : + surface->bytes_per_pixel; + + surface->gl_buffer_offset = (surface->pixel_format == PixelFormat::D24) ? 1 : 0; + + surface->gl_buffer_dirty = false; + surface->invalid_regions.insert(surface->GetInterval()); + AllocateSurfaceTexture(surface->texture.handle, + GetFormatTuple(surface->pixel_format), + surface->GetScaledWidth(), + surface->GetScaledHeight()); + + return surface; +} + +void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { + surface_cache.add(std::make_pair(surface->GetInterval(), SurfaceSet({ surface }))); + UpdatePagesCachedCount(surface->addr, surface->size, 1); +} + +void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { + UpdatePagesCachedCount(surface->addr, surface->size, -1); + surface_cache.subtract(std::make_pair(surface->GetInterval(), SurfaceSet({ surface }))); +} + +void RasterizerCacheOpenGL::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) { + const u32 num_pages = ((addr + size - 1) >> Memory::PAGE_BITS) - (addr >> Memory::PAGE_BITS) + 1; + const u32 page_start = addr >> Memory::PAGE_BITS; + const u32 page_end = page_start + num_pages; + + // Interval maps will erase segments if count reaches 0, so if delta is negative we have to subtract after iterating + const auto pages_interval = PageMap::interval_type::right_open(page_start, 
page_end); + if (delta > 0) + cached_pages.add(std::make_pair(pages_interval, delta)); + + for (auto& pair : RangeFromInterval(cached_pages, pages_interval)) { + const auto interval = pair.first & pages_interval; + const int count = pair.second; + + const PAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; + const PAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const u32 interval_size = interval_end_addr - interval_start_addr; + + if (delta > 0 && count == delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); + else if (delta < 0 && count == -delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); + else + ASSERT(count >= 0); + } + + if (delta < 0) + cached_pages.add(std::make_pair(pages_interval, delta)); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index aea20c693..4b84e8ad6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -12,6 +12,7 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-local-typedef" #endif +#include #include #ifdef __GNUC__ #pragma GCC diagnostic pop @@ -20,21 +21,32 @@ #include "common/assert.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "common/math_util.h" #include "core/hw/gpu.h" #include "video_core/regs_framebuffer.h" #include "video_core/regs_texturing.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -namespace MathUtil { -template -struct Rectangle; -} - struct CachedSurface; +using Surface = std::shared_ptr; +using SurfaceSet = std::set; -using SurfaceCache = boost::icl::interval_map>>; +using SurfaceRegions = boost::icl::interval_set; +using SurfaceMap = boost::icl::interval_map; +using SurfaceCache = boost::icl::interval_map; + +using SurfaceInterval = SurfaceCache::interval_type; 
+static_assert(std::is_same() && + std::is_same(), "incorrect interval types"); + +using SurfaceRect_Tuple = std::tuple>; +using SurfaceSurfaceRect_Tuple = std::tuple>; + +using PageMap = boost::icl::interval_map; + +struct SurfaceParams { + explicit SurfaceParams(); -struct CachedSurface { enum class PixelFormat { // First 5 formats are shared between textures and color buffers RGBA8 = 0, @@ -68,10 +80,11 @@ struct CachedSurface { Texture = 1, Depth = 2, DepthStencil = 3, - Invalid = 4, + Fill = 4, + Invalid = 5 }; - static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) { + static unsigned int GetFormatBpp(SurfaceParams::PixelFormat format) { static const std::array bpp_table = { 32, // RGBA8 24, // RGB8 @@ -162,31 +175,98 @@ struct CachedSurface { return SurfaceType::Invalid; } + /// Update the params "size", "end", "bytes_per_pixel" and "type" from the already set "addr", "width", "height" and "pixel_format" + void UpdateParams() { + size = width * height * GetFormatBpp(pixel_format) / 8; + + if (stride == 0) + stride = width; + else + size += (stride - width) * (height - 1) * GetFormatBpp(pixel_format) / 8; + + end = addr + size; + type = GetFormatType(pixel_format); + bytes_per_pixel = GetFormatBpp(pixel_format) / 8; + } + + SurfaceInterval GetInterval() const { + return SurfaceInterval::right_open(addr, end); + } + u32 GetScaledWidth() const { - return (u32)(width * res_scale_width); + return static_cast(width * res_scale_width); } u32 GetScaledHeight() const { - return (u32)(height * res_scale_height); + return static_cast(height * res_scale_height); } - PAddr addr; - u32 size; + MathUtil::Rectangle GetRect() const { + return MathUtil::Rectangle(0, 0, width, height); + } - PAddr min_valid; - PAddr max_valid; + MathUtil::Rectangle GetScaledRect() const { + return MathUtil::Rectangle(0, 0, GetScaledWidth(), GetScaledHeight()); + } - OGLTexture texture; - u32 width; - u32 height; - /// Stride between lines, in pixels. 
Only valid for images in linear format. - u32 pixel_stride = 0; + u32 PixelsInBytes(u32 size) const { + return size * 8 / GetFormatBpp(pixel_format); + } + + PAddr addr = 0; + PAddr end = 0; + u32 size = 0; + + u32 width = 0; + u32 height = 0; + u32 stride = 0; float res_scale_width = 1.f; float res_scale_height = 1.f; - bool is_tiled; - PixelFormat pixel_format; - bool dirty; + bool is_tiled = false; + u32 bytes_per_pixel = 0; + PixelFormat pixel_format = PixelFormat::Invalid; + SurfaceType type = SurfaceType::Invalid; +}; + +struct CachedSurface : SurfaceParams { + bool ExactMatch(const SurfaceParams& other_surface) const; + bool CanSubRect(const SurfaceParams& sub_surface) const; + bool CanCopy(const SurfaceParams& dest_surface) const; + + MathUtil::Rectangle GetSubRect(const SurfaceParams& sub_surface) const; + MathUtil::Rectangle GetScaledSubRect(const SurfaceParams& sub_surface) const; + + bool IsRegionValid(const SurfaceInterval& interval) const { + return (invalid_regions.find(interval) == invalid_regions.end()); + } + + bool IsRegionPartiallyValid(const SurfaceInterval& interval) const { + const auto it = invalid_regions.find(interval); + if (it == invalid_regions.end()) + return true; + return ((boost::icl::first(*it) > addr) || (boost::icl::last_next(*it) < end)); + } + + SurfaceRegions invalid_regions; + + u32 fill_size = 0; /// Number of bytes to read from fill_data + std::array fill_data; + + OGLTexture texture; + + u32 gl_bytes_per_pixel; + int gl_buffer_offset; + std::vector gl_buffer; + bool gl_buffer_dirty; + + // Read/Write data in 3DS memory to/from gl_buffer + void LoadGLBuffer(PAddr load_start, PAddr load_end); + void FlushGLBuffer(PAddr flush_start, PAddr flush_end); + + // Upload/Download data in gl_buffer in/to this surface's texture + void UploadGLTexture(); + void DownloadGLTexture(); }; class RasterizerCacheOpenGL : NonCopyable { @@ -194,46 +274,56 @@ public: RasterizerCacheOpenGL(); ~RasterizerCacheOpenGL(); - /// Blits one texture to 
another - void BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, - const MathUtil::Rectangle& src_rect, - const MathUtil::Rectangle& dst_rect); + /// Blit one surface's texture to another + bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle& src_rect, + const Surface& dst_surface, const MathUtil::Rectangle& dst_rect); - /// Attempt to blit one surface's texture to another - bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle& src_rect, - CachedSurface* dst_surface, const MathUtil::Rectangle& dst_rect); - - /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached) - CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale, - bool load_if_create); + /// Load a texture from 3DS memory to OpenGL and cache it (if not already cached) + Surface GetSurface(const SurfaceParams& params, bool match_res_scale, bool load_if_create); /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from /// 3DS memory to OpenGL and caches it (if not already cached) - CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale, - bool load_if_create, MathUtil::Rectangle& out_rect); + SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, bool match_res_scale, + bool load_if_create); - /// Gets a surface based on the texture configuration - CachedSurface* GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config); + /// Get a surface based on the texture configuration + Surface GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config); - /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer - /// configuration - std::tuple> GetFramebufferSurfaces( - const Pica::FramebufferRegs::FramebufferConfig& config); + /// Get the color and depth surfaces based on the framebuffer configuration + SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool 
using_color_fb, bool using_depth_fb); - /// Attempt to get a surface that exactly matches the fill region and format - CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config); + /// Get a surface that matches the fill config + Surface GetFillSurface(const GPU::Regs::MemoryFillConfig& config); - /// Write the surface back to memory - void FlushSurface(CachedSurface* surface); + /// Get a surface that matches a "texture copy" display transfer config + SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); - /// Write any cached resources overlapping the region back to memory (if dirty) and optionally - /// invalidate them in the cache - void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate); + /// Write any cached resources overlapping the region back to memory (if dirty) + void FlushRegion(PAddr addr, u32 size); + + /// Mark region as being invalidated by region_owner (nullptr if 3DS memory) + void InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner); /// Flush all cached resources tracked by this cache manager void FlushAll(); private: + /// Update surface's texture for given region when necessary + void ValidateSurface(const Surface& surface, PAddr addr, u32 size); + + /// Create a new surface + Surface CreateSurface(const SurfaceParams& params); + + /// Register surface into the cache + void RegisterSurface(const Surface& surface); + + /// Remove surface from the cache + void UnregisterSurface(const Surface& surface); + + /// Increase/decrease the number of surfaces in pages touching the specified region + void UpdatePagesCachedCount(PAddr addr, u32 size, int delta); + SurfaceCache surface_cache; - OGLFramebuffer transfer_framebuffers[2]; + SurfaceMap dirty_regions; + PageMap cached_pages; }; diff --git a/src/video_core/swrasterizer/swrasterizer.h b/src/video_core/swrasterizer/swrasterizer.h index 6d42d7409..6c524f013 100644 --- a/src/video_core/swrasterizer/swrasterizer.h +++
b/src/video_core/swrasterizer/swrasterizer.h @@ -22,6 +22,7 @@ class SWRasterizer : public RasterizerInterface { void NotifyPicaRegisterChanged(u32 id) override {} void FlushAll() override {} void FlushRegion(PAddr addr, u32 size) override {} + void InvalidateRegion(PAddr addr, u32 size) override {} void FlushAndInvalidateRegion(PAddr addr, u32 size) override {} }; } From 96861c6347cfdbc4a2a017153fdb20fdb66096c8 Mon Sep 17 00:00:00 2001 From: Phantom Date: Tue, 26 Sep 2017 22:22:23 +0200 Subject: [PATCH 2/5] temp scale fix --- .../renderer_opengl/gl_rasterizer.cpp | 13 +- .../renderer_opengl/gl_rasterizer_cache.cpp | 206 ++++++++++-------- .../renderer_opengl/gl_rasterizer_cache.h | 14 +- 3 files changed, 136 insertions(+), 97 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index b1adc156e..75fbc70a9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -961,13 +961,16 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe MathUtil::Rectangle src_rect; Surface src_surface; - std::tie(src_surface, src_rect) = res_cache.GetSurfaceSubRect(src_params, false, true); + std::tie(src_surface, src_rect) = res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); if (src_surface == nullptr) return false; + dst_params.res_scale_width = src_surface->res_scale_width; + dst_params.res_scale_height = src_surface->res_scale_height; + MathUtil::Rectangle dst_rect; Surface dst_surface; - std::tie(dst_surface, dst_rect) = res_cache.GetSurfaceSubRect(dst_params, false, false); + std::tie(dst_surface, dst_rect) = res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, false); if (dst_surface == nullptr) return false; @@ -1016,12 +1019,14 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon dst_params.stride = (output_width + output_gap) * src_surface->stride / 
src_params.stride; dst_params.width = output_width * src_surface->stride / src_params.stride; dst_params.height = src_surface->is_tiled ? src_params.height * 8 : src_params.height; + dst_params.res_scale_width = src_surface->res_scale_width; + dst_params.res_scale_height = src_surface->res_scale_height; dst_params.UpdateParams(); const bool load_gap = output_gap != 0; // Since we are going to invalidate the gap if there is one, we will have to load it first MathUtil::Rectangle dst_rect; Surface dst_surface; - std::tie(dst_surface, dst_rect) = res_cache.GetSurfaceSubRect(dst_params, false, load_gap); + std::tie(dst_surface, dst_rect) = res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, load_gap); if (src_surface == nullptr) return false; @@ -1060,7 +1065,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con MathUtil::Rectangle src_rect; Surface src_surface; - std::tie(src_surface, src_rect) = res_cache.GetSurfaceSubRect(src_params, false, true); + std::tie(src_surface, src_rect) = res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); if (src_surface == nullptr) { return false; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 87edca9d3..2fdddf2bb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -313,6 +313,7 @@ static bool FillSurface(const Surface& surface, const u8* fill_data) { } SurfaceParams::SurfaceParams() { + // todo: metroid needs that for now, "breaks" other stuff // Set the internal resolution, assume the same scaling factor for top and bottom screens float resolution_scale_factor = Settings::values.resolution_factor; if (resolution_scale_factor == 0.0f) { @@ -394,6 +395,28 @@ bool CachedSurface::CanCopy(const SurfaceParams& dest_surface) const { return false; } +bool CachedSurface::CanTexCopy(const SurfaceParams& texcopy_params) const { + 
if (pixel_format == PixelFormat::Invalid || + addr > texcopy_params.addr || end < texcopy_params.end || + ((texcopy_params.addr - addr) * 8) % SurfaceParams::GetFormatBpp(pixel_format) != 0 || + (texcopy_params.width * 8) % SurfaceParams::GetFormatBpp(pixel_format) != 0 || + (texcopy_params.stride * 8) % SurfaceParams::GetFormatBpp(pixel_format) != 0) + return false; + + const u32 begin_pixel_index = PixelsInBytes(texcopy_params.addr - addr); + const int x0 = begin_pixel_index % width; + const int y0 = begin_pixel_index / width; + + if (!is_tiled) + return (PixelsInBytes(texcopy_params.stride) == stride && + x0 + PixelsInBytes(texcopy_params.width) <= stride); + + return (PixelsInBytes(texcopy_params.addr - addr) % 64 == 0 && + PixelsInBytes(texcopy_params.width) % 64 == 0 && + PixelsInBytes(texcopy_params.stride) == stride * 8 && + x0 + PixelsInBytes(texcopy_params.width / 8) <= stride); +} + static void CopySurface(const Surface& src_surface, const Surface& dest_surface) { if (src_surface == dest_surface) return; @@ -426,6 +449,13 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { if (texture_src_data == nullptr) return; + //TODO: Should probably be done in ::Memory:: and check for other regions too + if (load_start <= Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END) + load_end = Memory::VRAM_VADDR_END; + + if (load_start < Memory::VRAM_VADDR && load_end >= Memory::VRAM_VADDR) + load_start = Memory::VRAM_VADDR; + MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); ASSERT(load_start >= addr && load_end <= end); @@ -474,6 +504,14 @@ void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { if (dst_buffer == nullptr) return; + //TODO: Should probably be done in ::Memory:: and check for other regions too + //same as loadglbuffer() + if (flush_start <= Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END) + flush_end = Memory::VRAM_VADDR_END; + + if (flush_start < Memory::VRAM_VADDR && flush_end >= Memory::VRAM_VADDR) + 
flush_start = Memory::VRAM_VADDR; + MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); ASSERT(flush_start >= addr && flush_end <= end); @@ -593,8 +631,9 @@ enum MatchType : int { SubRect = (1 << 1), // Surface encompasses params Invalid = (1 << 2), // Flag that can be applied to other match types, invalid matches require validation before they can be used Copy = (1 << 3), // Surface we can copy from - TexCopy = (1 << 4), // Surface that will match a display transfer "texture copy" parameters - All = (1 << 5) - 1, + //PartialCopy = (1 << 4), // Surface we can partially copy from + TexCopy = (1 << 5), // Surface that will match a display transfer "texture copy" parameters + All = (1 << 6) - 1, None = 0, }; @@ -603,40 +642,37 @@ constexpr MatchType operator | (MatchType lhs, MatchType rhs) { } /// Get the best surface match (and its match type) for the given flags, higher flag value meaning lower priority +/// NEW:: no more match type priority, only region validity matters and then scaling comparison template -std::tuple FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, bool match_res_scale) { +Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, ScaleMatch match_res_scale) { constexpr float MATCH_GOODNESS_RESET = -1.f; Surface match_surface = nullptr; - MatchType match_type = MatchType::All; // Starting from lowest possible priority + bool match_valid = false; float best_match_goodness = MATCH_GOODNESS_RESET; for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { for (auto& surface : pair.second) { - const bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height); + const bool res_scale_match = match_res_scale == ScaleMatch::Exact ?
+ (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height) : + (params.res_scale_width <= surface->res_scale_width && params.res_scale_height <= surface->res_scale_height); const float match_goodness = surface->res_scale_width * surface->res_scale_height; - const MatchType invalid_mask = surface->IsRegionValid(params.GetInterval()) ? MatchType::None : MatchType::Invalid; + const bool is_valid = surface->IsRegionValid(params.GetInterval()); - if (!(find_flags & MatchType::Invalid) && invalid_mask == MatchType::Invalid) + if (!(find_flags & MatchType::Invalid) && !is_valid) continue; const auto IsMatch_Helper = [&](MatchType check_type, auto match_fn) { if (!(find_flags & check_type)) return false; - check_type = check_type | invalid_mask; - - // Lower flag value means higher priority - if (match_type < check_type) // We already have a better match type - return true; // Return true to skip to the next surface - if (!match_fn()) return false; - if (!match_res_scale || res_scale_match || surface->type == SurfaceType::Fill) { // Found a match - if (match_type > check_type) { + if (match_res_scale == ScaleMatch::Ignore || res_scale_match || surface->type == SurfaceType::Fill) { // Found a match + if (is_valid && !match_valid) { best_match_goodness = MATCH_GOODNESS_RESET; - match_type = check_type; + match_valid = true; } if (match_goodness > best_match_goodness) { best_match_goodness = match_goodness; @@ -645,37 +681,13 @@ std::tuple FindMatch(const SurfaceCache& surface_cache, cons } return false; }; - if (IsMatch_Helper(MatchType::Exact, [&] { return surface->ExactMatch(params); })) - continue; - if (IsMatch_Helper(MatchType::SubRect, [&] { return surface->CanSubRect(params); })) - continue; - if (IsMatch_Helper(MatchType::Copy, [&] { return surface->CanCopy(params); })) - continue; - if (IsMatch_Helper(MatchType::TexCopy, [&] { - if (surface->pixel_format == PixelFormat::Invalid || - surface->addr > params.addr || 
surface->end < params.end || - ((params.addr - surface->addr) * 8) % SurfaceParams::GetFormatBpp(surface->pixel_format) != 0 || - (params.width * 8) % SurfaceParams::GetFormatBpp(surface->pixel_format) != 0 || - (params.stride * 8) % SurfaceParams::GetFormatBpp(surface->pixel_format) != 0) - return false; - - const u32 begin_pixel_index = surface->PixelsInBytes(params.addr - surface->addr); - const int x0 = begin_pixel_index % surface->width; - const int y0 = begin_pixel_index / surface->width; - - if (!surface->is_tiled) - return (surface->PixelsInBytes(params.stride) == surface->stride && - x0 + surface->PixelsInBytes(params.width) <= surface->stride); - - return (surface->PixelsInBytes(params.addr - surface->addr) % 64 == 0 && - surface->PixelsInBytes(params.width) % 64 == 0 && - surface->PixelsInBytes(params.stride) == surface->stride * 8 && - x0 + surface->PixelsInBytes(params.width / 8) <= surface->stride); - })) - continue; + IsMatch_Helper(MatchType::Exact, [&] { return surface->ExactMatch(params); }); + IsMatch_Helper(MatchType::SubRect, [&] { return surface->CanSubRect(params); }); + IsMatch_Helper(MatchType::Copy, [&] { return surface->CanCopy(params); }); + IsMatch_Helper(MatchType::TexCopy, [&] { return surface->CanTexCopy(params); }); } } - return std::make_tuple(match_surface, (match_surface == nullptr) ? 
MatchType::None : match_type); + return match_surface; } RasterizerCacheOpenGL::RasterizerCacheOpenGL() { @@ -704,19 +716,16 @@ bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface, src_surface->type); } -Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool match_res_scale, bool load_if_create) { +Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, bool load_if_create) { if (params.addr == 0 || params.height * params.width == 0) { return nullptr; } // Check for an exact or subrect match in existing surfaces - Surface surface_match; - MatchType match_type; - std::tie(surface_match, match_type) = - FindMatch(surface_cache, params, match_res_scale); + Surface surface_match = FindMatch(surface_cache, params, match_res_scale); if (surface_match != nullptr) { - if (load_if_create && (match_type & MatchType::Invalid)) { + if (load_if_create) { ValidateSurface(surface_match, params.addr, params.size); } return surface_match; @@ -734,7 +743,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool matc } SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params, - bool match_res_scale, + ScaleMatch match_res_scale, bool load_if_create) { MathUtil::Rectangle out_rect{}; @@ -743,20 +752,19 @@ SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& } // Attempt to find encompassing surface - Surface subrect_match; - MatchType match_type; - std::tie(subrect_match, match_type) = FindMatch(surface_cache, params, match_res_scale); + Surface subrect_match = FindMatch(surface_cache, params, match_res_scale); // Return the best subrect surface if found if (subrect_match != nullptr) { + if (load_if_create) { + ValidateSurface(subrect_match, params.addr, params.size); + } + out_rect = subrect_match->GetScaledSubRect(params); // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory. 
if (params.is_tiled) std::swap(out_rect.top, out_rect.bottom); - if (load_if_create && (match_type & MatchType::Invalid)) - ValidateSurface(subrect_match, params.addr, params.size); - return std::make_tuple(subrect_match, out_rect); } @@ -791,7 +799,13 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Pica::TexturingRegs::Full params.is_tiled = true; params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(info.format); params.UpdateParams(); - return GetSurface(params, false, true); + return GetSurface(params, ScaleMatch::Ignore, true); +} + +constexpr float GetResolutionScaleFactor() { + return Settings::values.resolution_factor == 0.0f ? + VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio() : + Settings::values.resolution_factor; } SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool using_color_fb, @@ -819,19 +833,31 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool usin SurfaceParams depth_params; color_params.is_tiled = depth_params.is_tiled = true; + // update resolution_scale_factor and reset cache if changed + static float resolution_scale_factor = GetResolutionScaleFactor(); + if (resolution_scale_factor != GetResolutionScaleFactor()) { + resolution_scale_factor = GetResolutionScaleFactor(); + FlushAll(); + InvalidateRegion(0, 0xffffffff, nullptr); + } + color_params.addr = config.GetColorBufferPhysicalAddress(); color_params.width = depth_params.width = config.GetWidth(); color_params.height = depth_params.height = config.GetHeight(); color_params.pixel_format = SurfaceParams::PixelFormatFromColorFormat(config.color_format); + color_params.res_scale_height = resolution_scale_factor; + color_params.res_scale_width = resolution_scale_factor; color_params.UpdateParams(); MathUtil::Rectangle rect{}; Surface color_surface = nullptr; if (using_color_fb) - std::tie(color_surface, rect) = GetSurfaceSubRect(color_params, true, true); + std::tie(color_surface, rect) = 
GetSurfaceSubRect(color_params, ScaleMatch::Exact, true); depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); depth_params.addr = config.GetDepthBufferPhysicalAddress(); + depth_params.res_scale_height = resolution_scale_factor; + depth_params.res_scale_width = resolution_scale_factor; depth_params.UpdateParams(); Surface depth_surface = nullptr; @@ -844,11 +870,11 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool usin depth_params.height = color_surface->height; depth_params.UpdateParams(); - depth_surface = GetSurface(depth_params, true, false); + depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); ValidateSurface(depth_surface, validate_addr, validate_size); } else if (using_depth_fb) { - std::tie(depth_surface, rect) = GetSurfaceSubRect(depth_params, true, true); + std::tie(depth_surface, rect) = GetSurfaceSubRect(depth_params, ScaleMatch::Exact, true); } return std::make_tuple(color_surface, depth_surface, rect); @@ -876,14 +902,11 @@ Surface RasterizerCacheOpenGL::GetFillSurface(const GPU::Regs::MemoryFillConfig& SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) { MathUtil::Rectangle rect{}; - Surface match_surface; - MatchType match_type; - std::tie(match_surface, match_type) = FindMatch(surface_cache, params, false); - - if (match_type & MatchType::Invalid) - ValidateSurface(match_surface, params.addr, params.size); + Surface match_surface = FindMatch(surface_cache, params, ScaleMatch::Ignore); if (match_surface != nullptr) { + ValidateSurface(match_surface, params.addr, params.size); + SurfaceParams match_subrect = params; match_subrect.width = match_surface->PixelsInBytes(params.width); match_subrect.stride = match_surface->PixelsInBytes(params.stride); @@ -941,28 +964,26 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, } params.UpdateParams(); - Surface match_surface; - MatchType match_type; - 
std::tie(match_surface, match_type) = - FindMatch(surface_cache, params, true); - - if (match_type == MatchType::Copy) { - // Need to call CopySurface and possibly create a new one first, which GetSurface will do for us - if (params.GetInterval() == surface->GetInterval()) { - CopySurface(match_surface, surface); - surface->invalid_regions.clear(); - return; - } - Surface tmp_surface = GetSurface(params, true, false); - if (tmp_surface != nullptr) - CopySurface(match_surface, tmp_surface); - match_surface = tmp_surface; - } + Surface match_surface = FindMatch(surface_cache, params, ScaleMatch::Ignore); if (match_surface != nullptr) { - const auto src_rect = (match_type == MatchType::SubRect) ? - match_surface->GetScaledSubRect(params) : - match_surface->GetScaledRect(); + if (!match_surface->CanSubRect(params)) { + // Need to call CopySurface and possibly create a new one first, which GetSurface will do for us + if (params.GetInterval() == surface->GetInterval()) { + CopySurface(match_surface, surface); + surface->invalid_regions.clear(); + return; + } + Surface tmp_surface = GetSurface(params, ScaleMatch::Upscale, false); + if (tmp_surface != nullptr) { + CopySurface(match_surface, tmp_surface); + tmp_surface->invalid_regions.erase(params.GetInterval()); + match_surface = tmp_surface; + } + } + + ASSERT(match_surface->CanSubRect(params)); + const auto src_rect = match_surface->GetScaledSubRect(params); const auto dest_rect = surface->GetScaledSubRect(params); BlitSurfaces(match_surface, src_rect, surface, dest_rect); @@ -985,7 +1006,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, surface->UploadGLTexture(); } -void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size) { +void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, Surface flush_surface) { if (size == 0) return; @@ -994,6 +1015,9 @@ void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size) { const auto interval = pair.first & flush_interval; auto& 
surface = pair.second; + if (flush_surface != nullptr && surface != flush_surface) + continue; + // Sanity check, this surface is the last one that marked this region dirty ASSERT(surface->IsRegionValid(interval)); surface->DownloadGLTexture(); @@ -1033,9 +1057,11 @@ void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface // to (likely) mark the memory pages as uncached // but before that we have to flush its region that is still valid if (region_owner == nullptr) { + // If that surface has modified data outside of the invalidated range + // have to flush it first const auto flush_intervals = SurfaceRegions(cached_surface->GetInterval()) - invalid_interval; for (const auto& interval : flush_intervals) { - FlushRegion(boost::icl::first(interval), boost::icl::length(interval)); + FlushRegion(boost::icl::first(interval), boost::icl::length(interval), cached_surface); } remove_surfaces.emplace(cached_surface); continue; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 4b84e8ad6..1249b6e42 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -44,6 +44,12 @@ using SurfaceSurfaceRect_Tuple = std::tuple; +enum class ScaleMatch { + Exact, // only accept same res scale + Upscale, // only allow higher scale than params + Ignore // accept every scaled res +}; + struct SurfaceParams { explicit SurfaceParams(); @@ -233,6 +239,8 @@ struct CachedSurface : SurfaceParams { bool ExactMatch(const SurfaceParams& other_surface) const; bool CanSubRect(const SurfaceParams& sub_surface) const; bool CanCopy(const SurfaceParams& dest_surface) const; + //bool CanPartialCopy(const SurfaceParams& dest_surface) const; + bool CanTexCopy(const SurfaceParams& texcopy_params) const; MathUtil::Rectangle GetSubRect(const SurfaceParams& sub_surface) const; MathUtil::Rectangle GetScaledSubRect(const SurfaceParams& sub_surface) 
const; @@ -279,11 +287,11 @@ public: const Surface& dst_surface, const MathUtil::Rectangle& dst_rect); /// Load a texture from 3DS memory to OpenGL and cache it (if not already cached) - Surface GetSurface(const SurfaceParams& params, bool match_res_scale, bool load_if_create); + Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, bool load_if_create); /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from /// 3DS memory to OpenGL and caches it (if not already cached) - SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, bool match_res_scale, + SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, bool load_if_create); /// Get a surface based on the texture configuration @@ -299,7 +307,7 @@ public: SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); /// Write any cached resources overlapping the region back to memory (if dirty) - void FlushRegion(PAddr addr, u32 size); + void FlushRegion(PAddr addr, u32 size, Surface flush_surface = nullptr); /// Mark region as being invalidated by region_owner (nullptr if 3DS memory) void InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner); From 926abc6ddb51b92a7898b1a5457d70209d7ae389 Mon Sep 17 00:00:00 2001 From: Phantom Date: Fri, 6 Oct 2017 17:33:00 +0200 Subject: [PATCH 3/5] integer res_scale and cleanup --- src/citra/config.cpp | 2 +- src/citra_qt/configuration/config.cpp | 3 +- .../configuration/configure_graphics.cpp | 74 +--------- src/core/frontend/framebuffer_layout.cpp | 4 +- src/core/frontend/framebuffer_layout.h | 2 +- src/core/settings.h | 2 +- .../renderer_opengl/gl_rasterizer.cpp | 47 +++---- .../renderer_opengl/gl_rasterizer.h | 4 +- .../renderer_opengl/gl_rasterizer_cache.cpp | 128 ++++++++---------- .../renderer_opengl/gl_rasterizer_cache.h | 31 ++--- .../renderer_opengl/gl_shader_gen.cpp | 2 +- 11 files changed, 99 insertions(+), 200 deletions(-) diff --git 
a/src/citra/config.cpp b/src/citra/config.cpp index 45c28ad09..72bda0be0 100644 --- a/src/citra/config.cpp +++ b/src/citra/config.cpp @@ -88,7 +88,7 @@ void Config::ReadValues() { Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", true); Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true); Settings::values.resolution_factor = - (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0); + static_cast(sdl2_config->GetInteger("Renderer", "resolution_factor", 1)); Settings::values.use_vsync = sdl2_config->GetBoolean("Renderer", "use_vsync", false); Settings::values.toggle_framelimit = sdl2_config->GetBoolean("Renderer", "toggle_framelimit", true); diff --git a/src/citra_qt/configuration/config.cpp b/src/citra_qt/configuration/config.cpp index 5261f4c4c..718f305dc 100644 --- a/src/citra_qt/configuration/config.cpp +++ b/src/citra_qt/configuration/config.cpp @@ -73,7 +73,8 @@ void Config::ReadValues() { qt_config->beginGroup("Renderer"); Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", true).toBool(); Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool(); - Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat(); + Settings::values.resolution_factor = + static_cast(qt_config->value("resolution_factor", 1).toInt()); Settings::values.use_vsync = qt_config->value("use_vsync", false).toBool(); Settings::values.toggle_framelimit = qt_config->value("toggle_framelimit", true).toBool(); diff --git a/src/citra_qt/configuration/configure_graphics.cpp b/src/citra_qt/configuration/configure_graphics.cpp index b5a5ab1e1..22a8ca8ae 100644 --- a/src/citra_qt/configuration/configure_graphics.cpp +++ b/src/citra_qt/configuration/configure_graphics.cpp @@ -20,81 +20,11 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) ConfigureGraphics::~ConfigureGraphics() {} -enum class Resolution : int { - Auto, - Scale1x, 
- Scale2x, - Scale3x, - Scale4x, - Scale5x, - Scale6x, - Scale7x, - Scale8x, - Scale9x, - Scale10x, -}; - -float ToResolutionFactor(Resolution option) { - switch (option) { - case Resolution::Auto: - return 0.f; - case Resolution::Scale1x: - return 1.f; - case Resolution::Scale2x: - return 2.f; - case Resolution::Scale3x: - return 3.f; - case Resolution::Scale4x: - return 4.f; - case Resolution::Scale5x: - return 5.f; - case Resolution::Scale6x: - return 6.f; - case Resolution::Scale7x: - return 7.f; - case Resolution::Scale8x: - return 8.f; - case Resolution::Scale9x: - return 9.f; - case Resolution::Scale10x: - return 10.f; - } - return 0.f; -} - -Resolution FromResolutionFactor(float factor) { - if (factor == 0.f) { - return Resolution::Auto; - } else if (factor == 1.f) { - return Resolution::Scale1x; - } else if (factor == 2.f) { - return Resolution::Scale2x; - } else if (factor == 3.f) { - return Resolution::Scale3x; - } else if (factor == 4.f) { - return Resolution::Scale4x; - } else if (factor == 5.f) { - return Resolution::Scale5x; - } else if (factor == 6.f) { - return Resolution::Scale6x; - } else if (factor == 7.f) { - return Resolution::Scale7x; - } else if (factor == 8.f) { - return Resolution::Scale8x; - } else if (factor == 9.f) { - return Resolution::Scale9x; - } else if (factor == 10.f) { - return Resolution::Scale10x; - } - return Resolution::Auto; -} - void ConfigureGraphics::setConfiguration() { ui->toggle_hw_renderer->setChecked(Settings::values.use_hw_renderer); ui->resolution_factor_combobox->setEnabled(Settings::values.use_hw_renderer); ui->toggle_shader_jit->setChecked(Settings::values.use_shader_jit); - ui->resolution_factor_combobox->setCurrentIndex( - static_cast(FromResolutionFactor(Settings::values.resolution_factor))); + ui->resolution_factor_combobox->setCurrentIndex(Settings::values.resolution_factor); ui->toggle_vsync->setChecked(Settings::values.use_vsync); ui->toggle_framelimit->setChecked(Settings::values.toggle_framelimit); 
ui->layout_combobox->setCurrentIndex(static_cast(Settings::values.layout_option)); @@ -105,7 +35,7 @@ void ConfigureGraphics::applyConfiguration() { Settings::values.use_hw_renderer = ui->toggle_hw_renderer->isChecked(); Settings::values.use_shader_jit = ui->toggle_shader_jit->isChecked(); Settings::values.resolution_factor = - ToResolutionFactor(static_cast(ui->resolution_factor_combobox->currentIndex())); + static_cast(ui->resolution_factor_combobox->currentIndex()); Settings::values.use_vsync = ui->toggle_vsync->isChecked(); Settings::values.toggle_framelimit = ui->toggle_framelimit->isChecked(); Settings::values.layout_option = diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp index e9f778fcb..7af9556b1 100644 --- a/src/core/frontend/framebuffer_layout.cpp +++ b/src/core/frontend/framebuffer_layout.cpp @@ -16,8 +16,8 @@ static const float TOP_SCREEN_ASPECT_RATIO = static const float BOT_SCREEN_ASPECT_RATIO = static_cast(Core::kScreenBottomHeight) / Core::kScreenBottomWidth; -float FramebufferLayout::GetScalingRatio() const { - return static_cast(top_screen.GetWidth()) / Core::kScreenTopWidth; +u16 FramebufferLayout::GetScalingRatio() const { + return static_cast(((top_screen.GetWidth() - 1) / Core::kScreenTopWidth) + 1); } // Finds the largest size subrectangle contained in window area that is confined to the aspect ratio diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h index 4983cf103..0d826be9e 100644 --- a/src/core/frontend/framebuffer_layout.h +++ b/src/core/frontend/framebuffer_layout.h @@ -21,7 +21,7 @@ struct FramebufferLayout { * Returns the ration of pixel size of the top screen, compared to the native size of the 3DS * screen. 
*/ - float GetScalingRatio() const; + u16 GetScalingRatio() const; }; /** diff --git a/src/core/settings.h b/src/core/settings.h index 8d78cb424..34e5914c8 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -95,7 +95,7 @@ struct Values { // Renderer bool use_hw_renderer; bool use_shader_jit; - float resolution_factor; + u16 resolution_factor; bool use_vsync; bool toggle_framelimit; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 75fbc70a9..f2677b0e9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -245,7 +245,7 @@ void RasterizerOpenGL::DrawTriangles() { // Sync and bind the framebuffer surfaces Surface color_surface; Surface depth_surface; - MathUtil::Rectangle rect; + MathUtil::Rectangle rect; std::tie(color_surface, depth_surface, rect) = res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb); @@ -279,37 +279,32 @@ void RasterizerOpenGL::DrawTriangles() { const GLsizei viewport_height = static_cast(Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() * 2); - const float res_scale_width = color_surface != nullptr ? color_surface->res_scale_width : - (depth_surface == nullptr ? 1.0f : depth_surface->res_scale_width); - const float res_scale_height = color_surface != nullptr ? color_surface->res_scale_height : - (depth_surface == nullptr ? 1.0f : depth_surface->res_scale_height); + const u16 res_scale = color_surface != nullptr ? color_surface->res_scale : + (depth_surface == nullptr ? 
1u : depth_surface->res_scale); glViewport( - static_cast(rect.left + regs.rasterizer.viewport_corner.x * res_scale_width), - static_cast(rect.bottom + regs.rasterizer.viewport_corner.y * res_scale_height), - static_cast(viewport_width * res_scale_width), - static_cast(viewport_height * res_scale_height)); + static_cast(rect.left + regs.rasterizer.viewport_corner.x * res_scale), + static_cast(rect.bottom + regs.rasterizer.viewport_corner.y * res_scale), + viewport_width * res_scale, + viewport_height * res_scale); - if (uniform_block_data.data.framebuffer_scale[0] != res_scale_width || - uniform_block_data.data.framebuffer_scale[1] != res_scale_height) { - - uniform_block_data.data.framebuffer_scale[0] = res_scale_width; - uniform_block_data.data.framebuffer_scale[1] = res_scale_height; + if (uniform_block_data.data.framebuffer_scale != res_scale) { + uniform_block_data.data.framebuffer_scale = res_scale; uniform_block_data.dirty = true; } // Scissor checks are window-, not viewport-relative, which means that if the cached texture // sub-rect changes, the scissor bounds also need to be updated. GLint scissor_x1 = static_cast( - rect.left + regs.rasterizer.scissor_test.x1 * res_scale_width); + rect.left + regs.rasterizer.scissor_test.x1 * res_scale); GLint scissor_y1 = static_cast( - rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale_height); + rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale); // x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when // scaling or doing multisampling. 
GLint scissor_x2 = static_cast( - rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale_width); + rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale); GLint scissor_y2 = static_cast( - rect.bottom + (regs.rasterizer.scissor_test.y2 + 1) * res_scale_height); + rect.bottom + (regs.rasterizer.scissor_test.y2 + 1) * res_scale); if (uniform_block_data.data.scissor_x1 != scissor_x1 || uniform_block_data.data.scissor_x2 != scissor_x2 || @@ -959,16 +954,15 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe dst_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.output_format); dst_params.UpdateParams(); - MathUtil::Rectangle src_rect; + MathUtil::Rectangle src_rect; Surface src_surface; std::tie(src_surface, src_rect) = res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); if (src_surface == nullptr) return false; - dst_params.res_scale_width = src_surface->res_scale_width; - dst_params.res_scale_height = src_surface->res_scale_height; + dst_params.res_scale = src_surface->res_scale; - MathUtil::Rectangle dst_rect; + MathUtil::Rectangle dst_rect; Surface dst_surface; std::tie(dst_surface, dst_rect) = res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, false); if (dst_surface == nullptr) @@ -1004,7 +998,7 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon src_params.size = ((src_params.height - 1) * src_params.stride) + src_params.width; src_params.end = src_params.addr + src_params.size; - MathUtil::Rectangle src_rect; + MathUtil::Rectangle src_rect; Surface src_surface; std::tie(src_surface, src_rect) = res_cache.GetTexCopySurface(src_params); if (src_surface == nullptr) @@ -1019,12 +1013,11 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon dst_params.stride = (output_width + output_gap) * src_surface->stride / src_params.stride; dst_params.width = output_width * src_surface->stride / src_params.stride; 
dst_params.height = src_surface->is_tiled ? src_params.height * 8 : src_params.height; - dst_params.res_scale_width = src_surface->res_scale_width; - dst_params.res_scale_height = src_surface->res_scale_height; + dst_params.res_scale = src_surface->res_scale; dst_params.UpdateParams(); const bool load_gap = output_gap != 0; // Since we are going to invalidate the gap if there is one, we will have to load it first - MathUtil::Rectangle dst_rect; + MathUtil::Rectangle dst_rect; Surface dst_surface; std::tie(dst_surface, dst_rect) = res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, load_gap); if (src_surface == nullptr) @@ -1063,7 +1056,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.color_format); src_params.UpdateParams(); - MathUtil::Rectangle src_rect; + MathUtil::Rectangle src_rect; Surface src_surface; std::tie(src_surface, src_rect) = res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index e83cb48fc..18808b1e4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -136,7 +136,7 @@ private: // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. // Not following that rule will cause problems on some AMD drivers. 
struct UniformData { - alignas(8) GLvec2 framebuffer_scale; + GLint framebuffer_scale; GLint alphatest_ref; GLfloat depth_scale; GLfloat depth_offset; @@ -156,7 +156,7 @@ private: }; static_assert( - sizeof(UniformData) == 0x470, + sizeof(UniformData) == 0x460, "The size of the UniformData structure has changed, update the structure in the shader"); static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 2fdddf2bb..2e9bfbff6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -179,8 +179,8 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup cur_state.Apply(); } -static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rect, - GLuint dst_tex, const MathUtil::Rectangle& dst_rect, +static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rect, + GLuint dst_tex, const MathUtil::Rectangle& dst_rect, SurfaceType type) { OpenGLState cur_state = OpenGLState::GetCurState(); @@ -312,35 +312,23 @@ static bool FillSurface(const Surface& surface, const u8* fill_data) { return true; } -SurfaceParams::SurfaceParams() { - // todo: metroid needs that for now, "breaks" other stuff - // Set the internal resolution, assume the same scaling factor for top and bottom screens - float resolution_scale_factor = Settings::values.resolution_factor; - if (resolution_scale_factor == 0.0f) { - // Auto - scale resolution to the window size - resolution_scale_factor = VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio(); - } - res_scale_width = resolution_scale_factor; - res_scale_height = resolution_scale_factor; -} - -MathUtil::Rectangle CachedSurface::GetSubRect(const SurfaceParams& sub_surface) const { +MathUtil::Rectangle CachedSurface::GetSubRect(const 
SurfaceParams& sub_surface) const { const u32 begin_pixel_index = PixelsInBytes(sub_surface.addr - addr); const int x0 = begin_pixel_index % width; const int y0 = begin_pixel_index / width; if (is_tiled) - return MathUtil::Rectangle(x0, height - y0 - sub_surface.height, x0 + sub_surface.width, height - y0); // Bottom to top + return MathUtil::Rectangle(x0, height - y0 - sub_surface.height, x0 + sub_surface.width, height - y0); // Bottom to top - return MathUtil::Rectangle(x0, y0, x0 + sub_surface.width, y0 + sub_surface.height); // Top to bottom + return MathUtil::Rectangle(x0, y0, x0 + sub_surface.width, y0 + sub_surface.height); // Top to bottom } -MathUtil::Rectangle CachedSurface::GetScaledSubRect(const SurfaceParams& sub_surface) const { +MathUtil::Rectangle CachedSurface::GetScaledSubRect(const SurfaceParams& sub_surface) const { auto rect = GetSubRect(sub_surface); - rect.left = static_cast(rect.left * res_scale_width); - rect.right = static_cast(rect.right * res_scale_width); - rect.top = static_cast(rect.top * res_scale_height); - rect.bottom = static_cast(rect.bottom * res_scale_height); + rect.left = rect.left * res_scale; + rect.right = rect.right * res_scale; + rect.top = rect.top * res_scale; + rect.bottom = rect.bottom * res_scale; return rect; } @@ -577,7 +565,7 @@ void CachedSurface::UploadGLTexture() { cur_state.Apply(); // If not 1x scale, blit 1x texture to a new scaled texture and replace texture in surface - if (res_scale_width != 1.f || res_scale_height != 1.f) { + if (res_scale != 1) { OGLTexture scaled_texture; scaled_texture.Create(); @@ -604,7 +592,7 @@ void CachedSurface::DownloadGLTexture() { // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush OGLTexture unscaled_tex; - if (res_scale_width != 1.f || res_scale_height != 1.f) { + if (res_scale != 1) { unscaled_tex.Create(); AllocateSurfaceTexture(unscaled_tex.handle, tuple, width, height); @@ -626,65 +614,58 @@ void CachedSurface::DownloadGLTexture() { 
gl_buffer_dirty = false; } -enum MatchType : int { - Exact = (1 << 0), // Surfaces perfectly match - SubRect = (1 << 1), // Surface encompasses params - Invalid = (1 << 2), // Flag that can be applied to other match types, invalid matches require validation before they can be used - Copy = (1 << 3), // Surface we can copy from - //PartialCopy = (1 << 4), // Surface we can partialy copy from - TexCopy = (1 << 5), // Surface that will match a display transfer "texture copy" parameters - All = (1 << 6) - 1, - None = 0, +enum MatchFlags { + Invalid = 1, // Flag that can be applied to other match types, invalid matches require validation before they can be used + Exact = 1 << 1, // Surfaces perfectly match + SubRect = 1 << 2, // Surface encompasses params + Copy = 1 << 3, // Surface we can copy from + TexCopy = 1 << 4 // Surface that will match a display transfer "texture copy" parameters }; -constexpr MatchType operator | (MatchType lhs, MatchType rhs) { - return static_cast(static_cast(lhs) | static_cast(rhs)); +constexpr MatchFlags operator | (MatchFlags lhs, MatchFlags rhs) { + return static_cast(static_cast(lhs) | static_cast(rhs)); } -/// Get the best surface match (and its match type) for the given flags, higher flag value meaning lower priority -/// NEW:: no more match type priority, only region validity matters and then scaling comparison -template -Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, ScaleMatch match_res_scale) { - constexpr float MATCH_GOODNESS_RESET = -1.f; - +/// Get the best surface match (and its match type) for the given flags +template +Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, ScaleMatch match_scale_type) { Surface match_surface = nullptr; bool match_valid = false; - float best_match_goodness = MATCH_GOODNESS_RESET; + u32 match_scale = 0; for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { for (auto& surface : pair.second) { - const bool 
res_scale_match = match_res_scale == ScaleMatch::Exact ? - (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height) : - (params.res_scale_width <= surface->res_scale_width && params.res_scale_height <= surface->res_scale_height); - const float match_goodness = surface->res_scale_width * surface->res_scale_height; + const bool res_scale_matched = match_scale_type == ScaleMatch::Exact ? + (params.res_scale == surface->res_scale) : + (params.res_scale <= surface->res_scale); const bool is_valid = surface->IsRegionValid(params.GetInterval()); - if (!(find_flags & MatchType::Invalid) && !is_valid) + if (!(find_flags & MatchFlags::Invalid) && !is_valid) continue; - const auto IsMatch_Helper = [&](MatchType check_type, auto match_fn) { + const auto IsMatch_Helper = [&](MatchFlags check_type, auto match_fn) { if (!(find_flags & check_type)) return false; if (!match_fn()) return false; - if (match_res_scale == ScaleMatch::Ignore || res_scale_match || surface->type == SurfaceType::Fill) { // Found a match + if (match_scale_type == ScaleMatch::Ignore || res_scale_matched || surface->type == SurfaceType::Fill) { // Found a match if (is_valid && !match_valid) { - best_match_goodness = MATCH_GOODNESS_RESET; + match_scale = 0; match_valid = true; } - if (match_goodness > best_match_goodness) { - best_match_goodness = match_goodness; + if (surface->res_scale > match_scale) { + match_scale = surface->res_scale; match_surface = surface; } } return false; }; - IsMatch_Helper(MatchType::Exact, [&] { return surface->ExactMatch(params); }); - IsMatch_Helper(MatchType::SubRect, [&] { return surface->CanSubRect(params); }); - IsMatch_Helper(MatchType::Copy, [&] { return surface->CanCopy(params); }); - IsMatch_Helper(MatchType::TexCopy, [&] { return surface->CanTexCopy(params); }); + IsMatch_Helper(MatchFlags::Exact, [&] { return surface->ExactMatch(params); }); + IsMatch_Helper(MatchFlags::SubRect, [&] { return 
surface->CanSubRect(params); }); + IsMatch_Helper(MatchFlags::Copy, [&] { return surface->CanCopy(params); }); + IsMatch_Helper(MatchFlags::TexCopy, [&] { return surface->CanTexCopy(params); }); } } return match_surface; @@ -704,9 +685,9 @@ RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { } bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface, - const MathUtil::Rectangle& src_rect, + const MathUtil::Rectangle& src_rect, const Surface& dst_surface, - const MathUtil::Rectangle& dst_rect) { + const MathUtil::Rectangle& dst_rect) { if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) return false; @@ -722,7 +703,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatc } // Check for an exact or subrect match in existing surfaces - Surface surface_match = FindMatch(surface_cache, params, match_res_scale); + Surface surface_match = FindMatch(surface_cache, params, match_res_scale); if (surface_match != nullptr) { if (load_if_create) { @@ -745,14 +726,14 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatc SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, bool load_if_create) { - MathUtil::Rectangle out_rect{}; + MathUtil::Rectangle out_rect{}; if (params.addr == 0 || params.height * params.width == 0) { return std::make_tuple(nullptr, out_rect); } // Attempt to find encompassing surface - Surface subrect_match = FindMatch(surface_cache, params, match_res_scale); + Surface subrect_match = FindMatch(surface_cache, params, match_res_scale); // Return the best subrect surface if found if (subrect_match != nullptr) { @@ -778,7 +759,7 @@ SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& std::swap(out_rect.top, out_rect.bottom); // If stride was bigger than width we need to adjust our output rect - out_rect.right = static_cast(params.width * 
new_params.res_scale_width); + out_rect.right = static_cast(params.width * new_params.res_scale); Surface new_surface = CreateSurface(new_params); if (load_if_create) @@ -802,8 +783,8 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Pica::TexturingRegs::Full return GetSurface(params, ScaleMatch::Ignore, true); } -constexpr float GetResolutionScaleFactor() { - return Settings::values.resolution_factor == 0.0f ? +constexpr u16 GetResolutionScaleFactor() { + return !Settings::values.resolution_factor ? VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio() : Settings::values.resolution_factor; } @@ -834,7 +815,7 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool usin color_params.is_tiled = depth_params.is_tiled = true; // update resolution_scale_factor and reset cache if changed - static float resolution_scale_factor = GetResolutionScaleFactor(); + static u16 resolution_scale_factor = GetResolutionScaleFactor(); if (resolution_scale_factor != GetResolutionScaleFactor()) { resolution_scale_factor = GetResolutionScaleFactor(); FlushAll(); @@ -845,19 +826,17 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool usin color_params.width = depth_params.width = config.GetWidth(); color_params.height = depth_params.height = config.GetHeight(); color_params.pixel_format = SurfaceParams::PixelFormatFromColorFormat(config.color_format); - color_params.res_scale_height = resolution_scale_factor; - color_params.res_scale_width = resolution_scale_factor; + color_params.res_scale = resolution_scale_factor; color_params.UpdateParams(); - MathUtil::Rectangle rect{}; + MathUtil::Rectangle rect{}; Surface color_surface = nullptr; if (using_color_fb) std::tie(color_surface, rect) = GetSurfaceSubRect(color_params, ScaleMatch::Exact, true); depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); depth_params.addr = config.GetDepthBufferPhysicalAddress(); - 
depth_params.res_scale_height = resolution_scale_factor; - depth_params.res_scale_width = resolution_scale_factor; + depth_params.res_scale = resolution_scale_factor; depth_params.UpdateParams(); Surface depth_surface = nullptr; @@ -900,9 +879,9 @@ Surface RasterizerCacheOpenGL::GetFillSurface(const GPU::Regs::MemoryFillConfig& } SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) { - MathUtil::Rectangle rect{}; + MathUtil::Rectangle rect{}; - Surface match_surface = FindMatch(surface_cache, params, ScaleMatch::Ignore); + Surface match_surface = FindMatch(surface_cache, params, ScaleMatch::Ignore); if (match_surface != nullptr) { ValidateSurface(match_surface, params.addr, params.size); @@ -964,7 +943,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, } params.UpdateParams(); - Surface match_surface = FindMatch(surface_cache, params, ScaleMatch::Ignore); + Surface match_surface = FindMatch(surface_cache, params, ScaleMatch::Ignore); if (match_surface != nullptr) { if (!match_surface->CanSubRect(params)) { @@ -1046,8 +1025,6 @@ void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface region_owner->invalid_regions.erase(invalid_interval); } - SurfaceSet remove_surfaces; - for (auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { for (auto& cached_surface : pair.second) { if (cached_surface == region_owner) @@ -1085,6 +1062,7 @@ void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface for (auto& remove_surface : remove_surfaces) UnregisterSurface(remove_surface); + remove_surfaces.clear(); } Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 1249b6e42..0a524213e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ 
-39,8 +39,8 @@ using SurfaceInterval = SurfaceCache::interval_type; static_assert(std::is_same() && std::is_same(), "incorrect interval types"); -using SurfaceRect_Tuple = std::tuple>; -using SurfaceSurfaceRect_Tuple = std::tuple>; +using SurfaceRect_Tuple = std::tuple>; +using SurfaceSurfaceRect_Tuple = std::tuple>; using PageMap = boost::icl::interval_map; @@ -51,8 +51,6 @@ enum class ScaleMatch { }; struct SurfaceParams { - explicit SurfaceParams(); - enum class PixelFormat { // First 5 formats are shared between textures and color buffers RGBA8 = 0, @@ -200,19 +198,19 @@ struct SurfaceParams { } u32 GetScaledWidth() const { - return static_cast(width * res_scale_width); + return width * res_scale; } u32 GetScaledHeight() const { - return static_cast(height * res_scale_height); + return height * res_scale; } - MathUtil::Rectangle GetRect() const { - return MathUtil::Rectangle(0, 0, width, height); + MathUtil::Rectangle GetRect() const { + return { 0, 0, width, height }; } - MathUtil::Rectangle GetScaledRect() const { - return MathUtil::Rectangle(0, 0, GetScaledWidth(), GetScaledHeight()); + MathUtil::Rectangle GetScaledRect() const { + return { 0, 0, GetScaledWidth(), GetScaledHeight() }; } u32 PixelsInBytes(u32 size) const { @@ -226,8 +224,7 @@ struct SurfaceParams { u32 width = 0; u32 height = 0; u32 stride = 0; - float res_scale_width = 1.f; - float res_scale_height = 1.f; + u16 res_scale = 1; bool is_tiled = false; u32 bytes_per_pixel = 0; @@ -239,11 +236,10 @@ struct CachedSurface : SurfaceParams { bool ExactMatch(const SurfaceParams& other_surface) const; bool CanSubRect(const SurfaceParams& sub_surface) const; bool CanCopy(const SurfaceParams& dest_surface) const; - //bool CanPartialCopy(const SurfaceParams& dest_surface) const; bool CanTexCopy(const SurfaceParams& texcopy_params) const; - MathUtil::Rectangle GetSubRect(const SurfaceParams& sub_surface) const; - MathUtil::Rectangle GetScaledSubRect(const SurfaceParams& sub_surface) const; + 
MathUtil::Rectangle GetSubRect(const SurfaceParams& sub_surface) const; + MathUtil::Rectangle GetScaledSubRect(const SurfaceParams& sub_surface) const; bool IsRegionValid(const SurfaceInterval& interval) const { return (invalid_regions.find(interval) == invalid_regions.end()); @@ -283,8 +279,8 @@ public: ~RasterizerCacheOpenGL(); /// Blit one surface's texture to another - bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle& src_rect, - const Surface& dst_surface, const MathUtil::Rectangle& dst_rect); + bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle& src_rect, + const Surface& dst_surface, const MathUtil::Rectangle& dst_rect); /// Load a texture from 3DS memory to OpenGL and cache it (if not already cached) Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, bool load_if_create); @@ -334,4 +330,5 @@ private: SurfaceCache surface_cache; SurfaceMap dirty_regions; PageMap cached_pages; + SurfaceSet remove_surfaces; }; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 9fe183944..62f449a35 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -41,7 +41,7 @@ struct LightSrc { }; layout (std140) uniform shader_data { - vec2 framebuffer_scale; + int framebuffer_scale; int alphatest_ref; float depth_scale; float depth_offset; From 8969245e2ebd3ff64cb42cad367c4e4b60ee04f3 Mon Sep 17 00:00:00 2001 From: Phantom Date: Mon, 9 Oct 2017 04:24:15 +0200 Subject: [PATCH 4/5] res scale fixed + cleanup --- .../renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_opengl/gl_rasterizer_cache.cpp | 222 ++++++++++++------ .../renderer_opengl/gl_rasterizer_cache.h | 8 +- 3 files changed, 152 insertions(+), 80 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f2677b0e9..7152f0716 100644 --- 
a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -240,7 +240,7 @@ void RasterizerOpenGL::DrawTriangles() { const bool using_color_fb = regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && write_color_fb; const bool using_depth_fb = regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 && - (state.depth.test_enabled || write_depth_fb); + (write_depth_fb || state.depth.test_enabled || (has_stencil && state.stencil.test_enabled)); // Sync and bind the framebuffer surfaces Surface color_surface; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 2e9bfbff6..972818d93 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -314,8 +314,8 @@ static bool FillSurface(const Surface& surface, const u8* fill_data) { MathUtil::Rectangle CachedSurface::GetSubRect(const SurfaceParams& sub_surface) const { const u32 begin_pixel_index = PixelsInBytes(sub_surface.addr - addr); - const int x0 = begin_pixel_index % width; - const int y0 = begin_pixel_index / width; + const int x0 = begin_pixel_index % stride; + const int y0 = begin_pixel_index / stride; if (is_tiled) return MathUtil::Rectangle(x0, height - y0 - sub_surface.height, x0 + sub_surface.width, height - y0); // Bottom to top @@ -361,6 +361,9 @@ bool CachedSurface::CanCopy(const SurfaceParams& dest_surface) const { if (type == SurfaceType::Fill && IsRegionValid(dest_surface.GetInterval()) && dest_surface.addr >= addr && dest_surface.end <= end) { // dest_surface is within our fill range if (fill_size != dest_surface.bytes_per_pixel) { + if (dest_surface.is_tiled && GetFormatBpp(dest_surface.pixel_format) * 8 % fill_size != 0) + return false; + // Check if bits repeat for our fill_size const u32 dest_bytes_per_pixel = std::max(dest_surface.bytes_per_pixel, 1u); // Take care of 4bpp 
formats std::vector fill_test(fill_size * dest_bytes_per_pixel); @@ -383,17 +386,34 @@ bool CachedSurface::CanCopy(const SurfaceParams& dest_surface) const { return false; } +bool CachedSurface::CanExpand(const SurfaceParams& expanded_surface) const { + if (pixel_format != expanded_surface.pixel_format || + is_tiled != expanded_surface.is_tiled || + addr > expanded_surface.end || expanded_surface.addr > end || + stride != expanded_surface.stride) + return false; + + const u32 begin_pixel_index = + PixelsInBytes(std::max(expanded_surface.addr, addr) - + std::min(expanded_surface.addr, addr)); + const int x0 = begin_pixel_index % stride; + const int y0 = begin_pixel_index / stride; + + return x0 == 0 && (!is_tiled || y0 % 8 == 0); +} + bool CachedSurface::CanTexCopy(const SurfaceParams& texcopy_params) const { + // TODO: Accept "Fill" surfaces if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr || end < texcopy_params.end || - ((texcopy_params.addr - addr) * 8) % SurfaceParams::GetFormatBpp(pixel_format) != 0 || - (texcopy_params.width * 8) % SurfaceParams::GetFormatBpp(pixel_format) != 0 || - (texcopy_params.stride * 8) % SurfaceParams::GetFormatBpp(pixel_format) != 0) + ((texcopy_params.addr - addr) * 8) % GetFormatBpp(pixel_format) != 0 || + (texcopy_params.width * 8) % GetFormatBpp(pixel_format) != 0 || + (texcopy_params.stride * 8) % GetFormatBpp(pixel_format) != 0) return false; const u32 begin_pixel_index = PixelsInBytes(texcopy_params.addr - addr); - const int x0 = begin_pixel_index % width; - const int y0 = begin_pixel_index / width; + const int x0 = begin_pixel_index % stride; + const int y0 = begin_pixel_index / stride; if (!is_tiled) return (PixelsInBytes(texcopy_params.stride) == stride && @@ -619,7 +639,8 @@ enum MatchFlags { Exact = 1 << 1, // Surfaces perfectly match SubRect = 1 << 2, // Surface encompasses params Copy = 1 << 3, // Surface we can copy from - TexCopy = 1 << 4 // Surface that will match a display transfer "texture 
copy" parameters + Expand = 1 << 4, // Surface that can expand params + TexCopy = 1 << 5 // Surface that will match a display transfer "texture copy" parameters }; constexpr MatchFlags operator | (MatchFlags lhs, MatchFlags rhs) { @@ -632,6 +653,7 @@ Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params Surface match_surface = nullptr; bool match_valid = false; u32 match_scale = 0; + u32 match_size = 0; for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { for (auto& surface : pair.second) { @@ -643,28 +665,45 @@ Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params if (!(find_flags & MatchFlags::Invalid) && !is_valid) continue; - const auto IsMatch_Helper = [&](MatchFlags check_type, auto match_fn) { - if (!(find_flags & check_type)) - return false; + auto IsMatch_Helper = [&](MatchFlags check_type, auto match_fn) { + if (!(find_flags & check_type) || !match_fn()) + return; - if (!match_fn()) - return false; + if (!res_scale_matched && + match_scale_type != ScaleMatch::Ignore && + surface->type != SurfaceType::Fill) + return; - if (match_scale_type == ScaleMatch::Ignore || res_scale_matched || surface->type == SurfaceType::Fill) { // Found a match - if (is_valid && !match_valid) { - match_scale = 0; - match_valid = true; - } - if (surface->res_scale > match_scale) { - match_scale = surface->res_scale; - match_surface = surface; - } + // Found a match, update only if this is better than the previous one + auto UpdateMatch = [&] { + match_surface = surface; + match_valid = is_valid; + match_scale = surface->res_scale; + match_size = surface->size; + }; + + if (surface->res_scale > match_scale) { + UpdateMatch(); + return; + } else if (surface->res_scale < match_scale) { + return; + } + + if (is_valid && !match_valid) { + UpdateMatch(); + return; + } else if (is_valid != match_valid) { + return; + } + + if (surface->size > match_size) { + UpdateMatch(); } - return false; }; 
IsMatch_Helper(MatchFlags::Exact, [&] { return surface->ExactMatch(params); }); IsMatch_Helper(MatchFlags::SubRect, [&] { return surface->CanSubRect(params); }); IsMatch_Helper(MatchFlags::Copy, [&] { return surface->CanCopy(params); }); + IsMatch_Helper(MatchFlags::Expand, [&] { return surface->CanExpand(params); }); IsMatch_Helper(MatchFlags::TexCopy, [&] { return surface->CanTexCopy(params); }); } } @@ -702,72 +741,105 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatc return nullptr; } - // Check for an exact or subrect match in existing surfaces - Surface surface_match = FindMatch(surface_cache, params, match_res_scale); - - if (surface_match != nullptr) { - if (load_if_create) { - ValidateSurface(surface_match, params.addr, params.size); - } - return surface_match; - } - ASSERT(params.width == params.stride); // Use GetSurfaceSubRect instead - Surface new_surface = CreateSurface(params); - if (load_if_create) - ValidateSurface(new_surface, params.addr, params.size); + // Check for an exact match in existing surfaces + Surface surface = FindMatch(surface_cache, params, match_res_scale); - RegisterSurface(new_surface); + Surface expandable = FindMatch(surface_cache, params, match_res_scale); + u16 target_res_scale = surface == nullptr ? 
params.res_scale : surface->res_scale; + if (match_res_scale != ScaleMatch::Exact && + expandable != nullptr && + expandable->res_scale > params.res_scale) { + target_res_scale = expandable->res_scale; + } - return new_surface; + if (surface == nullptr || target_res_scale != surface->res_scale) { + SurfaceParams new_params = params; + new_params.res_scale = target_res_scale; + + surface = CreateSurface(new_params); + RegisterSurface(surface); + } + + if (load_if_create) { + ValidateSurface(surface, params.addr, params.size); + } + + return surface; } SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, bool load_if_create) { - MathUtil::Rectangle out_rect{}; + Surface surface = nullptr; + MathUtil::Rectangle rect{}; if (params.addr == 0 || params.height * params.width == 0) { - return std::make_tuple(nullptr, out_rect); + return { surface, rect }; } // Attempt to find encompassing surface - Surface subrect_match = FindMatch(surface_cache, params, match_res_scale); + surface = FindMatch(surface_cache, params, match_res_scale); - // Return the best subrect surface if found - if (subrect_match != nullptr) { - if (load_if_create) { - ValidateSurface(subrect_match, params.addr, params.size); + // Check if FindMatch failed because of res scaling + // If that's the case create a new surface with + // the dimensions of the lower res_scale surface + // to suggest it should not be used again + if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) { + surface = FindMatch(surface_cache, params, ScaleMatch::Ignore); + if (surface != nullptr) { + ASSERT(surface->res_scale < params.res_scale); + SurfaceParams new_params = *surface; + new_params.res_scale = params.res_scale; + + surface = CreateSurface(new_params); + RegisterSurface(surface); } + } - out_rect = subrect_match->GetScaledSubRect(params); - // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory. 
- if (params.is_tiled) - std::swap(out_rect.top, out_rect.bottom); + // Check for a surface we can expand before creating a new one + if (surface == nullptr) { + surface = FindMatch(surface_cache, params, match_res_scale); + if (surface != nullptr) { + SurfaceParams new_params = *surface; + new_params.addr = std::min(params.addr, surface->addr); + new_params.end = std::max(params.end, surface->end); + new_params.size = new_params.end - new_params.addr; + new_params.height = new_params.size / (SurfaceParams::GetFormatBpp(params.pixel_format) * params.stride / 8); - return std::make_tuple(subrect_match, out_rect); + Surface new_surface = CreateSurface(new_params); + RegisterSurface(new_surface); + + // TODO: Delete the expanded surface, this can't be done safely yet + // because it may still be in use + BlitSurfaces(surface, surface->GetScaledRect(), new_surface, new_surface->GetScaledSubRect(*surface)); + new_surface->invalid_regions -= surface->GetInterval(); + new_surface->invalid_regions += surface->invalid_regions; + surface = new_surface; + } } // No subrect found - create and return a new surface - SurfaceParams new_params = params; - new_params.width = params.stride; // Can't have gaps in a surface - new_params.UpdateParams(); + if (surface == nullptr) { + SurfaceParams new_params = params; + new_params.width = params.stride; // Can't have gaps in a surface + new_params.UpdateParams(); - out_rect = new_params.GetScaledRect(); - if (new_params.is_tiled) - std::swap(out_rect.top, out_rect.bottom); + surface = CreateSurface(new_params); + RegisterSurface(surface); + } - // If stride was bigger than width we need to adjust our output rect - out_rect.right = static_cast(params.width * new_params.res_scale); + if (load_if_create) { + ValidateSurface(surface, params.addr, params.size); + } - Surface new_surface = CreateSurface(new_params); - if (load_if_create) - ValidateSurface(new_surface, new_params.addr, new_params.size); + rect = 
surface->GetScaledSubRect(params); + // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory. + if (surface->is_tiled) + std::swap(rect.top, rect.bottom); - RegisterSurface(new_surface); - - return std::make_tuple(new_surface, out_rect); + return { surface, rect }; } Surface RasterizerCacheOpenGL::GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config) { @@ -796,15 +868,14 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool usin // Make sur that framebuffers don't overlap if both color and depth are being used u32 fb_area = config.GetWidth() * config.GetHeight(); - bool framebuffers_overlap = config.GetColorBufferPhysicalAddress() != 0 && - config.GetDepthBufferPhysicalAddress() != 0 && + bool framebuffers_overlap = using_color_fb && using_depth_fb && MathUtil::IntervalsIntersect( config.GetColorBufferPhysicalAddress(), fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())), config.GetDepthBufferPhysicalAddress(), fb_area * Pica::FramebufferRegs::BytesPerDepthPixel(config.depth_format)); - if (framebuffers_overlap && using_color_fb && using_depth_fb) { + if (framebuffers_overlap) { LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; overlapping framebuffers not supported!"); using_depth_fb = false; } @@ -856,7 +927,7 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool usin std::tie(depth_surface, rect) = GetSurfaceSubRect(depth_params, ScaleMatch::Exact, true); } - return std::make_tuple(color_surface, depth_surface, rect); + return { color_surface, depth_surface, rect }; } Surface RasterizerCacheOpenGL::GetFillSurface(const GPU::Regs::MemoryFillConfig& config) { @@ -901,7 +972,7 @@ SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& std::swap(rect.top, rect.bottom); } - return std::make_tuple(match_surface, rect); + return { match_surface, rect }; } void 
RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, u32 size) { @@ -1015,6 +1086,8 @@ void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface if (size == 0) return; + SurfaceSet remove_surfaces; + const auto invalid_interval = SurfaceInterval::right_open(addr, addr + size); if (region_owner != nullptr) { @@ -1056,13 +1129,12 @@ void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface } if (region_owner != nullptr) - dirty_regions.set(std::make_pair(invalid_interval, region_owner)); + dirty_regions.set({ invalid_interval, region_owner }); else dirty_regions.erase(invalid_interval); for (auto& remove_surface : remove_surfaces) UnregisterSurface(remove_surface); - remove_surfaces.clear(); } Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { @@ -1090,13 +1162,13 @@ Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { } void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { - surface_cache.add(std::make_pair(surface->GetInterval(), SurfaceSet({ surface }))); + surface_cache.add({ surface->GetInterval(), SurfaceSet{ surface } }); UpdatePagesCachedCount(surface->addr, surface->size, 1); } void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { UpdatePagesCachedCount(surface->addr, surface->size, -1); - surface_cache.subtract(std::make_pair(surface->GetInterval(), SurfaceSet({ surface }))); + surface_cache.subtract({ surface->GetInterval(), SurfaceSet{ surface } }); } void RasterizerCacheOpenGL::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) { @@ -1107,7 +1179,7 @@ void RasterizerCacheOpenGL::UpdatePagesCachedCount(PAddr addr, u32 size, int del // Interval maps will erase segments if count reaches 0, so if delta is negative we have to subtract after iterating const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); if (delta > 0) - cached_pages.add(std::make_pair(pages_interval, delta)); 
+ cached_pages.add({ pages_interval, delta }); for (auto& pair : RangeFromInterval(cached_pages, pages_interval)) { const auto interval = pair.first & pages_interval; @@ -1126,5 +1198,5 @@ void RasterizerCacheOpenGL::UpdatePagesCachedCount(PAddr addr, u32 size, int del } if (delta < 0) - cached_pages.add(std::make_pair(pages_interval, delta)); + cached_pages.add({ pages_interval, delta }); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 0a524213e..56cdfb0de 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -89,7 +89,7 @@ struct SurfaceParams { }; static unsigned int GetFormatBpp(SurfaceParams::PixelFormat format) { - static const std::array bpp_table = { + static constexpr std::array bpp_table = { 32, // RGBA8 24, // RGB8 16, // RGB5A1 @@ -110,8 +110,8 @@ struct SurfaceParams { 32, // D24S8 }; - ASSERT((unsigned int)format < ARRAY_SIZE(bpp_table)); - return bpp_table[(unsigned int)format]; + ASSERT(static_cast(format) < bpp_table.size()); + return bpp_table[static_cast(format)]; } static PixelFormat PixelFormatFromTextureFormat(Pica::TexturingRegs::TextureFormat format) { @@ -236,6 +236,7 @@ struct CachedSurface : SurfaceParams { bool ExactMatch(const SurfaceParams& other_surface) const; bool CanSubRect(const SurfaceParams& sub_surface) const; bool CanCopy(const SurfaceParams& dest_surface) const; + bool CanExpand(const SurfaceParams& expanded_surface) const; bool CanTexCopy(const SurfaceParams& texcopy_params) const; MathUtil::Rectangle GetSubRect(const SurfaceParams& sub_surface) const; @@ -330,5 +331,4 @@ private: SurfaceCache surface_cache; SurfaceMap dirty_regions; PageMap cached_pages; - SurfaceSet remove_surfaces; }; From 779185841cda71c00a76292490d4b144af3d768d Mon Sep 17 00:00:00 2001 From: Phantom Date: Tue, 10 Oct 2017 03:28:05 +0200 Subject: [PATCH 5/5] viewport fix --- 
.../renderer_opengl/gl_rasterizer.cpp | 110 ++++++---- .../renderer_opengl/gl_rasterizer_cache.cpp | 195 ++++++++++-------- .../renderer_opengl/gl_rasterizer_cache.h | 32 ++- src/video_core/renderer_opengl/gl_state.cpp | 22 ++ src/video_core/renderer_opengl/gl_state.h | 8 + 5 files changed, 228 insertions(+), 139 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7152f0716..d2db44629 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -242,13 +242,37 @@ void RasterizerOpenGL::DrawTriangles() { const bool using_depth_fb = regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 && (write_depth_fb || state.depth.test_enabled || (has_stencil && state.stencil.test_enabled)); - // Sync and bind the framebuffer surfaces + MathUtil::Rectangle viewport_rect_unscaled{ + // These registers hold half-width and half-height, so must be multiplied by 2 + regs.rasterizer.viewport_corner.x, // left + regs.rasterizer.viewport_corner.y + // top + static_cast(Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() * 2), + regs.rasterizer.viewport_corner.x + // right + static_cast(Pica::float24::FromRaw(regs.rasterizer.viewport_size_x).ToFloat32() * 2), + regs.rasterizer.viewport_corner.y // bottom + }; + Surface color_surface; Surface depth_surface; - MathUtil::Rectangle rect; - std::tie(color_surface, depth_surface, rect) = - res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb); + MathUtil::Rectangle surfaces_rect; + std::tie(color_surface, depth_surface, surfaces_rect) = + res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled); + const u16 res_scale = color_surface != nullptr ? color_surface->res_scale : + (depth_surface == nullptr ? 
1u : depth_surface->res_scale); + + MathUtil::Rectangle draw_rect{ + MathUtil::Clamp(surfaces_rect.left + viewport_rect_unscaled.left * res_scale, // left + surfaces_rect.left, surfaces_rect.right), + MathUtil::Clamp(surfaces_rect.bottom + viewport_rect_unscaled.GetHeight() * res_scale, // top + surfaces_rect.bottom, surfaces_rect.top), + MathUtil::Clamp(surfaces_rect.left + viewport_rect_unscaled.GetWidth() * res_scale, // right + surfaces_rect.left, surfaces_rect.right), + MathUtil::Clamp(surfaces_rect.bottom + viewport_rect_unscaled.bottom * res_scale, // bottom + surfaces_rect.bottom, surfaces_rect.top) + }; + + // Bind the framebuffer surfaces state.draw.draw_framebuffer = framebuffer.handle; state.Apply(); @@ -273,20 +297,11 @@ void RasterizerOpenGL::DrawTriangles() { } // Sync the viewport - // These registers hold half-width and half-height, so must be multiplied by 2 - const GLsizei viewport_width = - static_cast(Pica::float24::FromRaw(regs.rasterizer.viewport_size_x).ToFloat32() * 2); - const GLsizei viewport_height = - static_cast(Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() * 2); - - const u16 res_scale = color_surface != nullptr ? color_surface->res_scale : - (depth_surface == nullptr ? 
1u : depth_surface->res_scale); - glViewport( - static_cast(rect.left + regs.rasterizer.viewport_corner.x * res_scale), - static_cast(rect.bottom + regs.rasterizer.viewport_corner.y * res_scale), - viewport_width * res_scale, - viewport_height * res_scale); + static_cast(surfaces_rect.left + viewport_rect_unscaled.left * res_scale), + static_cast(surfaces_rect.bottom + viewport_rect_unscaled.bottom * res_scale), + static_cast(viewport_rect_unscaled.GetWidth() * res_scale), + static_cast(viewport_rect_unscaled.GetHeight() * res_scale)); if (uniform_block_data.data.framebuffer_scale != res_scale) { uniform_block_data.data.framebuffer_scale = res_scale; @@ -296,15 +311,15 @@ void RasterizerOpenGL::DrawTriangles() { // Scissor checks are window-, not viewport-relative, which means that if the cached texture // sub-rect changes, the scissor bounds also need to be updated. GLint scissor_x1 = static_cast( - rect.left + regs.rasterizer.scissor_test.x1 * res_scale); + surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale); GLint scissor_y1 = static_cast( - rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale); + surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale); // x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when // scaling or doing multisampling. GLint scissor_x2 = static_cast( - rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale); + surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale); GLint scissor_y2 = static_cast( - rect.bottom + (regs.rasterizer.scissor_test.y2 + 1) * res_scale); + surfaces_rect.bottom + (regs.rasterizer.scissor_test.y2 + 1) * res_scale); if (uniform_block_data.data.scissor_x1 != scissor_x1 || uniform_block_data.data.scissor_x2 != scissor_x2 || @@ -394,6 +409,15 @@ void RasterizerOpenGL::DrawTriangles() { uniform_block_data.dirty = false; } + // Viewport can have negative offsets or larger + // dimensions than our framebuffer sub-rect. 
+ // Enable scissor test to prevent drawing + // outside of the framebuffer region + state.scissor.enabled = true; + state.scissor.x = draw_rect.left; + state.scissor.y = draw_rect.bottom; + state.scissor.width = draw_rect.GetWidth(); + state.scissor.height = draw_rect.GetHeight(); state.Apply(); // Draw the vertex batch @@ -401,29 +425,8 @@ void RasterizerOpenGL::DrawTriangles() { GL_STREAM_DRAW); glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); - // Mark framebuffer surfaces as dirty - const u32 viewport_offset = - ((regs.framebuffer.framebuffer.GetHeight() - regs.rasterizer.viewport_corner.y - viewport_height) - * regs.framebuffer.framebuffer.GetWidth()) - + regs.rasterizer.viewport_corner.x; - - const u32 viewport_size = ((viewport_height - 1) * regs.framebuffer.framebuffer.GetWidth()) - + viewport_width; - - if (color_surface != nullptr && write_color_fb) { - res_cache.InvalidateRegion( - regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() - + (viewport_offset * color_surface->bytes_per_pixel), - viewport_size * color_surface->bytes_per_pixel, - color_surface); - } - if (depth_surface != nullptr && write_depth_fb) { - res_cache.InvalidateRegion( - regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() - + (viewport_offset * depth_surface->bytes_per_pixel), - viewport_size * depth_surface->bytes_per_pixel, - depth_surface); - } + // Disable scissor test + state.scissor.enabled = false; vertex_batch.clear(); @@ -432,6 +435,25 @@ void RasterizerOpenGL::DrawTriangles() { state.texture_units[texture_index].texture_2d = 0; } state.Apply(); + + // Mark framebuffer surfaces as dirty + MathUtil::Rectangle draw_rect_unscaled{ + draw_rect.left / res_scale, draw_rect.top / res_scale, + draw_rect.right / res_scale, draw_rect.bottom / res_scale + }; + + if (color_surface != nullptr && write_color_fb) { + auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled); + res_cache.InvalidateRegion(boost::icl::first(interval), + 
boost::icl::length(interval), + color_surface); + } + if (depth_surface != nullptr && write_depth_fb) { + auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled); + res_cache.InvalidateRegion(boost::icl::first(interval), + boost::icl::length(interval), + depth_surface); + } } void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 972818d93..4ee164ae0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -11,6 +11,7 @@ #include #include #include +#include "common/alignment.h" #include "common/bit_field.h" #include "common/color.h" #include "common/logging/log.h" @@ -312,7 +313,26 @@ static bool FillSurface(const Surface& surface, const u8* fill_data) { return true; } -MathUtil::Rectangle CachedSurface::GetSubRect(const SurfaceParams& sub_surface) const { +SurfaceInterval SurfaceParams::GetSubRectInterval(MathUtil::Rectangle unscaled_rect) const { + if (unscaled_rect.top > unscaled_rect.bottom) { + std::swap(unscaled_rect.top, unscaled_rect.bottom); + } + if (is_tiled) { + unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8); + unscaled_rect.top = Common::AlignDown(unscaled_rect.top, 8); + unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8); + unscaled_rect.bottom = Common::AlignUp(unscaled_rect.bottom, 8); + } + + const u32 pixel_offset = unscaled_rect.left + stride * + (!is_tiled ? 
unscaled_rect.top : height - unscaled_rect.top - unscaled_rect.GetHeight()); + + const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride + unscaled_rect.GetWidth(); + + return { addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels) }; +} + +MathUtil::Rectangle SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const { const u32 begin_pixel_index = PixelsInBytes(sub_surface.addr - addr); const int x0 = begin_pixel_index % stride; const int y0 = begin_pixel_index / stride; @@ -323,7 +343,7 @@ MathUtil::Rectangle CachedSurface::GetSubRect(const SurfaceParams& sub_surf return MathUtil::Rectangle(x0, y0, x0 + sub_surface.width, y0 + sub_surface.height); // Top to bottom } -MathUtil::Rectangle CachedSurface::GetScaledSubRect(const SurfaceParams& sub_surface) const { +MathUtil::Rectangle SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const { auto rect = GetSubRect(sub_surface); rect.left = rect.left * res_scale; rect.right = rect.right * res_scale; @@ -332,7 +352,7 @@ MathUtil::Rectangle CachedSurface::GetScaledSubRect(const SurfaceParams& su return rect; } -bool CachedSurface::ExactMatch(const SurfaceParams& other_surface) const { +bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { return (other_surface.addr == addr && other_surface.width == width && other_surface.height == height && @@ -341,7 +361,7 @@ bool CachedSurface::ExactMatch(const SurfaceParams& other_surface) const { other_surface.is_tiled == is_tiled); } -bool CachedSurface::CanSubRect(const SurfaceParams& sub_surface) const { +bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { if (sub_surface.addr < addr || sub_surface.end > end || sub_surface.stride != stride || sub_surface.pixel_format != pixel_format || sub_surface.is_tiled != is_tiled) return false; @@ -357,36 +377,7 @@ bool CachedSurface::CanSubRect(const SurfaceParams& sub_surface) const { return true; } -bool CachedSurface::CanCopy(const 
SurfaceParams& dest_surface) const { - if (type == SurfaceType::Fill && IsRegionValid(dest_surface.GetInterval()) && - dest_surface.addr >= addr && dest_surface.end <= end) { // dest_surface is within our fill range - if (fill_size != dest_surface.bytes_per_pixel) { - if (dest_surface.is_tiled && GetFormatBpp(dest_surface.pixel_format) * 8 % fill_size != 0) - return false; - - // Check if bits repeat for our fill_size - const u32 dest_bytes_per_pixel = std::max(dest_surface.bytes_per_pixel, 1u); // Take care of 4bpp formats - std::vector fill_test(fill_size * dest_bytes_per_pixel); - - for (u32 i = 0; i < dest_bytes_per_pixel; ++i) - std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); - - for (u32 i = 0; i < fill_size; ++i) - if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], dest_bytes_per_pixel) != 0) - return false; - - if (dest_surface.bytes_per_pixel == 0 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) // 4bpp compare - return false; - } - return true; - } - if (CanSubRect(dest_surface) && dest_surface.width == stride) - return true; - - return false; -} - -bool CachedSurface::CanExpand(const SurfaceParams& expanded_surface) const { +bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { if (pixel_format != expanded_surface.pixel_format || is_tiled != expanded_surface.is_tiled || addr > expanded_surface.end || expanded_surface.addr > end || @@ -402,7 +393,7 @@ bool CachedSurface::CanExpand(const SurfaceParams& expanded_surface) const { return x0 == 0 && (!is_tiled || y0 % 8 == 0); } -bool CachedSurface::CanTexCopy(const SurfaceParams& texcopy_params) const { +bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { // TODO: Accept "Fill" surfaces if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr || end < texcopy_params.end || @@ -425,6 +416,35 @@ bool CachedSurface::CanTexCopy(const SurfaceParams& texcopy_params) const { x0 + PixelsInBytes(texcopy_params.width / 8) <= 
stride); } +bool CachedSurface::CanCopy(const SurfaceParams& dest_surface) const { + if (type == SurfaceType::Fill && IsRegionValid(dest_surface.GetInterval()) && + dest_surface.addr >= addr && dest_surface.end <= end) { // dest_surface is within our fill range + if (fill_size != dest_surface.BytesPerPixel()) { + if (dest_surface.is_tiled && BytesInPixels(8 * 8) % fill_size != 0) + return false; + + // Check if bits repeat for our fill_size + const u32 dest_bytes_per_pixel = std::max(dest_surface.BytesPerPixel(), 1u); // Take care of 4bpp formats + std::vector fill_test(fill_size * dest_bytes_per_pixel); + + for (u32 i = 0; i < dest_bytes_per_pixel; ++i) + std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); + + for (u32 i = 0; i < fill_size; ++i) + if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], dest_bytes_per_pixel) != 0) + return false; + + if (dest_surface.BytesPerPixel() == 0 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) // 4bpp compare + return false; + } + return true; + } + if (CanSubRect(dest_surface) && dest_surface.width == stride) + return true; + + return false; +} + static void CopySurface(const Surface& src_surface, const Surface& dest_surface) { if (src_surface == dest_surface) return; @@ -492,7 +512,7 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { } else { size_t copyfn_offset = MortonCopyFlags::MortonToGl; - copyfn_offset |= (bytes_per_pixel - 1) << MortonCopyFlags::BytesPerPixelBits; + copyfn_offset |= (BytesPerPixel() - 1) << MortonCopyFlags::BytesPerPixelBits; copyfn_offset |= (gl_bytes_per_pixel - 1) << MortonCopyFlags::GLBytesPerPixelBits; if (load_start != addr || load_end != end) @@ -544,7 +564,7 @@ void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start); } else { - size_t copyfn_offset = (bytes_per_pixel - 1) << MortonCopyFlags::BytesPerPixelBits; + size_t copyfn_offset = 
(BytesPerPixel() - 1) << MortonCopyFlags::BytesPerPixelBits; copyfn_offset |= (gl_bytes_per_pixel - 1) << MortonCopyFlags::GLBytesPerPixelBits; if (flush_start != addr || flush_end != end) @@ -806,7 +826,7 @@ SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& new_params.addr = std::min(params.addr, surface->addr); new_params.end = std::max(params.end, surface->end); new_params.size = new_params.end - new_params.addr; - new_params.height = new_params.size / (SurfaceParams::GetFormatBpp(params.pixel_format) * params.stride / 8); + new_params.height = new_params.size / params.BytesInPixels(params.stride); Surface new_surface = CreateSurface(new_params); RegisterSurface(new_surface); @@ -861,30 +881,12 @@ constexpr u16 GetResolutionScaleFactor() { Settings::values.resolution_factor; } -SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool using_color_fb, - bool using_depth_fb) { +SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( + bool using_color_fb, bool using_depth_fb, + const MathUtil::Rectangle& viewport_rect) { const auto& regs = Pica::g_state.regs; const auto& config = regs.framebuffer.framebuffer; - // Make sur that framebuffers don't overlap if both color and depth are being used - u32 fb_area = config.GetWidth() * config.GetHeight(); - bool framebuffers_overlap = using_color_fb && using_depth_fb && - MathUtil::IntervalsIntersect( - config.GetColorBufferPhysicalAddress(), - fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())), - config.GetDepthBufferPhysicalAddress(), - fb_area * Pica::FramebufferRegs::BytesPerDepthPixel(config.depth_format)); - - if (framebuffers_overlap) { - LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; overlapping framebuffers not supported!"); - using_depth_fb = false; - } - - // get color and depth surfaces - SurfaceParams color_params; - SurfaceParams depth_params; - color_params.is_tiled = 
depth_params.is_tiled = true; - // update resolution_scale_factor and reset cache if changed static u16 resolution_scale_factor = GetResolutionScaleFactor(); if (resolution_scale_factor != GetResolutionScaleFactor()) { @@ -893,38 +895,63 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool usin InvalidateRegion(0, 0xffffffff, nullptr); } - color_params.addr = config.GetColorBufferPhysicalAddress(); - color_params.width = depth_params.width = config.GetWidth(); - color_params.height = depth_params.height = config.GetHeight(); - color_params.pixel_format = SurfaceParams::PixelFormatFromColorFormat(config.color_format); + MathUtil::Rectangle viewport_clamped{ + static_cast(MathUtil::Clamp(viewport_rect.left, 0, static_cast(config.GetWidth()))), + static_cast(MathUtil::Clamp(viewport_rect.top, 0, static_cast(config.GetHeight()))), + static_cast(MathUtil::Clamp(viewport_rect.right, 0, static_cast(config.GetWidth()))), + static_cast(MathUtil::Clamp(viewport_rect.bottom, 0, static_cast(config.GetHeight()))) + }; + + // get color and depth surfaces + SurfaceParams color_params; + color_params.is_tiled = true; color_params.res_scale = resolution_scale_factor; + color_params.width = config.GetWidth(); + color_params.height = config.GetHeight(); + SurfaceParams depth_params = color_params; + + color_params.addr = config.GetColorBufferPhysicalAddress(); + color_params.pixel_format = SurfaceParams::PixelFormatFromColorFormat(config.color_format); color_params.UpdateParams(); + depth_params.addr = config.GetDepthBufferPhysicalAddress(); + depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); + depth_params.UpdateParams(); + + auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); + auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); + + // Make sure that framebuffers don't overlap if both color and depth are being used + if (using_color_fb && using_depth_fb && + 
boost::icl::length(color_vp_interval & depth_vp_interval)) { + LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; overlapping framebuffers not supported!"); + using_depth_fb = false; + } + MathUtil::Rectangle rect{}; Surface color_surface = nullptr; - if (using_color_fb) - std::tie(color_surface, rect) = GetSurfaceSubRect(color_params, ScaleMatch::Exact, true); - - depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); - depth_params.addr = config.GetDepthBufferPhysicalAddress(); - depth_params.res_scale = resolution_scale_factor; - depth_params.UpdateParams(); - Surface depth_surface = nullptr; - if (using_depth_fb && color_surface != nullptr) { - const PAddr validate_addr = depth_params.addr; - const u32 validate_size = depth_params.size; + if (using_color_fb) + std::tie(color_surface, rect) = GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); + if (using_depth_fb && color_surface != nullptr) { // Can't specify separate color and depth viewport offsets in OpenGL, so make sure depth_surface will have the same offsets - depth_params.addr -= color_surface->PixelsInBytes(color_params.addr - color_surface->addr) * depth_params.bytes_per_pixel; + depth_params.addr -= depth_params.BytesInPixels( + color_surface->PixelsInBytes(color_params.addr - color_surface->addr)); depth_params.height = color_surface->height; depth_params.UpdateParams(); depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); - ValidateSurface(depth_surface, validate_addr, validate_size); } else if (using_depth_fb) { - std::tie(depth_surface, rect) = GetSurfaceSubRect(depth_params, ScaleMatch::Exact, true); + std::tie(depth_surface, rect) = GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); + } + + if (color_surface != nullptr) { + ValidateSurface(color_surface, boost::icl::first(color_vp_interval), boost::icl::length(color_vp_interval)); + } + if (depth_surface != nullptr) { + 
ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), boost::icl::length(depth_vp_interval)); } return { color_surface, depth_surface, rect }; @@ -1003,14 +1030,14 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, SurfaceParams params = *surface; const u32 pixel_offset = params.PixelsInBytes(interval_start - params.addr); if (!params.is_tiled) { - params.addr += (pixel_offset - (pixel_offset % params.width)) * - SurfaceParams::GetFormatBpp(params.pixel_format) / 8; // Start of the row - params.height = (params.PixelsInBytes(interval_end - params.addr - 1) / params.width) + 1; + // Start of the row + params.addr += params.BytesInPixels(pixel_offset - (pixel_offset % params.stride)); + params.height = (params.PixelsInBytes(interval_end - params.addr - 1) / params.stride) + 1; } else { - params.addr += (pixel_offset - (pixel_offset % (params.width * 8))) * - SurfaceParams::GetFormatBpp(params.pixel_format) / 8; // Start of the tiled row - params.height = ((params.PixelsInBytes(interval_end - params.addr - 1) / (params.width * 8)) + 1) * 8; + // Start of the tiled row + params.addr += params.BytesInPixels(pixel_offset - (pixel_offset % (params.stride * 8))); + params.height = ((params.PixelsInBytes(interval_end - params.addr - 1) / (params.stride * 8)) + 1) * 8; } params.UpdateParams(); @@ -1147,7 +1174,7 @@ Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { surface->gl_bytes_per_pixel = (surface->pixel_format == PixelFormat::D24 || surface->type == SurfaceType::Texture) ? 4 : - surface->bytes_per_pixel; + surface->BytesPerPixel(); surface->gl_buffer_offset = (surface->pixel_format == PixelFormat::D24) ? 
1 : 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 56cdfb0de..0b3cc5c09 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -179,7 +179,7 @@ struct SurfaceParams { return SurfaceType::Invalid; } - /// Update the params "size", "end", "bytes_per_pixel" and "type" from the already set "addr", "width", "height" and "pixel_format" + /// Update the params "size", "end" and "type" from the already set "addr", "width", "height" and "pixel_format" void UpdateParams() { size = width * height * GetFormatBpp(pixel_format) / 8; @@ -190,13 +190,14 @@ struct SurfaceParams { end = addr + size; type = GetFormatType(pixel_format); - bytes_per_pixel = GetFormatBpp(pixel_format) / 8; } SurfaceInterval GetInterval() const { return SurfaceInterval::right_open(addr, end); } + SurfaceInterval GetSubRectInterval(MathUtil::Rectangle unscaled_rect) const; + u32 GetScaledWidth() const { return width * res_scale; } @@ -217,6 +218,22 @@ struct SurfaceParams { return size * 8 / GetFormatBpp(pixel_format); } + u32 BytesInPixels(u32 pixels) const { + return pixels * GetFormatBpp(pixel_format) / 8; + } + + u32 BytesPerPixel() const { + return BytesInPixels(1); + } + + bool ExactMatch(const SurfaceParams& other_surface) const; + bool CanSubRect(const SurfaceParams& sub_surface) const; + bool CanExpand(const SurfaceParams& expanded_surface) const; + bool CanTexCopy(const SurfaceParams& texcopy_params) const; + + MathUtil::Rectangle GetSubRect(const SurfaceParams& sub_surface) const; + MathUtil::Rectangle GetScaledSubRect(const SurfaceParams& sub_surface) const; + PAddr addr = 0; PAddr end = 0; u32 size = 0; @@ -227,20 +244,12 @@ struct SurfaceParams { u16 res_scale = 1; bool is_tiled = false; - u32 bytes_per_pixel = 0; PixelFormat pixel_format = PixelFormat::Invalid; SurfaceType type = SurfaceType::Invalid; }; struct CachedSurface : 
SurfaceParams { - bool ExactMatch(const SurfaceParams& other_surface) const; - bool CanSubRect(const SurfaceParams& sub_surface) const; bool CanCopy(const SurfaceParams& dest_surface) const; - bool CanExpand(const SurfaceParams& expanded_surface) const; - bool CanTexCopy(const SurfaceParams& texcopy_params) const; - - MathUtil::Rectangle GetSubRect(const SurfaceParams& sub_surface) const; - MathUtil::Rectangle GetScaledSubRect(const SurfaceParams& sub_surface) const; bool IsRegionValid(const SurfaceInterval& interval) const { return (invalid_regions.find(interval) == invalid_regions.end()); @@ -295,7 +304,8 @@ public: Surface GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config); /// Get the color and depth surfaces based on the framebuffer configuration - SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb); + SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, + const MathUtil::Rectangle& viewport_rect); /// Get a surface that matches the fill config Surface GetFillSurface(const GPU::Regs::MemoryFillConfig& config); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 06a905766..93c6477e1 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -69,6 +69,12 @@ OpenGLState::OpenGLState() { draw.uniform_buffer = 0; draw.shader_program = 0; + scissor.enabled = false; + scissor.x = 0; + scissor.y = 0; + scissor.width = 0; + scissor.height = 0; + clip_distance = {}; } @@ -263,6 +269,22 @@ void OpenGLState::Apply() const { glUseProgram(draw.shader_program); } + // Scissor test + if (scissor.enabled != cur_state.scissor.enabled) { + if (scissor.enabled) { + glEnable(GL_SCISSOR_TEST); + } else { + glDisable(GL_SCISSOR_TEST); + } + } + + if (scissor.x != cur_state.scissor.x || + scissor.y != cur_state.scissor.y || + scissor.width != cur_state.scissor.width || + 
scissor.height != cur_state.scissor.height) { + glScissor(scissor.x, scissor.y, scissor.width, scissor.height); + } + // Clip distance for (size_t i = 0; i < clip_distance.size(); ++i) { if (clip_distance[i] != cur_state.clip_distance[i]) { diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 437fe34c4..a6bd9476e 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -124,6 +124,14 @@ public: GLuint shader_program; // GL_CURRENT_PROGRAM } draw; + struct { + bool enabled; // GL_SCISSOR_TEST + GLint x; + GLint y; + GLsizei width; + GLsizei height; + } scissor; + std::array clip_distance; // GL_CLIP_DISTANCE OpenGLState();