diff --git a/src/citra/config.cpp b/src/citra/config.cpp index 45c28ad09..72bda0be0 100644 --- a/src/citra/config.cpp +++ b/src/citra/config.cpp @@ -88,7 +88,7 @@ void Config::ReadValues() { Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", true); Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true); Settings::values.resolution_factor = - (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0); + static_cast(sdl2_config->GetInteger("Renderer", "resolution_factor", 1)); Settings::values.use_vsync = sdl2_config->GetBoolean("Renderer", "use_vsync", false); Settings::values.toggle_framelimit = sdl2_config->GetBoolean("Renderer", "toggle_framelimit", true); diff --git a/src/citra_qt/configuration/config.cpp b/src/citra_qt/configuration/config.cpp index 601c8b0e3..7cbe262b6 100644 --- a/src/citra_qt/configuration/config.cpp +++ b/src/citra_qt/configuration/config.cpp @@ -73,7 +73,8 @@ void Config::ReadValues() { qt_config->beginGroup("Renderer"); Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", true).toBool(); Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool(); - Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat(); + Settings::values.resolution_factor = + static_cast(qt_config->value("resolution_factor", 1).toInt()); Settings::values.use_vsync = qt_config->value("use_vsync", false).toBool(); Settings::values.toggle_framelimit = qt_config->value("toggle_framelimit", true).toBool(); diff --git a/src/citra_qt/configuration/configure_graphics.cpp b/src/citra_qt/configuration/configure_graphics.cpp index 36c5f52eb..6420f633a 100644 --- a/src/citra_qt/configuration/configure_graphics.cpp +++ b/src/citra_qt/configuration/configure_graphics.cpp @@ -32,81 +32,11 @@ void ConfigureGraphics::showLayoutBackgroundDialog() { } } -enum class Resolution : int { - Auto, - Scale1x, - Scale2x, - Scale3x, - 
Scale4x, - Scale5x, - Scale6x, - Scale7x, - Scale8x, - Scale9x, - Scale10x, -}; - -float ToResolutionFactor(Resolution option) { - switch (option) { - case Resolution::Auto: - return 0.f; - case Resolution::Scale1x: - return 1.f; - case Resolution::Scale2x: - return 2.f; - case Resolution::Scale3x: - return 3.f; - case Resolution::Scale4x: - return 4.f; - case Resolution::Scale5x: - return 5.f; - case Resolution::Scale6x: - return 6.f; - case Resolution::Scale7x: - return 7.f; - case Resolution::Scale8x: - return 8.f; - case Resolution::Scale9x: - return 9.f; - case Resolution::Scale10x: - return 10.f; - } - return 0.f; -} - -Resolution FromResolutionFactor(float factor) { - if (factor == 0.f) { - return Resolution::Auto; - } else if (factor == 1.f) { - return Resolution::Scale1x; - } else if (factor == 2.f) { - return Resolution::Scale2x; - } else if (factor == 3.f) { - return Resolution::Scale3x; - } else if (factor == 4.f) { - return Resolution::Scale4x; - } else if (factor == 5.f) { - return Resolution::Scale5x; - } else if (factor == 6.f) { - return Resolution::Scale6x; - } else if (factor == 7.f) { - return Resolution::Scale7x; - } else if (factor == 8.f) { - return Resolution::Scale8x; - } else if (factor == 9.f) { - return Resolution::Scale9x; - } else if (factor == 10.f) { - return Resolution::Scale10x; - } - return Resolution::Auto; -} - void ConfigureGraphics::setConfiguration() { ui->toggle_hw_renderer->setChecked(Settings::values.use_hw_renderer); ui->resolution_factor_combobox->setEnabled(Settings::values.use_hw_renderer); ui->toggle_shader_jit->setChecked(Settings::values.use_shader_jit); - ui->resolution_factor_combobox->setCurrentIndex( - static_cast(FromResolutionFactor(Settings::values.resolution_factor))); + ui->resolution_factor_combobox->setCurrentIndex(Settings::values.resolution_factor); ui->toggle_vsync->setChecked(Settings::values.use_vsync); { bg_color.setRgbF(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue); 
@@ -121,7 +51,7 @@ void ConfigureGraphics::applyConfiguration() { Settings::values.use_hw_renderer = ui->toggle_hw_renderer->isChecked(); Settings::values.use_shader_jit = ui->toggle_shader_jit->isChecked(); Settings::values.resolution_factor = - ToResolutionFactor(static_cast(ui->resolution_factor_combobox->currentIndex())); + static_cast(ui->resolution_factor_combobox->currentIndex()); Settings::values.use_vsync = ui->toggle_vsync->isChecked(); Settings::values.bg_red = bg_color.redF(); Settings::values.bg_green = bg_color.greenF(); diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp index e9f778fcb..7af9556b1 100644 --- a/src/core/frontend/framebuffer_layout.cpp +++ b/src/core/frontend/framebuffer_layout.cpp @@ -16,8 +16,8 @@ static const float TOP_SCREEN_ASPECT_RATIO = static const float BOT_SCREEN_ASPECT_RATIO = static_cast(Core::kScreenBottomHeight) / Core::kScreenBottomWidth; -float FramebufferLayout::GetScalingRatio() const { - return static_cast(top_screen.GetWidth()) / Core::kScreenTopWidth; +u16 FramebufferLayout::GetScalingRatio() const { + return static_cast(((top_screen.GetWidth() - 1) / Core::kScreenTopWidth) + 1); } // Finds the largest size subrectangle contained in window area that is confined to the aspect ratio diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h index 4983cf103..0d826be9e 100644 --- a/src/core/frontend/framebuffer_layout.h +++ b/src/core/frontend/framebuffer_layout.h @@ -21,7 +21,7 @@ struct FramebufferLayout { * Returns the ration of pixel size of the top screen, compared to the native size of the 3DS * screen. 
*/ - float GetScalingRatio() const; + u16 GetScalingRatio() const; }; /** diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index 7a007c065..2d6af0cbb 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp @@ -58,7 +58,6 @@ void VMManager::Reset() { page_table.pointers.fill(nullptr); page_table.attributes.fill(Memory::PageType::Unmapped); - page_table.cached_res_count.fill(0); UpdatePageTableForVMA(initial_vma); } diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp index 88684b82d..be95718e9 100644 --- a/src/core/hle/service/gsp_gpu.cpp +++ b/src/core/hle/service/gsp_gpu.cpp @@ -476,10 +476,11 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { // TODO: Consider attempting rasterizer-accelerated surface blit if that usage is ever // possible/likely Memory::RasterizerFlushVirtualRegion(command.dma_request.source_address, - command.dma_request.size, Memory::FlushMode::Flush); + command.dma_request.size, + Memory::FlushMode::Flush); Memory::RasterizerFlushVirtualRegion(command.dma_request.dest_address, command.dma_request.size, - Memory::FlushMode::FlushAndInvalidate); + Memory::FlushMode::Invalidate); // TODO(Subv): These memory accesses should not go through the application's memory mapping. // They should go through the GSP module's memory mapping. diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 09061ba49..28a4e2314 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -96,20 +96,11 @@ static void MemoryFill(const Regs::MemoryFillConfig& config) { u8* start = Memory::GetPhysicalPointer(start_addr); u8* end = Memory::GetPhysicalPointer(end_addr); - // TODO: Consider always accelerating and returning vector of - // regions that the accelerated fill did not cover to - // reduce/eliminate the fill that the cpu has to do. - // This would also mean that the flush below is not needed. 
- // Fill should first flush all surfaces that touch but are - // not completely within the fill range. - // Then fill all completely covered surfaces, and return the - // regions that were between surfaces or within the touching - // ones for cpu to manually fill here. if (VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) return; - Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), - config.GetEndAddress() - config.GetStartAddress()); + Memory::RasterizerInvalidateRegion(config.GetStartAddress(), + config.GetEndAddress() - config.GetStartAddress()); if (config.fill_24bit) { // fill with 24-bit values @@ -199,7 +190,7 @@ static void DisplayTransfer(const Regs::DisplayTransferConfig& config) { u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size); - Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size); + Memory::RasterizerInvalidateRegion(config.GetPhysicalOutputAddress(), output_size); for (u32 y = 0; y < output_height; ++y) { for (u32 x = 0; x < output_width; ++x) { @@ -367,8 +358,12 @@ static void TextureCopy(const Regs::DisplayTransferConfig& config) { size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); - Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), - static_cast(contiguous_output_size)); + // Only need to flush output if it has a gap + const auto FlushInvalidate_fn = (output_gap != 0) ? 
+ Memory::RasterizerFlushAndInvalidateRegion : + Memory::RasterizerInvalidateRegion; + FlushInvalidate_fn(config.GetPhysicalOutputAddress(), + static_cast(contiguous_output_size)); u32 remaining_input = input_width; u32 remaining_output = output_width; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 7f58be6de..ae6a3544e 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -42,7 +42,7 @@ static void MapPages(PageTable& page_table, u32 base, u32 size, u8* memory, Page (base + size) * PAGE_SIZE); RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE, - FlushMode::FlushAndInvalidate); + FlushMode::Invalidate); u32 end = base + size; while (base != end) { @@ -50,7 +50,6 @@ static void MapPages(PageTable& page_table, u32 base, u32 size, u8* memory, Page page_table.attributes[base] = type; page_table.pointers[base] = memory; - page_table.cached_res_count[base] = 0; base += 1; if (memory != nullptr) @@ -200,7 +199,7 @@ void Write(const VAddr vaddr, const T data) { ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); break; case PageType::RasterizerCachedMemory: { - RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::FlushAndInvalidate); + RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate); std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T)); break; } @@ -208,7 +207,7 @@ void Write(const VAddr vaddr, const T data) { WriteMMIO(GetMMIOHandler(vaddr), vaddr, data); break; case PageType::RasterizerCachedSpecial: { - RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::FlushAndInvalidate); + RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate); WriteMMIO(GetMMIOHandler(vaddr), vaddr, data); break; } @@ -334,7 +333,7 @@ u8* GetPhysicalPointer(PAddr address) { return target_pointer; } -void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { +void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached) { if (start == 0) { return; } @@ -355,14 +354,10 @@ 
void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { } VAddr vaddr = *maybe_vaddr; - u8& res_count = current_page_table->cached_res_count[vaddr >> PAGE_BITS]; - ASSERT_MSG(count_delta <= UINT8_MAX - res_count, - "Rasterizer resource cache counter overflow!"); - ASSERT_MSG(count_delta >= -res_count, "Rasterizer resource cache counter underflow!"); + PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; - // Switch page type to cached if now cached - if (res_count == 0) { - PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; + if (cached) { + // Switch page type to cached switch (page_type) { case PageType::Unmapped: // It is not necessary for a process to have this region mapped into its address @@ -380,11 +375,8 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { } } - res_count += count_delta; - - // Switch page type to uncached if now uncached - if (res_count == 0) { - PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; + else { + // Switch page type to uncached switch (page_type) { case PageType::Unmapped: // It is not necessary for a process to have this region mapped into its address @@ -413,6 +405,12 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { } } +void RasterizerInvalidateRegion(PAddr start, u32 size) { + if (VideoCore::g_renderer != nullptr) { + VideoCore::g_renderer->Rasterizer()->InvalidateRegion(start, size); + } +} + void RasterizerFlushRegion(PAddr start, u32 size) { if (VideoCore::g_renderer != nullptr) { VideoCore::g_renderer->Rasterizer()->FlushRegion(start, size); @@ -450,6 +448,9 @@ void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode) { case FlushMode::Flush: rasterizer->FlushRegion(physical_start, overlap_size); break; + case FlushMode::Invalidate: + rasterizer->InvalidateRegion(physical_start, overlap_size); + break; case FlushMode::FlushAndInvalidate: 
rasterizer->FlushAndInvalidateRegion(physical_start, overlap_size); break; @@ -588,7 +589,7 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi } case PageType::RasterizerCachedMemory: { RasterizerFlushVirtualRegion(current_vaddr, static_cast(copy_amount), - FlushMode::FlushAndInvalidate); + FlushMode::Invalidate); std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount); break; } @@ -596,7 +597,7 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr); DEBUG_ASSERT(handler); RasterizerFlushVirtualRegion(current_vaddr, static_cast(copy_amount), - FlushMode::FlushAndInvalidate); + FlushMode::Invalidate); handler->WriteBlock(current_vaddr, src_buffer, copy_amount); break; } @@ -647,14 +648,14 @@ void ZeroBlock(const VAddr dest_addr, const size_t size) { } case PageType::RasterizerCachedMemory: { RasterizerFlushVirtualRegion(current_vaddr, static_cast(copy_amount), - FlushMode::FlushAndInvalidate); + FlushMode::Invalidate); std::memset(GetPointerFromVMA(current_vaddr), 0, copy_amount); break; } case PageType::RasterizerCachedSpecial: { DEBUG_ASSERT(GetMMIOHandler(current_vaddr)); RasterizerFlushVirtualRegion(current_vaddr, static_cast(copy_amount), - FlushMode::FlushAndInvalidate); + FlushMode::Invalidate); GetMMIOHandler(current_vaddr)->WriteBlock(current_vaddr, zeros.data(), copy_amount); break; } diff --git a/src/core/memory.h b/src/core/memory.h index 252584af4..7d099c472 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -72,12 +72,6 @@ struct PageTable { * the corresponding entry in `pointers` MUST be set to null. 
*/ std::array attributes; - - /** - * Indicates the number of externally cached resources touching a page that should be - * flushed before the memory is accessed - */ - std::array cached_res_count; }; /// Physical memory regions as seen from the ARM11 @@ -244,16 +238,20 @@ boost::optional PhysicalToVirtualAddress(PAddr addr); u8* GetPhysicalPointer(PAddr address); /** - * Adds the supplied value to the rasterizer resource cache counter of each - * page touching the region. + * Mark each page touching the region as cached. */ -void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta); +void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached); /** - * Flushes any externally cached rasterizer resources touching the given region. - */ +* Flushes any externally cached rasterizer resources touching the given region. +*/ void RasterizerFlushRegion(PAddr start, u32 size); +/** +* Invalidates any externally cached rasterizer resources touching the given region. +*/ +void RasterizerInvalidateRegion(PAddr start, u32 size); + /** * Flushes and invalidates any externally cached rasterizer resources touching the given region. 
*/ @@ -262,6 +260,8 @@ void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size); enum class FlushMode { /// Write back modified surfaces to RAM Flush, + /// Remove region from the cache + Invalidate, /// Write back modified surfaces to RAM, and also remove them from the cache FlushAndInvalidate, }; diff --git a/src/core/settings.h b/src/core/settings.h index 8d78cb424..34e5914c8 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -95,7 +95,7 @@ struct Values { // Renderer bool use_hw_renderer; bool use_shader_jit; - float resolution_factor; + u16 resolution_factor; bool use_vsync; bool toggle_framelimit; diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp index 484713a92..8520f53b2 100644 --- a/src/tests/core/arm/arm_test_common.cpp +++ b/src/tests/core/arm/arm_test_common.cpp @@ -20,7 +20,6 @@ TestEnvironment::TestEnvironment(bool mutable_memory_) page_table->pointers.fill(nullptr); page_table->attributes.fill(Memory::PageType::Unmapped); - page_table->cached_res_count.fill(0); Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory); Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory); diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 8ef7e74c7..1d4c98189 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -38,6 +38,9 @@ public: /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory virtual void FlushRegion(PAddr addr, u32 size) = 0; + /// Notify rasterizer that any caches of the specified region should be invalidated + virtual void InvalidateRegion(PAddr addr, u32 size) = 0; + /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory /// and invalidated virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp 
b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7e09e4712..d2db44629 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -8,7 +8,6 @@ #include #include #include "common/assert.h" -#include "common/color.h" #include "common/logging/log.h" #include "common/math_util.h" #include "common/microprofile.h" @@ -23,6 +22,9 @@ #include "video_core/renderer_opengl/pica_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" +using PixelFormat = SurfaceParams::PixelFormat; +using SurfaceType = SurfaceParams::SurfaceType; + MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); @@ -225,21 +227,59 @@ void RasterizerOpenGL::DrawTriangles() { MICROPROFILE_SCOPE(OpenGL_Drawing); const auto& regs = Pica::g_state.regs; - // Sync and bind the framebuffer surfaces - CachedSurface* color_surface; - CachedSurface* depth_surface; - MathUtil::Rectangle rect; - std::tie(color_surface, depth_surface, rect) = - res_cache.GetFramebufferSurfaces(regs.framebuffer.framebuffer); + const bool has_stencil = regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; + const bool write_color_fb = state.color_mask.red_enabled == GL_TRUE || + state.color_mask.green_enabled == GL_TRUE || + state.color_mask.blue_enabled == GL_TRUE || + state.color_mask.alpha_enabled == GL_TRUE; + + const bool write_depth_fb = state.depth.write_mask == GL_TRUE || + (has_stencil && state.stencil.write_mask != 0); + + const bool using_color_fb = regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && + write_color_fb; + const bool using_depth_fb = regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 && + (write_depth_fb || state.depth.test_enabled || (has_stencil && state.stencil.test_enabled)); 
+ + MathUtil::Rectangle viewport_rect_unscaled{ + // These registers hold half-width and half-height, so must be multiplied by 2 + regs.rasterizer.viewport_corner.x, // left + regs.rasterizer.viewport_corner.y + // top + static_cast(Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() * 2), + regs.rasterizer.viewport_corner.x + // right + static_cast(Pica::float24::FromRaw(regs.rasterizer.viewport_size_x).ToFloat32() * 2), + regs.rasterizer.viewport_corner.y // bottom + }; + + Surface color_surface; + Surface depth_surface; + MathUtil::Rectangle surfaces_rect; + std::tie(color_surface, depth_surface, surfaces_rect) = + res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled); + + const u16 res_scale = color_surface != nullptr ? color_surface->res_scale : + (depth_surface == nullptr ? 1u : depth_surface->res_scale); + + MathUtil::Rectangle draw_rect{ + MathUtil::Clamp(surfaces_rect.left + viewport_rect_unscaled.left * res_scale, // left + surfaces_rect.left, surfaces_rect.right), + MathUtil::Clamp(surfaces_rect.bottom + viewport_rect_unscaled.GetHeight() * res_scale, // top + surfaces_rect.bottom, surfaces_rect.top), + MathUtil::Clamp(surfaces_rect.left + viewport_rect_unscaled.GetWidth() * res_scale, // right + surfaces_rect.left, surfaces_rect.right), + MathUtil::Clamp(surfaces_rect.bottom + viewport_rect_unscaled.bottom * res_scale, // bottom + surfaces_rect.bottom, surfaces_rect.top) + }; + + // Bind the framebuffer surfaces state.draw.draw_framebuffer = framebuffer.handle; state.Apply(); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color_surface != nullptr ? 
color_surface->texture.handle : 0, 0); if (depth_surface != nullptr) { - if (regs.framebuffer.framebuffer.depth_format == - Pica::FramebufferRegs::DepthFormat::D24S8) { + if (has_stencil) { // attach both depth and stencil glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, depth_surface->texture.handle, 0); @@ -257,38 +297,29 @@ void RasterizerOpenGL::DrawTriangles() { } // Sync the viewport - // These registers hold half-width and half-height, so must be multiplied by 2 - GLsizei viewport_width = - (GLsizei)Pica::float24::FromRaw(regs.rasterizer.viewport_size_x).ToFloat32() * 2; - GLsizei viewport_height = - (GLsizei)Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() * 2; - glViewport( - (GLint)(rect.left + regs.rasterizer.viewport_corner.x * color_surface->res_scale_width), - (GLint)(rect.bottom + regs.rasterizer.viewport_corner.y * color_surface->res_scale_height), - (GLsizei)(viewport_width * color_surface->res_scale_width), - (GLsizei)(viewport_height * color_surface->res_scale_height)); + static_cast(surfaces_rect.left + viewport_rect_unscaled.left * res_scale), + static_cast(surfaces_rect.bottom + viewport_rect_unscaled.bottom * res_scale), + static_cast(viewport_rect_unscaled.GetWidth() * res_scale), + static_cast(viewport_rect_unscaled.GetHeight() * res_scale)); - if (uniform_block_data.data.framebuffer_scale[0] != color_surface->res_scale_width || - uniform_block_data.data.framebuffer_scale[1] != color_surface->res_scale_height) { - - uniform_block_data.data.framebuffer_scale[0] = color_surface->res_scale_width; - uniform_block_data.data.framebuffer_scale[1] = color_surface->res_scale_height; + if (uniform_block_data.data.framebuffer_scale != res_scale) { + uniform_block_data.data.framebuffer_scale = res_scale; uniform_block_data.dirty = true; } // Scissor checks are window-, not viewport-relative, which means that if the cached texture // sub-rect changes, the scissor bounds also need to be updated. 
GLint scissor_x1 = static_cast( - rect.left + regs.rasterizer.scissor_test.x1 * color_surface->res_scale_width); + surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale); GLint scissor_y1 = static_cast( - rect.bottom + regs.rasterizer.scissor_test.y1 * color_surface->res_scale_height); + surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale); // x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when // scaling or doing multisampling. GLint scissor_x2 = static_cast( - rect.left + (regs.rasterizer.scissor_test.x2 + 1) * color_surface->res_scale_width); + surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale); GLint scissor_y2 = static_cast( - rect.bottom + (regs.rasterizer.scissor_test.y2 + 1) * color_surface->res_scale_height); + surfaces_rect.bottom + (regs.rasterizer.scissor_test.y2 + 1) * res_scale); if (uniform_block_data.data.scissor_x1 != scissor_x1 || uniform_block_data.data.scissor_x2 != scissor_x2 || @@ -309,7 +340,7 @@ void RasterizerOpenGL::DrawTriangles() { if (texture.enabled) { texture_samplers[texture_index].SyncWithConfig(texture.config); - CachedSurface* surface = res_cache.GetTextureSurface(texture); + Surface surface = res_cache.GetTextureSurface(texture); if (surface != nullptr) { state.texture_units[texture_index].texture_2d = surface->texture.handle; } else { @@ -378,6 +409,15 @@ void RasterizerOpenGL::DrawTriangles() { uniform_block_data.dirty = false; } + // Viewport can have negative offsets or larger + // dimensions than our framebuffer sub-rect. 
+ // Enable scissor test to prevent drawing + // outside of the framebuffer region + state.scissor.enabled = true; + state.scissor.x = draw_rect.left; + state.scissor.y = draw_rect.bottom; + state.scissor.width = draw_rect.GetWidth(); + state.scissor.height = draw_rect.GetHeight(); state.Apply(); // Draw the vertex batch @@ -385,16 +425,8 @@ void RasterizerOpenGL::DrawTriangles() { GL_STREAM_DRAW); glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); - // Mark framebuffer surfaces as dirty - // TODO: Restrict invalidation area to the viewport - if (color_surface != nullptr) { - color_surface->dirty = true; - res_cache.FlushRegion(color_surface->addr, color_surface->size, color_surface, true); - } - if (depth_surface != nullptr) { - depth_surface->dirty = true; - res_cache.FlushRegion(depth_surface->addr, depth_surface->size, depth_surface, true); - } + // Disable scissor test + state.scissor.enabled = false; vertex_batch.clear(); @@ -403,6 +435,25 @@ void RasterizerOpenGL::DrawTriangles() { state.texture_units[texture_index].texture_2d = 0; } state.Apply(); + + // Mark framebuffer surfaces as dirty + MathUtil::Rectangle draw_rect_unscaled{ + draw_rect.left / res_scale, draw_rect.top / res_scale, + draw_rect.right / res_scale, draw_rect.bottom / res_scale + }; + + if (color_surface != nullptr && write_color_fb) { + auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled); + res_cache.InvalidateRegion(boost::icl::first(interval), + boost::icl::length(interval), + color_surface); + } + if (depth_surface != nullptr && write_depth_fb) { + auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled); + res_cache.InvalidateRegion(boost::icl::first(interval), + boost::icl::length(interval), + depth_surface); + } } void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { @@ -891,227 +942,122 @@ void RasterizerOpenGL::FlushAll() { void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - 
res_cache.FlushRegion(addr, size, nullptr, false); + res_cache.FlushRegion(addr, size); +} + +void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.InvalidateRegion(addr, size, nullptr); } void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.FlushRegion(addr, size, nullptr, true); + res_cache.FlushRegion(addr, size); + res_cache.InvalidateRegion(addr, size, nullptr); } bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { MICROPROFILE_SCOPE(OpenGL_Blits); - CachedSurface src_params; + SurfaceParams src_params; src_params.addr = config.GetPhysicalInputAddress(); - // It's important to use the correct source input width to properly skip over parts of the input - // image which will be cropped from the output but still affect the stride of the input image. - src_params.width = config.input_width; - // Using the output's height is fine because we don't read or skip over the remaining part of - // the image, and it allows for smaller texture cache lookup rectangles. + src_params.width = config.output_width; + src_params.stride = config.input_width; src_params.height = config.output_height; src_params.is_tiled = !config.input_linear; - src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.input_format); + src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.input_format); + src_params.UpdateParams(); - CachedSurface dst_params; + SurfaceParams dst_params; dst_params.addr = config.GetPhysicalOutputAddress(); - dst_params.width = - config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value(); - dst_params.height = - config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value(); + dst_params.width = config.scaling != config.NoScale ? 
config.output_width.Value() / 2 : config.output_width.Value(); + dst_params.height = config.scaling == config.ScaleXY ? config.output_height.Value() / 2 : config.output_height.Value(); dst_params.is_tiled = config.input_linear != config.dont_swizzle; - dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format); + dst_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.output_format); + dst_params.UpdateParams(); - MathUtil::Rectangle src_rect; - CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect); - - if (src_surface == nullptr) { + MathUtil::Rectangle src_rect; + Surface src_surface; + std::tie(src_surface, src_rect) = res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); + if (src_surface == nullptr) return false; - } - // Adjust the source rectangle to take into account parts of the input lines being cropped - if (config.input_width > config.output_width) { - src_rect.right -= static_cast((config.input_width - config.output_width) * - src_surface->res_scale_width); - } + dst_params.res_scale = src_surface->res_scale; - // Require destination surface to have same resolution scale as source to preserve scaling - dst_params.res_scale_width = src_surface->res_scale_width; - dst_params.res_scale_height = src_surface->res_scale_height; - - MathUtil::Rectangle dst_rect; - CachedSurface* dst_surface = res_cache.GetSurfaceRect(dst_params, true, false, dst_rect); - - if (dst_surface == nullptr) { + MathUtil::Rectangle dst_rect; + Surface dst_surface; + std::tie(dst_surface, dst_rect) = res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, false); + if (dst_surface == nullptr) return false; - } - // Don't accelerate if the src and dst surfaces are the same - if (src_surface == dst_surface) { + if (config.flip_vertically) + std::swap(src_rect.top, src_rect.bottom); + + if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) return false; - } - 
if (config.flip_vertically) { - std::swap(dst_rect.top, dst_rect.bottom); - } - - if (!res_cache.TryBlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) { - return false; - } - - u32 dst_size = dst_params.width * dst_params.height * - CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8; - dst_surface->dirty = true; - res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true); + res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface); return true; } bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { - // TODO(tfarley): Try to hardware accelerate this - return false; + const u32 input_width = config.texture_copy.input_width * 16; + const u32 input_gap = config.texture_copy.input_gap * 16; + const u32 output_width = config.texture_copy.output_width * 16; + const u32 output_gap = config.texture_copy.output_gap * 16; + + if (config.texture_copy.size == 0) + return true; + + if (input_width != output_width || config.texture_copy.size % input_width != 0) + return false; + + SurfaceParams src_params; + src_params.addr = config.GetPhysicalInputAddress(); + src_params.stride = input_width + input_gap; // stride in bytes + src_params.width = input_width; // width in bytes + src_params.height = config.texture_copy.size / input_width; + src_params.size = ((src_params.height - 1) * src_params.stride) + src_params.width; + src_params.end = src_params.addr + src_params.size; + + MathUtil::Rectangle src_rect; + Surface src_surface; + std::tie(src_surface, src_rect) = res_cache.GetTexCopySurface(src_params); + if (src_surface == nullptr) + return false; + + if ((output_gap * 8) % SurfaceParams::GetFormatBpp(src_surface->pixel_format) != 0 || + (src_surface->is_tiled && src_surface->PixelsInBytes(output_gap) % 64 != 0)) + return false; + + SurfaceParams dst_params = *src_surface; + dst_params.addr = config.GetPhysicalOutputAddress(); + dst_params.stride = (output_width + output_gap) * 
src_surface->stride / src_params.stride; + dst_params.width = output_width * src_surface->stride / src_params.stride; + dst_params.height = src_surface->is_tiled ? src_params.height * 8 : src_params.height; + dst_params.res_scale = src_surface->res_scale; + dst_params.UpdateParams(); + + const bool load_gap = output_gap != 0; // Since we are going to invalidate the gap if there is one, we will have to load it first + MathUtil::Rectangle dst_rect; + Surface dst_surface; + std::tie(dst_surface, dst_rect) = res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, load_gap); + if (dst_surface == nullptr) + return false; + + if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) + return false; + + res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface); + return true; } bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { - MICROPROFILE_SCOPE(OpenGL_Blits); - using PixelFormat = CachedSurface::PixelFormat; - using SurfaceType = CachedSurface::SurfaceType; - - CachedSurface* dst_surface = res_cache.TryGetFillSurface(config); - - if (dst_surface == nullptr) { + Surface dst_surface = res_cache.GetFillSurface(config); + if (dst_surface == nullptr) return false; - } - OpenGLState cur_state = OpenGLState::GetCurState(); - - SurfaceType dst_type = CachedSurface::GetFormatType(dst_surface->pixel_format); - - GLuint old_fb = cur_state.draw.draw_framebuffer; - cur_state.draw.draw_framebuffer = framebuffer.handle; - // TODO: When scissor test is implemented, need to disable scissor test in cur_state here so - // Clear call isn't affected - cur_state.Apply(); - - if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - dst_surface->texture.handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - - GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - - // 
TODO: Handle additional pixel format and fill value size combinations to accelerate more - // cases - // For instance, checking if fill value's bytes/bits repeat to allow filling - // I8/A8/I4/A4/... - // Currently only handles formats that are multiples of the fill value size - - if (config.fill_24bit) { - switch (dst_surface->pixel_format) { - case PixelFormat::RGB8: - color_values[0] = config.value_24bit_r / 255.0f; - color_values[1] = config.value_24bit_g / 255.0f; - color_values[2] = config.value_24bit_b / 255.0f; - break; - default: - return false; - } - } else if (config.fill_32bit) { - u32 value = config.value_32bit; - - switch (dst_surface->pixel_format) { - case PixelFormat::RGBA8: - color_values[0] = (value >> 24) / 255.0f; - color_values[1] = ((value >> 16) & 0xFF) / 255.0f; - color_values[2] = ((value >> 8) & 0xFF) / 255.0f; - color_values[3] = (value & 0xFF) / 255.0f; - break; - default: - return false; - } - } else { - u16 value_16bit = config.value_16bit.Value(); - Math::Vec4 color; - - switch (dst_surface->pixel_format) { - case PixelFormat::RGBA8: - color_values[0] = (value_16bit >> 8) / 255.0f; - color_values[1] = (value_16bit & 0xFF) / 255.0f; - color_values[2] = color_values[0]; - color_values[3] = color_values[1]; - break; - case PixelFormat::RGB5A1: - color = Color::DecodeRGB5A1((const u8*)&value_16bit); - color_values[0] = color[0] / 31.0f; - color_values[1] = color[1] / 31.0f; - color_values[2] = color[2] / 31.0f; - color_values[3] = color[3]; - break; - case PixelFormat::RGB565: - color = Color::DecodeRGB565((const u8*)&value_16bit); - color_values[0] = color[0] / 31.0f; - color_values[1] = color[1] / 63.0f; - color_values[2] = color[2] / 31.0f; - break; - case PixelFormat::RGBA4: - color = Color::DecodeRGBA4((const u8*)&value_16bit); - color_values[0] = color[0] / 15.0f; - color_values[1] = color[1] / 15.0f; - color_values[2] = color[2] / 15.0f; - color_values[3] = color[3] / 15.0f; - break; - case PixelFormat::IA8: - case 
PixelFormat::RG8: - color_values[0] = (value_16bit >> 8) / 255.0f; - color_values[1] = (value_16bit & 0xFF) / 255.0f; - break; - default: - return false; - } - } - - cur_state.color_mask.red_enabled = GL_TRUE; - cur_state.color_mask.green_enabled = GL_TRUE; - cur_state.color_mask.blue_enabled = GL_TRUE; - cur_state.color_mask.alpha_enabled = GL_TRUE; - cur_state.Apply(); - glClearBufferfv(GL_COLOR, 0, color_values); - } else if (dst_type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - dst_surface->texture.handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - GLfloat value_float; - if (dst_surface->pixel_format == CachedSurface::PixelFormat::D16) { - value_float = config.value_32bit / 65535.0f; // 2^16 - 1 - } else if (dst_surface->pixel_format == CachedSurface::PixelFormat::D24) { - value_float = config.value_32bit / 16777215.0f; // 2^24 - 1 - } - - cur_state.depth.write_mask = GL_TRUE; - cur_state.Apply(); - glClearBufferfv(GL_DEPTH, 0, &value_float); - } else if (dst_type == SurfaceType::DepthStencil) { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - dst_surface->texture.handle, 0); - - GLfloat value_float = (config.value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1 - GLint value_int = (config.value_32bit >> 24); - - cur_state.depth.write_mask = GL_TRUE; - cur_state.stencil.write_mask = 0xFF; - cur_state.Apply(); - glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int); - } - - cur_state.draw.draw_framebuffer = old_fb; - // TODO: Return scissor test to previous value when scissor test is implemented - cur_state.Apply(); - - dst_surface->dirty = true; - res_cache.FlushRegion(dst_surface->addr, dst_surface->size, dst_surface, 
true); + res_cache.InvalidateRegion(dst_surface->addr, dst_surface->size, dst_surface); return true; } @@ -1123,16 +1069,18 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con } MICROPROFILE_SCOPE(OpenGL_CacheManagement); - CachedSurface src_params; + SurfaceParams src_params; src_params.addr = framebuffer_addr; - src_params.width = config.width; + src_params.width = std::min(config.width.Value(), pixel_stride); src_params.height = config.height; - src_params.pixel_stride = pixel_stride; + src_params.stride = pixel_stride; src_params.is_tiled = false; - src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.color_format); + src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.color_format); + src_params.UpdateParams(); - MathUtil::Rectangle src_rect; - CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect); + MathUtil::Rectangle src_rect; + Surface src_surface; + std::tie(src_surface, src_rect) = res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); if (src_surface == nullptr) { return false; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 46c62961c..18808b1e4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -43,6 +43,7 @@ public: void NotifyPicaRegisterChanged(u32 id) override; void FlushAll() override; void FlushRegion(PAddr addr, u32 size) override; + void InvalidateRegion(PAddr addr, u32 size) override; void FlushAndInvalidateRegion(PAddr addr, u32 size) override; bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override; @@ -135,7 +136,7 @@ private: // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. // Not following that rule will cause problems on some AMD drivers. 
struct UniformData { - alignas(8) GLvec2 framebuffer_scale; + GLint framebuffer_scale; GLint alphatest_ref; GLfloat depth_scale; GLfloat depth_offset; @@ -155,7 +156,7 @@ private: }; static_assert( - sizeof(UniformData) == 0x470, + sizeof(UniformData) == 0x460, "The size of the UniformData structure has changed, update the structure in the shader"); static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index f37894e7a..4ee164ae0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -9,11 +9,15 @@ #include #include #include +#include #include +#include "common/alignment.h" #include "common/bit_field.h" +#include "common/color.h" #include "common/logging/log.h" #include "common/math_util.h" #include "common/microprofile.h" +#include "common/scope_exit.h" #include "common/vector_math.h" #include "core/frontend/emu_window.h" #include "core/memory.h" @@ -25,13 +29,18 @@ #include "video_core/utils.h" #include "video_core/video_core.h" +using SurfaceType = SurfaceParams::SurfaceType; +using PixelFormat = SurfaceParams::PixelFormat; + +static std::array transfer_framebuffers; + struct FormatTuple { GLint internal_format; GLenum format; GLenum type; }; -static const std::array fb_format_tuples = {{ +static constexpr std::array fb_format_tuples = {{ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, // RGBA8 {GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE}, // RGB8 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1 @@ -39,86 +48,152 @@ static const std::array fb_format_tuples = {{ {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4 }}; -static const std::array depth_format_tuples = {{ +static constexpr std::array depth_format_tuples = {{ {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16 {}, 
{GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8 }}; -RasterizerCacheOpenGL::RasterizerCacheOpenGL() { - transfer_framebuffers[0].Create(); - transfer_framebuffers[1].Create(); -} +static constexpr FormatTuple tex_tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE }; -RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { - FlushAll(); -} - -static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height, - u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data, - u8* gl_data, bool morton_to_gl) { - using PixelFormat = CachedSurface::PixelFormat; - - u8* data_ptrs[2]; - u32 depth_stencil_shifts[2] = {24, 8}; - - if (morton_to_gl) { - std::swap(depth_stencil_shifts[0], depth_stencil_shifts[1]); +static const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { + const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); + if (type == SurfaceType::Color) { + ASSERT((size_t)pixel_format < fb_format_tuples.size()); + return fb_format_tuples[(unsigned int)pixel_format]; } + else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { + size_t tuple_idx = (size_t)pixel_format - 14; + ASSERT(tuple_idx < depth_format_tuples.size()); + return depth_format_tuples[tuple_idx]; + } + else { + return tex_tuple; + } +} - if (pixel_format == PixelFormat::D24S8) { - for (unsigned y = 0; y < height; ++y) { - for (unsigned x = 0; x < width; ++x) { - const u32 coarse_y = y & ~7; - u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + - coarse_y * width * bytes_per_pixel; - u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; +template +constexpr auto RangeFromInterval(Map& map, const Interval& interval) { + return boost::make_iterator_range(map.equal_range(interval)); +} - data_ptrs[morton_to_gl] = morton_data + morton_offset; - data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; +enum MortonCopyFlags : 
int { + MortonToGl = (1 << 0), + CheckRange = (1 << 1), + D24S8Format = (1 << 2), + BytesPerPixelBits = 3, // bits 3-4 + GLBytesPerPixelBits = 5, // bits 5-6 + MaxValue = (1 << 7) - 1, +}; +template +static void MortonCopyPixels(u32 width, u32 height, const u8* in_data, u8* out_data, PAddr base, PAddr start, PAddr end) { + constexpr bool check_range = (flags & MortonCopyFlags::CheckRange) ? true : false; + constexpr bool morton_to_gl = (flags & MortonCopyFlags::MortonToGl) ? true : false; + + constexpr bool D24S8format = (flags & MortonCopyFlags::D24S8Format) ? true : false; + + constexpr u32 bytes_per_pixel = u32(((flags) >> MortonCopyFlags::BytesPerPixelBits) & 0x3) + 1; // 2bits, starting with value 1 + constexpr u32 gl_bytes_per_pixel = u32(((flags) >> MortonCopyFlags::GLBytesPerPixelBits) & 0x3) + 1; // 2bits, starting with value 1 + + if (check_range) + ASSERT(start >= base && end <= (base + (width * height * bytes_per_pixel))); + const u32 start_offset = start - base; + const u32 end_offset = end - base; + + for (u32 x = 0; x < width; ++x) { + for (u32 y = 0; y < height; ++y) { + const u32 coarse_x = x & ~7; + const u32 coarse_y = y & ~7; + u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; + u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; + + if (check_range) { + if (morton_offset >= end_offset && coarse_x == 0 && coarse_y == 0) // Out of range and new tile + return; + if (morton_offset < start_offset || morton_offset >= end_offset) // Out of range + continue; + } + + const size_t copy_bytes = check_range ? std::min(end_offset - morton_offset, bytes_per_pixel) : bytes_per_pixel; + + const u8* const in_ptr = &in_data[morton_to_gl ? morton_offset : gl_pixel_index]; + u8* const out_ptr = &out_data[morton_to_gl ? 
gl_pixel_index : morton_offset]; + + if (D24S8format) { // Swap depth and stencil value ordering since 3DS does not match OpenGL - u32 depth_stencil; - memcpy(&depth_stencil, data_ptrs[1], sizeof(u32)); - depth_stencil = (depth_stencil << depth_stencil_shifts[0]) | - (depth_stencil >> depth_stencil_shifts[1]); - - memcpy(data_ptrs[0], &depth_stencil, sizeof(u32)); + constexpr size_t swap_offset = morton_to_gl ? 3 : 1; + std::array swap_buf; + std::memcpy(&swap_buf[4 - swap_offset], &in_ptr[0], swap_offset); + std::memcpy(&swap_buf[0], &in_ptr[swap_offset], 4 - swap_offset); + std::memcpy(out_ptr, &swap_buf[0], copy_bytes); } - } - } else { - for (unsigned y = 0; y < height; ++y) { - for (unsigned x = 0; x < width; ++x) { - const u32 coarse_y = y & ~7; - u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + - coarse_y * width * bytes_per_pixel; - u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; - - data_ptrs[morton_to_gl] = morton_data + morton_offset; - data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; - - memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); + else { + std::memcpy(out_ptr, in_ptr, copy_bytes); } } } } -void RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, - CachedSurface::SurfaceType type, - const MathUtil::Rectangle& src_rect, - const MathUtil::Rectangle& dst_rect) { - using SurfaceType = CachedSurface::SurfaceType; +template +class FunctionTable { +public: + FunctionTable() { + FillArray(); + } + const auto& operator [](size_t pos) const { + return table[pos]; + } +private: + template + void FillArray() { + table[P - 1] = &MortonCopyPixels

; + FillArray

(); + } + template <> + void FillArray<0>() {} + std::array), size> table; +}; +static const FunctionTable MortonCopyFnTable; +// Allocate an uninitialized texture of appropriate size and format for the surface +static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tuple, u32 width, u32 height) { OpenGLState cur_state = OpenGLState::GetCurState(); + // Keep track of previous texture bindings + GLuint old_tex = cur_state.texture_units[0].texture_2d; + cur_state.texture_units[0].texture_2d = texture; + cur_state.Apply(); + glActiveTexture(GL_TEXTURE0); + + glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0, + format_tuple.format, format_tuple.type, nullptr); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + + // Restore previous texture bindings + cur_state.texture_units[0].texture_2d = old_tex; + cur_state.Apply(); +} + +static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rect, + GLuint dst_tex, const MathUtil::Rectangle& dst_rect, + SurfaceType type) { + OpenGLState cur_state = OpenGLState::GetCurState(); + + OpenGLState prev_state = cur_state; + SCOPE_EXIT({ prev_state.Apply(); }); + // Make sure textures aren't bound to texture units, since going to bind them to framebuffer // components OpenGLState::ResetTexture(src_tex); OpenGLState::ResetTexture(dst_tex); // Keep track of previous framebuffer bindings - GLuint old_fbs[2] = {cur_state.draw.read_framebuffer, cur_state.draw.draw_framebuffer}; cur_state.draw.read_framebuffer = transfer_framebuffers[0].handle; cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle; cur_state.Apply(); @@ -163,637 +238,992 @@ void RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, dst_rect.top, dst_rect.right, 
dst_rect.bottom, buffers, buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); - // Restore previous framebuffer bindings - cur_state.draw.read_framebuffer = old_fbs[0]; - cur_state.draw.draw_framebuffer = old_fbs[1]; - cur_state.Apply(); -} - -bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, - const MathUtil::Rectangle& src_rect, - CachedSurface* dst_surface, - const MathUtil::Rectangle& dst_rect) { - - if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, - dst_surface->pixel_format)) { - return false; - } - - BlitTextures(src_surface->texture.handle, dst_surface->texture.handle, - CachedSurface::GetFormatType(src_surface->pixel_format), src_rect, dst_rect); return true; } -static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format, - u32 width, u32 height) { - // Allocate an uninitialized texture of appropriate size and format for the surface - using SurfaceType = CachedSurface::SurfaceType; - +static bool FillSurface(const Surface& surface, const u8* fill_data) { OpenGLState cur_state = OpenGLState::GetCurState(); - // Keep track of previous texture bindings - GLuint old_tex = cur_state.texture_units[0].texture_2d; - cur_state.texture_units[0].texture_2d = texture; + OpenGLState prev_state = cur_state; + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState::ResetTexture(surface->texture.handle); + + cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle; cur_state.Apply(); - glActiveTexture(GL_TEXTURE0); - SurfaceType type = CachedSurface::GetFormatType(pixel_format); + if (surface->type == SurfaceType::Color || surface->type == SurfaceType::Texture) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, surface->texture.handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - FormatTuple tuple; - if (type == SurfaceType::Color) { - ASSERT((size_t)pixel_format < fb_format_tuples.size()); - 
tuple = fb_format_tuples[(unsigned int)pixel_format]; - } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { - size_t tuple_idx = (size_t)pixel_format - 14; - ASSERT(tuple_idx < depth_format_tuples.size()); - tuple = depth_format_tuples[tuple_idx]; - } else { - tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; + Pica::Texture::TextureInfo tex_info{}; + tex_info.format = static_cast(surface->pixel_format); + Math::Vec4 color = Pica::Texture::LookupTexture(fill_data, 0, 0, tex_info); + + std::array color_values = { + color.x / 255.f, + color.y / 255.f, + color.z / 255.f, + color.w / 255.f + }; + + cur_state.color_mask.red_enabled = GL_TRUE; + cur_state.color_mask.green_enabled = GL_TRUE; + cur_state.color_mask.blue_enabled = GL_TRUE; + cur_state.color_mask.alpha_enabled = GL_TRUE; + cur_state.Apply(); + glClearBufferfv(GL_COLOR, 0, &color_values[0]); + } + else if (surface->type == SurfaceType::Depth) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, surface->texture.handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + u32 value_32bit = 0; + GLfloat value_float; + + if (surface->pixel_format == SurfaceParams::PixelFormat::D16) { + std::memcpy(&value_32bit, fill_data, 2); + value_float = value_32bit / 65535.0f; // 2^16 - 1 + } + else if (surface->pixel_format == SurfaceParams::PixelFormat::D24) { + std::memcpy(&value_32bit, fill_data, 3); + value_float = value_32bit / 16777215.0f; // 2^24 - 1 + } + + cur_state.depth.write_mask = GL_TRUE; + cur_state.Apply(); + glClearBufferfv(GL_DEPTH, 0, &value_float); + } + else if (surface->type == SurfaceType::DepthStencil) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, surface->texture.handle, 0); + + 
u32 value_32bit; + std::memcpy(&value_32bit, fill_data, 4); + + GLfloat value_float = (value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1 + GLint value_int = (value_32bit >> 24); + + cur_state.depth.write_mask = GL_TRUE; + cur_state.stencil.write_mask = -1; + cur_state.Apply(); + glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int); + } + return true; +} + +SurfaceInterval SurfaceParams::GetSubRectInterval(MathUtil::Rectangle unscaled_rect) const { + if (unscaled_rect.top > unscaled_rect.bottom) { + std::swap(unscaled_rect.top, unscaled_rect.bottom); + } + if (is_tiled) { + unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8); + unscaled_rect.top = Common::AlignDown(unscaled_rect.top, 8); + unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8); + unscaled_rect.bottom = Common::AlignUp(unscaled_rect.bottom, 8); } - glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0, tuple.format, - tuple.type, nullptr); + const u32 pixel_offset = unscaled_rect.left + stride * + (!is_tiled ? 
unscaled_rect.top : height - unscaled_rect.top - unscaled_rect.GetHeight()); + + const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride + unscaled_rect.GetWidth(); + + return { addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels) }; +} + +MathUtil::Rectangle SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const { + const u32 begin_pixel_index = PixelsInBytes(sub_surface.addr - addr); + const int x0 = begin_pixel_index % stride; + const int y0 = begin_pixel_index / stride; + + if (is_tiled) + return MathUtil::Rectangle(x0, height - y0 - sub_surface.height, x0 + sub_surface.width, height - y0); // Bottom to top + + return MathUtil::Rectangle(x0, y0, x0 + sub_surface.width, y0 + sub_surface.height); // Top to bottom +} + +MathUtil::Rectangle SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const { + auto rect = GetSubRect(sub_surface); + rect.left = rect.left * res_scale; + rect.right = rect.right * res_scale; + rect.top = rect.top * res_scale; + rect.bottom = rect.bottom * res_scale; + return rect; +} + +bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { + return (other_surface.addr == addr && + other_surface.width == width && + other_surface.height == height && + other_surface.stride == stride && + other_surface.pixel_format == pixel_format && + other_surface.is_tiled == is_tiled); +} + +bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { + if (sub_surface.addr < addr || sub_surface.end > end || sub_surface.stride != stride || + sub_surface.pixel_format != pixel_format || sub_surface.is_tiled != is_tiled) + return false; + + auto rect = GetSubRect(sub_surface); + + if (rect.left + sub_surface.width > stride) + return false; + + if (is_tiled) + return ((height - rect.bottom) % 8 == 0 && rect.left % 8 == 0); + + return true; +} + +bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { + if (pixel_format != expanded_surface.pixel_format || + 
is_tiled != expanded_surface.is_tiled || + addr > expanded_surface.end || expanded_surface.addr > end || + stride != expanded_surface.stride) + return false; + + const u32 begin_pixel_index = + PixelsInBytes(std::max(expanded_surface.addr, addr) - + std::min(expanded_surface.addr, addr)); + const int x0 = begin_pixel_index % stride; + const int y0 = begin_pixel_index / stride; + + return x0 == 0 && (!is_tiled || y0 % 8 == 0); +} + +bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { + // TODO: Accept "Fill" surfaces + if (pixel_format == PixelFormat::Invalid || + addr > texcopy_params.addr || end < texcopy_params.end || + ((texcopy_params.addr - addr) * 8) % GetFormatBpp(pixel_format) != 0 || + (texcopy_params.width * 8) % GetFormatBpp(pixel_format) != 0 || + (texcopy_params.stride * 8) % GetFormatBpp(pixel_format) != 0) + return false; + + const u32 begin_pixel_index = PixelsInBytes(texcopy_params.addr - addr); + const int x0 = begin_pixel_index % stride; + const int y0 = begin_pixel_index / stride; + + if (!is_tiled) + return (PixelsInBytes(texcopy_params.stride) == stride && + x0 + PixelsInBytes(texcopy_params.width) <= stride); + + return (PixelsInBytes(texcopy_params.addr - addr) % 64 == 0 && + PixelsInBytes(texcopy_params.width) % 64 == 0 && + PixelsInBytes(texcopy_params.stride) == stride * 8 && + x0 + PixelsInBytes(texcopy_params.width / 8) <= stride); +} + +bool CachedSurface::CanCopy(const SurfaceParams& dest_surface) const { + if (type == SurfaceType::Fill && IsRegionValid(dest_surface.GetInterval()) && + dest_surface.addr >= addr && dest_surface.end <= end) { // dest_surface is within our fill range + if (fill_size != dest_surface.BytesPerPixel()) { + if (dest_surface.is_tiled && BytesInPixels(8 * 8) % fill_size != 0) + return false; + + // Check if bits repeat for our fill_size + const u32 dest_bytes_per_pixel = std::max(dest_surface.BytesPerPixel(), 1u); // Take care of 4bpp formats + std::vector fill_test(fill_size * 
dest_bytes_per_pixel); + + for (u32 i = 0; i < dest_bytes_per_pixel; ++i) + std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); + + for (u32 i = 0; i < fill_size; ++i) + if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], dest_bytes_per_pixel) != 0) + return false; + + if (dest_surface.BytesPerPixel() == 0 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) // 4bpp compare + return false; + } + return true; + } + if (CanSubRect(dest_surface) && dest_surface.width == stride) + return true; + + return false; +} + +static void CopySurface(const Surface& src_surface, const Surface& dest_surface) { + if (src_surface == dest_surface) + return; + + // This is only called when CanCopy is true, no need to run checks here + if (src_surface->type == SurfaceType::Fill) { + // FillSurface needs a 4 bytes buffer + const u32 fill_offset = (dest_surface->addr - src_surface->addr) % src_surface->fill_size; + std::array fill_buffer; + + u32 fill_buff_pos = fill_offset; + for (int i : {0, 1, 2, 3}) + fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size]; + + FillSurface(dest_surface, &fill_buffer[0]); + } + if (src_surface->CanSubRect(*dest_surface)) { + BlitTextures(src_surface->texture.handle, src_surface->GetScaledSubRect(*dest_surface), + dest_surface->texture.handle, dest_surface->GetScaledRect(), + src_surface->type); + } + dest_surface->gl_buffer_dirty = true; +} + +MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); +void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { + ASSERT(type != SurfaceType::Fill); + + const u8* const texture_src_data = Memory::GetPhysicalPointer(addr); + if (texture_src_data == nullptr) + return; + + //TODO: Should probably be done in ::Memory:: and check for other regions too + if (load_start <= Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END) + load_end = Memory::VRAM_VADDR_END; + + if (load_start < Memory::VRAM_VADDR && load_end 
>= Memory::VRAM_VADDR) + load_start = Memory::VRAM_VADDR; + + MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); + + ASSERT(load_start >= addr && load_end <= end); + const u32 start_offset = load_start - addr; + + if (!is_tiled) { + ASSERT(type == SurfaceType::Color); + std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, load_end - load_start); + } + else { + if (type == SurfaceType::Texture) { + Pica::Texture::TextureInfo tex_info{}; + tex_info.width = width; + tex_info.height = height; + tex_info.format = static_cast(pixel_format); + tex_info.SetDefaultStride(); + tex_info.physical_address = addr; + + for (unsigned y = 0; y < height; ++y) { + for (unsigned x = 0; x < width; ++x) { + auto vec4 = Pica::Texture::LookupTexture(texture_src_data, x, height - 1 - y, tex_info); + const size_t offset = (x + (width * y)) * 4; + std::memcpy(&gl_buffer[offset], vec4.AsArray(), 4); + } + } + } + else { + size_t copyfn_offset = MortonCopyFlags::MortonToGl; + copyfn_offset |= (BytesPerPixel() - 1) << MortonCopyFlags::BytesPerPixelBits; + copyfn_offset |= (gl_bytes_per_pixel - 1) << MortonCopyFlags::GLBytesPerPixelBits; + + if (load_start != addr || load_end != end) + copyfn_offset |= MortonCopyFlags::CheckRange; + if (pixel_format == PixelFormat::D24S8) + copyfn_offset |= MortonCopyFlags::D24S8Format; + + MortonCopyFnTable[copyfn_offset](width, height, + texture_src_data, &gl_buffer[gl_buffer_offset], addr, load_start, load_end); + } + } +} + +MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); +void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { + u8* const dst_buffer = Memory::GetPhysicalPointer(addr); + if (dst_buffer == nullptr) + return; + + //TODO: Should probably be done in ::Memory:: and check for other regions too + //same as loadglbuffer() + if (flush_start <= Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END) + flush_end = Memory::VRAM_VADDR_END; + + if (flush_start < Memory::VRAM_VADDR && 
flush_end >= Memory::VRAM_VADDR) + flush_start = Memory::VRAM_VADDR; + + MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); + + ASSERT(flush_start >= addr && flush_end <= end); + const u32 start_offset = flush_start - addr; + const u32 end_offset = flush_end - addr; + + if (type == SurfaceType::Fill) { + const u32 coarse_start_offset = start_offset - (start_offset % fill_size); + const u32 backup_bytes = start_offset % fill_size; + std::array backup_data; + if (backup_bytes) + std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes); + + for (u32 offset = coarse_start_offset; offset < end_offset; offset += fill_size) + std::memcpy(&dst_buffer[offset], &fill_data[0], std::min(fill_size, end_offset - offset)); + + if (backup_bytes) + std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); + } + else if (!is_tiled) { + ASSERT(type == SurfaceType::Color); + std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start); + } + else { + size_t copyfn_offset = (BytesPerPixel() - 1) << MortonCopyFlags::BytesPerPixelBits; + copyfn_offset |= (gl_bytes_per_pixel - 1) << MortonCopyFlags::GLBytesPerPixelBits; + + if (flush_start != addr || flush_end != end) + copyfn_offset |= MortonCopyFlags::CheckRange; + if (pixel_format == PixelFormat::D24S8) + copyfn_offset |= MortonCopyFlags::D24S8Format; + + MortonCopyFnTable[copyfn_offset](width, height, + &gl_buffer[gl_buffer_offset], dst_buffer, addr, flush_start, flush_end); + } +} + +void CachedSurface::UploadGLTexture() { + if (type == SurfaceType::Fill) + return; + + ASSERT(gl_buffer.size() == width * height * gl_bytes_per_pixel); + + const FormatTuple& tuple = GetFormatTuple(pixel_format); + + // Load data from memory to the surface + OpenGLState cur_state = OpenGLState::GetCurState(); + + GLuint old_tex = cur_state.texture_units[0].texture_2d; + cur_state.texture_units[0].texture_2d = texture.handle; + cur_state.Apply(); + + glActiveTexture(GL_TEXTURE0); + 
glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0, + tuple.format, tuple.type, &gl_buffer[0]); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - // Restore previous texture bindings cur_state.texture_units[0].texture_2d = old_tex; cur_state.Apply(); + + // If not 1x scale, blit 1x texture to a new scaled texture and replace texture in surface + if (res_scale != 1) { + OGLTexture scaled_texture; + scaled_texture.Create(); + + AllocateSurfaceTexture(scaled_texture.handle, tuple, GetScaledWidth(), GetScaledHeight()); + BlitTextures(texture.handle, GetRect(), scaled_texture.handle, GetScaledRect(), type); + + std::swap(texture.handle, scaled_texture.handle); + } + + gl_buffer_dirty = false; } -MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192)); -CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale, - bool load_if_create) { - using PixelFormat = CachedSurface::PixelFormat; - using SurfaceType = CachedSurface::SurfaceType; +void CachedSurface::DownloadGLTexture() { + if (gl_buffer.size() == 0) + gl_buffer.resize(width * height * gl_bytes_per_pixel); - if (params.addr == 0) { - return nullptr; - } - - u32 params_size = - params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8; - - // Check for an exact match in existing surfaces - CachedSurface* best_exact_surface = nullptr; - float exact_surface_goodness = -1.f; - - auto surface_interval = - boost::icl::interval::right_open(params.addr, params.addr + params_size); - auto range = surface_cache.equal_range(surface_interval); - for (auto it = range.first; it != range.second; ++it) { - for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { - CachedSurface* surface = 
it2->get(); - - // Check if the request matches the surface exactly - if (params.addr == surface->addr && params.width == surface->width && - params.height == surface->height && params.pixel_format == surface->pixel_format) { - // Make sure optional param-matching criteria are fulfilled - bool tiling_match = (params.is_tiled == surface->is_tiled); - bool res_scale_match = (params.res_scale_width == surface->res_scale_width && - params.res_scale_height == surface->res_scale_height); - if (!match_res_scale || res_scale_match) { - // Prioritize same-tiling and highest resolution surfaces - float match_goodness = - (float)tiling_match + surface->res_scale_width * surface->res_scale_height; - if (match_goodness > exact_surface_goodness || surface->dirty) { - exact_surface_goodness = match_goodness; - best_exact_surface = surface; - } - } - } - } - } - - // Return the best exact surface if found - if (best_exact_surface != nullptr) { - return best_exact_surface; - } - - // No matching surfaces found, so create a new one - u8* texture_src_data = Memory::GetPhysicalPointer(params.addr); - if (texture_src_data == nullptr) { - return nullptr; - } - - MICROPROFILE_SCOPE(OpenGL_SurfaceUpload); - - // Stride only applies to linear images. 
- ASSERT(params.pixel_stride == 0 || !params.is_tiled); - - std::shared_ptr new_surface = std::make_shared(); - - new_surface->addr = params.addr; - new_surface->size = params_size; - - new_surface->texture.Create(); - new_surface->width = params.width; - new_surface->height = params.height; - new_surface->pixel_stride = params.pixel_stride; - new_surface->res_scale_width = params.res_scale_width; - new_surface->res_scale_height = params.res_scale_height; - - new_surface->is_tiled = params.is_tiled; - new_surface->pixel_format = params.pixel_format; - new_surface->dirty = false; - - if (!load_if_create) { - // Don't load any data; just allocate the surface's texture - AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format, - new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); - } else { - // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead - // of memory upload below if that's a common scenario in some game - - Memory::RasterizerFlushRegion(params.addr, params_size); - - // Load data from memory to the new surface - OpenGLState cur_state = OpenGLState::GetCurState(); - - GLuint old_tex = cur_state.texture_units[0].texture_2d; - cur_state.texture_units[0].texture_2d = new_surface->texture.handle; - cur_state.Apply(); - glActiveTexture(GL_TEXTURE0); - - if (!new_surface->is_tiled) { - // TODO: Ensure this will always be a color format, not a depth or other format - ASSERT((size_t)new_surface->pixel_format < fb_format_tuples.size()); - const FormatTuple& tuple = fb_format_tuples[(unsigned int)params.pixel_format]; - - glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)new_surface->pixel_stride); - glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, - tuple.format, tuple.type, texture_src_data); - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - } else { - SurfaceType type = CachedSurface::GetFormatType(new_surface->pixel_format); - if (type != SurfaceType::Depth && type != 
SurfaceType::DepthStencil) { - FormatTuple tuple; - if ((size_t)params.pixel_format < fb_format_tuples.size()) { - tuple = fb_format_tuples[(unsigned int)params.pixel_format]; - } else { - // Texture - tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; - } - - std::vector> tex_buffer(params.width * params.height); - - Pica::Texture::TextureInfo tex_info; - tex_info.width = params.width; - tex_info.height = params.height; - tex_info.format = (Pica::TexturingRegs::TextureFormat)params.pixel_format; - tex_info.SetDefaultStride(); - tex_info.physical_address = params.addr; - - for (unsigned y = 0; y < params.height; ++y) { - for (unsigned x = 0; x < params.width; ++x) { - tex_buffer[x + params.width * y] = Pica::Texture::LookupTexture( - texture_src_data, x, params.height - 1 - y, tex_info); - } - } - - glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, - 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data()); - } else { - // Depth/Stencil formats need special treatment since they aren't sampleable using - // LookupTexture and can't use RGBA format - size_t tuple_idx = (size_t)params.pixel_format - 14; - ASSERT(tuple_idx < depth_format_tuples.size()); - const FormatTuple& tuple = depth_format_tuples[tuple_idx]; - - u32 bytes_per_pixel = CachedSurface::GetFormatBpp(params.pixel_format) / 8; - - // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type - bool use_4bpp = (params.pixel_format == PixelFormat::D24); - - u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel; - - std::vector temp_fb_depth_buffer(params.width * params.height * - gl_bytes_per_pixel); - - u8* temp_fb_depth_buffer_ptr = - use_4bpp ? 
temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data(); - - MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel, - gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr, - true); - - glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, - 0, tuple.format, tuple.type, temp_fb_depth_buffer.data()); - } - } - - // If not 1x scale, blit 1x texture to a new scaled texture and replace texture in surface - if (new_surface->res_scale_width != 1.f || new_surface->res_scale_height != 1.f) { - OGLTexture scaled_texture; - scaled_texture.Create(); - - AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format, - new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); - BlitTextures(new_surface->texture.handle, scaled_texture.handle, - CachedSurface::GetFormatType(new_surface->pixel_format), - MathUtil::Rectangle(0, 0, new_surface->width, new_surface->height), - MathUtil::Rectangle(0, 0, new_surface->GetScaledWidth(), - new_surface->GetScaledHeight())); - - new_surface->texture.Release(); - new_surface->texture.handle = scaled_texture.handle; - scaled_texture.handle = 0; - cur_state.texture_units[0].texture_2d = new_surface->texture.handle; - cur_state.Apply(); - } - - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - - cur_state.texture_units[0].texture_2d = old_tex; - cur_state.Apply(); - } - - Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1); - surface_cache.add(std::make_pair(boost::icl::interval::right_open( - new_surface->addr, new_surface->addr + new_surface->size), - std::set>({new_surface}))); - return new_surface.get(); -} - -CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params, - bool match_res_scale, 
bool load_if_create, - MathUtil::Rectangle& out_rect) { - if (params.addr == 0) { - return nullptr; - } - - u32 total_pixels = params.width * params.height; - u32 params_size = total_pixels * CachedSurface::GetFormatBpp(params.pixel_format) / 8; - - // Attempt to find encompassing surfaces - CachedSurface* best_subrect_surface = nullptr; - float subrect_surface_goodness = -1.f; - - auto surface_interval = - boost::icl::interval::right_open(params.addr, params.addr + params_size); - auto cache_upper_bound = surface_cache.upper_bound(surface_interval); - for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) { - for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { - CachedSurface* surface = it2->get(); - - // Check if the request is contained in the surface - if (params.addr >= surface->addr && - params.addr + params_size - 1 <= surface->addr + surface->size - 1 && - params.pixel_format == surface->pixel_format) { - // Make sure optional param-matching criteria are fulfilled - bool tiling_match = (params.is_tiled == surface->is_tiled); - bool res_scale_match = (params.res_scale_width == surface->res_scale_width && - params.res_scale_height == surface->res_scale_height); - if (!match_res_scale || res_scale_match) { - // Prioritize same-tiling and highest resolution surfaces - float match_goodness = - (float)tiling_match + surface->res_scale_width * surface->res_scale_height; - if (match_goodness > subrect_surface_goodness || surface->dirty) { - subrect_surface_goodness = match_goodness; - best_subrect_surface = surface; - } - } - } - } - } - - // Return the best subrect surface if found - if (best_subrect_surface != nullptr) { - unsigned int bytes_per_pixel = - (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8); - - int x0, y0; - - if (!params.is_tiled) { - u32 begin_pixel_index = (params.addr - best_subrect_surface->addr) / bytes_per_pixel; - x0 = begin_pixel_index % best_subrect_surface->width; 
- y0 = begin_pixel_index / best_subrect_surface->width; - - out_rect = MathUtil::Rectangle(x0, y0, x0 + params.width, y0 + params.height); - } else { - u32 bytes_per_tile = 8 * 8 * bytes_per_pixel; - u32 tiles_per_row = best_subrect_surface->width / 8; - - u32 begin_tile_index = (params.addr - best_subrect_surface->addr) / bytes_per_tile; - x0 = begin_tile_index % tiles_per_row * 8; - y0 = begin_tile_index / tiles_per_row * 8; - - // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory. - out_rect = - MathUtil::Rectangle(x0, best_subrect_surface->height - y0, x0 + params.width, - best_subrect_surface->height - (y0 + params.height)); - } - - out_rect.left = (int)(out_rect.left * best_subrect_surface->res_scale_width); - out_rect.right = (int)(out_rect.right * best_subrect_surface->res_scale_width); - out_rect.top = (int)(out_rect.top * best_subrect_surface->res_scale_height); - out_rect.bottom = (int)(out_rect.bottom * best_subrect_surface->res_scale_height); - - return best_subrect_surface; - } - - // No subrect found - create and return a new surface - if (!params.is_tiled) { - out_rect = MathUtil::Rectangle(0, 0, (int)(params.width * params.res_scale_width), - (int)(params.height * params.res_scale_height)); - } else { - out_rect = MathUtil::Rectangle(0, (int)(params.height * params.res_scale_height), - (int)(params.width * params.res_scale_width), 0); - } - - return GetSurface(params, match_res_scale, load_if_create); -} - -CachedSurface* RasterizerCacheOpenGL::GetTextureSurface( - const Pica::TexturingRegs::FullTextureConfig& config) { - - Pica::Texture::TextureInfo info = - Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format); - - CachedSurface params; - params.addr = info.physical_address; - params.width = info.width; - params.height = info.height; - params.is_tiled = true; - params.pixel_format = CachedSurface::PixelFormatFromTextureFormat(info.format); - return GetSurface(params, false, true); -} - -std::tuple> 
-RasterizerCacheOpenGL::GetFramebufferSurfaces( - const Pica::FramebufferRegs::FramebufferConfig& config) { - - const auto& regs = Pica::g_state.regs; - - // Make sur that framebuffers don't overlap if both color and depth are being used - u32 fb_area = config.GetWidth() * config.GetHeight(); - bool framebuffers_overlap = - config.GetColorBufferPhysicalAddress() != 0 && - config.GetDepthBufferPhysicalAddress() != 0 && - MathUtil::IntervalsIntersect( - config.GetColorBufferPhysicalAddress(), - fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())), - config.GetDepthBufferPhysicalAddress(), - fb_area * Pica::FramebufferRegs::BytesPerDepthPixel(config.depth_format)); - bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0; - bool depth_write_enable = regs.framebuffer.output_merger.depth_write_enable && - regs.framebuffer.framebuffer.allow_depth_stencil_write; - bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 && - (regs.framebuffer.output_merger.depth_test_enable || depth_write_enable || - !framebuffers_overlap); - - if (framebuffers_overlap && using_color_fb && using_depth_fb) { - LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " - "overlapping framebuffers not supported!"); - using_depth_fb = false; - } - - // get color and depth surfaces - CachedSurface color_params; - CachedSurface depth_params; - color_params.width = depth_params.width = config.GetWidth(); - color_params.height = depth_params.height = config.GetHeight(); - color_params.is_tiled = depth_params.is_tiled = true; - - // Set the internal resolution, assume the same scaling factor for top and bottom screens - float resolution_scale_factor = Settings::values.resolution_factor; - if (resolution_scale_factor == 0.0f) { - // Auto - scale resolution to the window size - resolution_scale_factor = VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio(); - } - // Scale the resolution by the specified factor 
- color_params.res_scale_width = resolution_scale_factor; - depth_params.res_scale_width = resolution_scale_factor; - color_params.res_scale_height = resolution_scale_factor; - depth_params.res_scale_height = resolution_scale_factor; - - color_params.addr = config.GetColorBufferPhysicalAddress(); - color_params.pixel_format = CachedSurface::PixelFormatFromColorFormat(config.color_format); - - depth_params.addr = config.GetDepthBufferPhysicalAddress(); - depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format); - - MathUtil::Rectangle color_rect; - CachedSurface* color_surface = - using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr; - - MathUtil::Rectangle depth_rect; - CachedSurface* depth_surface = - using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr; - - // Sanity check to make sure found surfaces aren't the same - if (using_depth_fb && using_color_fb && color_surface == depth_surface) { - LOG_CRITICAL( - Render_OpenGL, - "Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!"); - using_depth_fb = false; - depth_surface = nullptr; - } - - MathUtil::Rectangle rect; - - if (color_surface != nullptr && depth_surface != nullptr && - (depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) { - // Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if - // they don't match - if (color_rect.left != 0 || color_rect.top != 0) { - color_surface = GetSurface(color_params, true, true); - } - - if (depth_rect.left != 0 || depth_rect.top != 0) { - depth_surface = GetSurface(depth_params, true, true); - } - - if (!color_surface->is_tiled) { - rect = MathUtil::Rectangle( - 0, 0, (int)(color_params.width * color_params.res_scale_width), - (int)(color_params.height * color_params.res_scale_height)); - } else { - rect = MathUtil::Rectangle( - 0, (int)(color_params.height * color_params.res_scale_height), - 
(int)(color_params.width * color_params.res_scale_width), 0); - } - } else if (color_surface != nullptr) { - rect = color_rect; - } else if (depth_surface != nullptr) { - rect = depth_rect; - } else { - rect = MathUtil::Rectangle(0, 0, 0, 0); - } - - return std::make_tuple(color_surface, depth_surface, rect); -} - -CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config) { - auto surface_interval = - boost::icl::interval::right_open(config.GetStartAddress(), config.GetEndAddress()); - auto range = surface_cache.equal_range(surface_interval); - for (auto it = range.first; it != range.second; ++it) { - for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { - int bits_per_value = 0; - if (config.fill_24bit) { - bits_per_value = 24; - } else if (config.fill_32bit) { - bits_per_value = 32; - } else { - bits_per_value = 16; - } - - CachedSurface* surface = it2->get(); - - if (surface->addr == config.GetStartAddress() && - CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value && - (surface->width * surface->height * - CachedSurface::GetFormatBpp(surface->pixel_format) / 8) == - (config.GetEndAddress() - config.GetStartAddress())) { - return surface; - } - } - } - - return nullptr; -} - -MICROPROFILE_DEFINE(OpenGL_SurfaceDownload, "OpenGL", "Surface Download", MP_RGB(128, 192, 64)); -void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) { - using PixelFormat = CachedSurface::PixelFormat; - using SurfaceType = CachedSurface::SurfaceType; - - if (!surface->dirty) { + if (!gl_buffer_dirty || type == SurfaceType::Fill) return; - } - MICROPROFILE_SCOPE(OpenGL_SurfaceDownload); - - u8* dst_buffer = Memory::GetPhysicalPointer(surface->addr); - if (dst_buffer == nullptr) { - return; - } + const FormatTuple& tuple = GetFormatTuple(pixel_format); OpenGLState cur_state = OpenGLState::GetCurState(); GLuint old_tex = cur_state.texture_units[0].texture_2d; - OGLTexture unscaled_tex; - GLuint 
texture_to_flush = surface->texture.handle; - // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush - if (surface->res_scale_width != 1.f || surface->res_scale_height != 1.f) { + OGLTexture unscaled_tex; + if (res_scale != 1) { unscaled_tex.Create(); - AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width, - surface->height); - BlitTextures( - surface->texture.handle, unscaled_tex.handle, - CachedSurface::GetFormatType(surface->pixel_format), - MathUtil::Rectangle(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight()), - MathUtil::Rectangle(0, 0, surface->width, surface->height)); + AllocateSurfaceTexture(unscaled_tex.handle, tuple, width, height); + BlitTextures(texture.handle, GetScaledRect(), unscaled_tex.handle, GetRect(), type); - texture_to_flush = unscaled_tex.handle; + cur_state.texture_units[0].texture_2d = unscaled_tex.handle; + } + else { + cur_state.texture_units[0].texture_2d = texture.handle; } - - cur_state.texture_units[0].texture_2d = texture_to_flush; cur_state.Apply(); + glActiveTexture(GL_TEXTURE0); - - if (!surface->is_tiled) { - // TODO: Ensure this will always be a color format, not a depth or other format - ASSERT((size_t)surface->pixel_format < fb_format_tuples.size()); - const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format]; - - glPixelStorei(GL_PACK_ROW_LENGTH, (GLint)surface->pixel_stride); - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, dst_buffer); - glPixelStorei(GL_PACK_ROW_LENGTH, 0); - } else { - SurfaceType type = CachedSurface::GetFormatType(surface->pixel_format); - if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) { - ASSERT((size_t)surface->pixel_format < fb_format_tuples.size()); - const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format]; - - u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8; - - std::vector temp_gl_buffer(surface->width * surface->height * 
bytes_per_pixel); - - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data()); - - // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion - // is necessary. - MortonCopyPixels(surface->pixel_format, surface->width, surface->height, - bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(), - false); - } else { - // Depth/Stencil formats need special treatment since they aren't sampleable using - // LookupTexture and can't use RGBA format - size_t tuple_idx = (size_t)surface->pixel_format - 14; - ASSERT(tuple_idx < depth_format_tuples.size()); - const FormatTuple& tuple = depth_format_tuples[tuple_idx]; - - u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8; - - // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type - bool use_4bpp = (surface->pixel_format == PixelFormat::D24); - - u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel; - - std::vector temp_gl_buffer(surface->width * surface->height * gl_bytes_per_pixel); - - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data()); - - u8* temp_gl_buffer_ptr = use_4bpp ? 
temp_gl_buffer.data() + 1 : temp_gl_buffer.data(); - - MortonCopyPixels(surface->pixel_format, surface->width, surface->height, - bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr, - false); - } - } - - surface->dirty = false; + glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[0]); cur_state.texture_units[0].texture_2d = old_tex; cur_state.Apply(); + + gl_buffer_dirty = false; } -void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, - bool invalidate) { - if (size == 0) { +enum MatchFlags { + Invalid = 1, // Flag that can be applied to other match types, invalid matches require validation before they can be used + Exact = 1 << 1, // Surfaces perfectly match + SubRect = 1 << 2, // Surface encompasses params + Copy = 1 << 3, // Surface we can copy from + Expand = 1 << 4, // Surface that can expand params + TexCopy = 1 << 5 // Surface that will match a display transfer "texture copy" parameters +}; + +constexpr MatchFlags operator | (MatchFlags lhs, MatchFlags rhs) { + return static_cast(static_cast(lhs) | static_cast(rhs)); +} + +/// Get the best surface match (and its match type) for the given flags +template +Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, ScaleMatch match_scale_type) { + Surface match_surface = nullptr; + bool match_valid = false; + u32 match_scale = 0; + u32 match_size = 0; + + for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { + for (auto& surface : pair.second) { + const bool res_scale_matched = match_scale_type == ScaleMatch::Exact ? 
+ (params.res_scale == surface->res_scale) : + (params.res_scale <= surface->res_scale); + const bool is_valid = surface->IsRegionValid(params.GetInterval()); + + if (!(find_flags & MatchFlags::Invalid) && !is_valid) + continue; + + auto IsMatch_Helper = [&](MatchFlags check_type, auto match_fn) { + if (!(find_flags & check_type) || !match_fn()) + return; + + if (!res_scale_matched && + match_scale_type != ScaleMatch::Ignore && + surface->type != SurfaceType::Fill) + return; + + // Found a match, update only if this is better than the previous one + auto UpdateMatch = [&] { + match_surface = surface; + match_valid = is_valid; + match_scale = surface->res_scale; + match_size = surface->size; + }; + + if (surface->res_scale > match_scale) { + UpdateMatch(); + return; + } else if (surface->res_scale < match_scale) { + return; + } + + if (is_valid && !match_valid) { + UpdateMatch(); + return; + } else if (is_valid != match_valid) { + return; + } + + if (surface->size > match_size) { + UpdateMatch(); + } + }; + IsMatch_Helper(MatchFlags::Exact, [&] { return surface->ExactMatch(params); }); + IsMatch_Helper(MatchFlags::SubRect, [&] { return surface->CanSubRect(params); }); + IsMatch_Helper(MatchFlags::Copy, [&] { return surface->CanCopy(params); }); + IsMatch_Helper(MatchFlags::Expand, [&] { return surface->CanExpand(params); }); + IsMatch_Helper(MatchFlags::TexCopy, [&] { return surface->CanTexCopy(params); }); + } + } + return match_surface; +} + +RasterizerCacheOpenGL::RasterizerCacheOpenGL() { + transfer_framebuffers[0].Create(); + transfer_framebuffers[1].Create(); +} + +RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { + FlushAll(); + while (!surface_cache.empty()) + UnregisterSurface(*surface_cache.begin()->second.begin()); + transfer_framebuffers[0].Release(); + transfer_framebuffers[1].Release(); +} + +bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface, + const MathUtil::Rectangle& src_rect, + const Surface& dst_surface, + const 
MathUtil::Rectangle& dst_rect) { + if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, + dst_surface->pixel_format)) + return false; + + return BlitTextures(src_surface->texture.handle, src_rect, + dst_surface->texture.handle, dst_rect, + src_surface->type); +} + +Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, bool load_if_create) { + if (params.addr == 0 || params.height * params.width == 0) { + return nullptr; + } + + ASSERT(params.width == params.stride); // Use GetSurfaceSubRect instead + + // Check for an exact match in existing surfaces + Surface surface = FindMatch(surface_cache, params, match_res_scale); + + Surface expandable = FindMatch(surface_cache, params, match_res_scale); + u16 target_res_scale = surface == nullptr ? params.res_scale : surface->res_scale; + if (match_res_scale != ScaleMatch::Exact && + expandable != nullptr && + expandable->res_scale > params.res_scale) { + target_res_scale = expandable->res_scale; + } + + if (surface == nullptr || target_res_scale != surface->res_scale) { + SurfaceParams new_params = params; + new_params.res_scale = target_res_scale; + + surface = CreateSurface(new_params); + RegisterSurface(surface); + } + + if (load_if_create) { + ValidateSurface(surface, params.addr, params.size); + } + + return surface; +} + +SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params, + ScaleMatch match_res_scale, + bool load_if_create) { + Surface surface = nullptr; + MathUtil::Rectangle rect{}; + + if (params.addr == 0 || params.height * params.width == 0) { + return { surface, rect }; + } + + // Attempt to find encompassing surface + surface = FindMatch(surface_cache, params, match_res_scale); + + // Check if FindMatch failed because of res scaling + // If that's the case create a new surface with + // the dimensions of the lower res_scale surface + // to suggest it should not be used again + if (surface == nullptr && 
match_res_scale != ScaleMatch::Ignore) { + surface = FindMatch(surface_cache, params, ScaleMatch::Ignore); + if (surface != nullptr) { + ASSERT(surface->res_scale < params.res_scale); + SurfaceParams new_params = *surface; + new_params.res_scale = params.res_scale; + + surface = CreateSurface(new_params); + RegisterSurface(surface); + } + } + + // Check for a surface we can expand before creating a new one + if (surface == nullptr) { + surface = FindMatch(surface_cache, params, match_res_scale); + if (surface != nullptr) { + SurfaceParams new_params = *surface; + new_params.addr = std::min(params.addr, surface->addr); + new_params.end = std::max(params.end, surface->end); + new_params.size = new_params.end - new_params.addr; + new_params.height = new_params.size / params.BytesInPixels(params.stride); + + Surface new_surface = CreateSurface(new_params); + RegisterSurface(new_surface); + + // TODO: Delete the expanded surface, this can't be done safely yet + // because it may still be in use + BlitSurfaces(surface, surface->GetScaledRect(), new_surface, new_surface->GetScaledSubRect(*surface)); + new_surface->invalid_regions -= surface->GetInterval(); + new_surface->invalid_regions += surface->invalid_regions; + surface = new_surface; + } + } + + // No subrect found - create and return a new surface + if (surface == nullptr) { + SurfaceParams new_params = params; + new_params.width = params.stride; // Can't have gaps in a surface + new_params.UpdateParams(); + + surface = CreateSurface(new_params); + RegisterSurface(surface); + } + + if (load_if_create) { + ValidateSurface(surface, params.addr, params.size); + } + + rect = surface->GetScaledSubRect(params); + // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory. 
+ if (surface->is_tiled) + std::swap(rect.top, rect.bottom); + + return { surface, rect }; +} + +Surface RasterizerCacheOpenGL::GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config) { + Pica::Texture::TextureInfo info = Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format); + + SurfaceParams params; + params.addr = info.physical_address; + params.width = info.width; + params.height = info.height; + params.is_tiled = true; + params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(info.format); + params.UpdateParams(); + return GetSurface(params, ScaleMatch::Ignore, true); +} + +constexpr u16 GetResolutionScaleFactor() { + return !Settings::values.resolution_factor ? + VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio() : + Settings::values.resolution_factor; +} + +SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( + bool using_color_fb, bool using_depth_fb, + const MathUtil::Rectangle& viewport_rect) { + const auto& regs = Pica::g_state.regs; + const auto& config = regs.framebuffer.framebuffer; + + // update resolution_scale_factor and reset cache if changed + static u16 resolution_scale_factor = GetResolutionScaleFactor(); + if (resolution_scale_factor != GetResolutionScaleFactor()) { + resolution_scale_factor = GetResolutionScaleFactor(); + FlushAll(); + InvalidateRegion(0, 0xffffffff, nullptr); + } + + MathUtil::Rectangle viewport_clamped{ + static_cast(MathUtil::Clamp(viewport_rect.left, 0, static_cast(config.GetWidth()))), + static_cast(MathUtil::Clamp(viewport_rect.top, 0, static_cast(config.GetHeight()))), + static_cast(MathUtil::Clamp(viewport_rect.right, 0, static_cast(config.GetWidth()))), + static_cast(MathUtil::Clamp(viewport_rect.bottom, 0, static_cast(config.GetHeight()))) + }; + + // get color and depth surfaces + SurfaceParams color_params; + color_params.is_tiled = true; + color_params.res_scale = resolution_scale_factor; + color_params.width = config.GetWidth(); + 
color_params.height = config.GetHeight(); + SurfaceParams depth_params = color_params; + + color_params.addr = config.GetColorBufferPhysicalAddress(); + color_params.pixel_format = SurfaceParams::PixelFormatFromColorFormat(config.color_format); + color_params.UpdateParams(); + + depth_params.addr = config.GetDepthBufferPhysicalAddress(); + depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); + depth_params.UpdateParams(); + + auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); + auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); + + // Make sur that framebuffers don't overlap if both color and depth are being used + if (using_color_fb && using_depth_fb && + boost::icl::length(color_vp_interval & depth_vp_interval)) { + LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; overlapping framebuffers not supported!"); + using_depth_fb = false; + } + + MathUtil::Rectangle rect{}; + Surface color_surface = nullptr; + Surface depth_surface = nullptr; + if (using_color_fb) + std::tie(color_surface, rect) = GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); + + if (using_depth_fb && color_surface != nullptr) { + // Can't specify separate color and depth viewport offsets in OpenGL, so make sure depth_surface will have the same offsets + depth_params.addr -= depth_params.BytesInPixels( + color_surface->PixelsInBytes(color_params.addr - color_surface->addr)); + depth_params.height = color_surface->height; + depth_params.UpdateParams(); + + depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); + } + else if (using_depth_fb) { + std::tie(depth_surface, rect) = GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); + } + + if (color_surface != nullptr) { + ValidateSurface(color_surface, boost::icl::first(color_vp_interval), boost::icl::length(color_vp_interval)); + } + if (depth_surface != nullptr) { + ValidateSurface(depth_surface, 
boost::icl::first(depth_vp_interval), boost::icl::length(depth_vp_interval)); + } + + return { color_surface, depth_surface, rect }; +} + +Surface RasterizerCacheOpenGL::GetFillSurface(const GPU::Regs::MemoryFillConfig& config) { + Surface new_surface = std::make_shared(); + + new_surface->addr = config.GetStartAddress(); + new_surface->end = config.GetEndAddress(); + new_surface->size = new_surface->end - new_surface->addr; + new_surface->type = SurfaceType::Fill; + std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4); + if (config.fill_32bit) + new_surface->fill_size = 4; + else if (config.fill_24bit) + new_surface->fill_size = 3; + else + new_surface->fill_size = 2; + + RegisterSurface(new_surface); + return new_surface; +} + +SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) { + MathUtil::Rectangle rect{}; + + Surface match_surface = FindMatch(surface_cache, params, ScaleMatch::Ignore); + + if (match_surface != nullptr) { + ValidateSurface(match_surface, params.addr, params.size); + + SurfaceParams match_subrect = params; + match_subrect.width = match_surface->PixelsInBytes(params.width); + match_subrect.stride = match_surface->PixelsInBytes(params.stride); + + if (match_surface->is_tiled) { + match_subrect.width /= 8; + match_subrect.stride /= 8; + match_subrect.height *= 8; + } + + rect = match_surface->GetScaledSubRect(match_subrect); + if (match_surface->is_tiled) + std::swap(rect.top, rect.bottom); + } + + return { match_surface, rect }; +} + +void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, u32 size) { + if (size == 0) + return; + + bool upload_texture = false; + const auto validate_interval = (surface->type != SurfaceType::Texture) ? 
+ SurfaceInterval::right_open(addr, addr + size) : + surface->GetInterval(); + + if (surface->type == SurfaceType::Fill) { + // Sanity check, fill surfaces will always be valid when used + ASSERT(surface->IsRegionValid(validate_interval)); return; } - // Gather up unique surfaces that touch the region - std::unordered_set> touching_surfaces; + for (;;) { + const auto it = surface->invalid_regions.find(validate_interval); + if (it == surface->invalid_regions.end()) + break; - auto surface_interval = boost::icl::interval::right_open(addr, addr + size); - auto cache_upper_bound = surface_cache.upper_bound(surface_interval); - for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) { - std::copy_if(it->second.begin(), it->second.end(), - std::inserter(touching_surfaces, touching_surfaces.end()), - [skip_surface](std::shared_ptr surface) { - return (surface.get() != skip_surface); - }); - } + const auto interval = *it & validate_interval; + const PAddr interval_start = boost::icl::first(interval); + const PAddr interval_end = boost::icl::last_next(interval); - // Flush and invalidate surfaces - for (auto surface : touching_surfaces) { - FlushSurface(surface.get()); - if (invalidate) { - Memory::RasterizerMarkRegionCached(surface->addr, surface->size, -1); - surface_cache.subtract( - std::make_pair(boost::icl::interval::right_open( - surface->addr, surface->addr + surface->size), - std::set>({surface}))); + // Look for a valid surface to blit + SurfaceParams params = *surface; + const u32 pixel_offset = params.PixelsInBytes(interval_start - params.addr); + if (!params.is_tiled) { + // Start of the row + params.addr += params.BytesInPixels(pixel_offset - (pixel_offset % params.stride)); + params.height = (params.PixelsInBytes(interval_end - params.addr - 1) / params.stride) + 1; } + else { + // Start of the tiled row + params.addr += params.BytesInPixels(pixel_offset - (pixel_offset % (params.stride * 8))); + params.height = 
((params.PixelsInBytes(interval_end - params.addr - 1) / (params.stride * 8)) + 1) * 8; + } + params.UpdateParams(); + + Surface match_surface = FindMatch(surface_cache, params, ScaleMatch::Ignore); + + if (match_surface != nullptr) { + if (!match_surface->CanSubRect(params)) { + // Need to call CopySurface and possibly create a new one first, which GetSurface will do for us + if (params.GetInterval() == surface->GetInterval()) { + CopySurface(match_surface, surface); + surface->invalid_regions.clear(); + return; + } + Surface tmp_surface = GetSurface(params, ScaleMatch::Upscale, false); + if (tmp_surface != nullptr) { + CopySurface(match_surface, tmp_surface); + tmp_surface->invalid_regions.erase(params.GetInterval()); + match_surface = tmp_surface; + } + } + + ASSERT(match_surface->CanSubRect(params)); + const auto src_rect = match_surface->GetScaledSubRect(params); + const auto dest_rect = surface->GetScaledSubRect(params); + + BlitSurfaces(match_surface, src_rect, surface, dest_rect); + surface->gl_buffer_dirty = true; + + surface->invalid_regions.erase(params.GetInterval()); + continue; + } + + // Load data from 3DS memory + FlushRegion(interval_start, interval_end - interval_start); + surface->DownloadGLTexture(); + surface->LoadGLBuffer(interval_start, interval_end); + upload_texture = true; + + surface->invalid_regions.erase(interval); } + + if (upload_texture) + surface->UploadGLTexture(); +} + +void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, Surface flush_surface) { + if (size == 0) + return; + + const auto flush_interval = SurfaceInterval::right_open(addr, addr + size); + for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { + const auto interval = pair.first & flush_interval; + auto& surface = pair.second; + + if (flush_surface != nullptr && surface != flush_surface) + continue; + + // Sanity check, this surface is the last one that marked this region dirty + ASSERT(surface->IsRegionValid(interval)); + 
surface->DownloadGLTexture(); + surface->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval)); + } + + // Reset dirty regions + dirty_regions.erase(flush_interval); } void RasterizerCacheOpenGL::FlushAll() { - for (auto& surfaces : surface_cache) { - for (auto& surface : surfaces.second) { - FlushSurface(surface.get()); + FlushRegion(0, 0xFFFFFFFF); +} + +void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner) { + if (size == 0) + return; + + SurfaceSet remove_surfaces; + + const auto invalid_interval = SurfaceInterval::right_open(addr, addr + size); + + if (region_owner != nullptr) { + ASSERT(region_owner->type != SurfaceType::Texture); + ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); + ASSERT(region_owner->width == region_owner->stride); // Surfaces can't have a gap + region_owner->gl_buffer_dirty = true; + region_owner->invalid_regions.erase(invalid_interval); + } + + for (auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { + for (auto& cached_surface : pair.second) { + if (cached_surface == region_owner) + continue; + + // If cpu is invalidating this region we want to remove it + // to (likely) mark the memory pages as uncached + // but before that we have to flush its region that is still valid + if (region_owner == nullptr) { + // If that surface has modified data outside of the invalidated range + // have to flush it first + const auto flush_intervals = SurfaceRegions(cached_surface->GetInterval()) - invalid_interval; + for (const auto& interval : flush_intervals) { + FlushRegion(boost::icl::first(interval), boost::icl::length(interval), cached_surface); + } + remove_surfaces.emplace(cached_surface); + continue; + } + + const auto interval = cached_surface->GetInterval() & invalid_interval; + + cached_surface->invalid_regions.insert(interval); + + // Remove only "empty" fill surfaces to avoid destroying and recreating OGL textures + if (cached_surface->type 
== SurfaceType::Fill && + !cached_surface->IsRegionPartiallyValid(cached_surface->GetInterval())) + remove_surfaces.emplace(cached_surface); } } + + if (region_owner != nullptr) + dirty_regions.set({ invalid_interval, region_owner }); + else + dirty_regions.erase(invalid_interval); + + for (auto& remove_surface : remove_surfaces) + UnregisterSurface(remove_surface); +} + +Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { + Surface surface = std::make_shared(); + static_cast(*surface) = params; + + surface->texture.Create(); + + // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type + surface->gl_bytes_per_pixel = + (surface->pixel_format == PixelFormat::D24 || surface->type == SurfaceType::Texture) ? + 4 : + surface->BytesPerPixel(); + + surface->gl_buffer_offset = (surface->pixel_format == PixelFormat::D24) ? 1 : 0; + + surface->gl_buffer_dirty = false; + surface->invalid_regions.insert(surface->GetInterval()); + AllocateSurfaceTexture(surface->texture.handle, + GetFormatTuple(surface->pixel_format), + surface->GetScaledWidth(), + surface->GetScaledHeight()); + + return surface; +} + +void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { + surface_cache.add({ surface->GetInterval(), SurfaceSet{ surface } }); + UpdatePagesCachedCount(surface->addr, surface->size, 1); +} + +void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { + UpdatePagesCachedCount(surface->addr, surface->size, -1); + surface_cache.subtract({ surface->GetInterval(), SurfaceSet{ surface } }); +} + +void RasterizerCacheOpenGL::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) { + const u32 num_pages = ((addr + size - 1) >> Memory::PAGE_BITS) - (addr >> Memory::PAGE_BITS) + 1; + const u32 page_start = addr >> Memory::PAGE_BITS; + const u32 page_end = page_start + num_pages; + + // Interval maps will erase segments if count reaches 0, so if delta is negative we have to subtract after iterating + const auto 
pages_interval = PageMap::interval_type::right_open(page_start, page_end); + if (delta > 0) + cached_pages.add({ pages_interval, delta }); + + for (auto& pair : RangeFromInterval(cached_pages, pages_interval)) { + const auto interval = pair.first & pages_interval; + const int count = pair.second; + + const PAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; + const PAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const u32 interval_size = interval_end_addr - interval_start_addr; + + if (delta > 0 && count == delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); + else if (delta < 0 && count == -delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); + else + ASSERT(count >= 0); + } + + if (delta < 0) + cached_pages.add({ pages_interval, delta }); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index aea20c693..0b3cc5c09 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -12,6 +12,7 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-local-typedef" #endif +#include #include #ifdef __GNUC__ #pragma GCC diagnostic pop @@ -20,21 +21,36 @@ #include "common/assert.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "common/math_util.h" #include "core/hw/gpu.h" #include "video_core/regs_framebuffer.h" #include "video_core/regs_texturing.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -namespace MathUtil { -template -struct Rectangle; -} - struct CachedSurface; +using Surface = std::shared_ptr; +using SurfaceSet = std::set; -using SurfaceCache = boost::icl::interval_map>>; +using SurfaceRegions = boost::icl::interval_set; +using SurfaceMap = boost::icl::interval_map; +using SurfaceCache = boost::icl::interval_map; -struct CachedSurface { 
+using SurfaceInterval = SurfaceCache::interval_type; +static_assert(std::is_same() && + std::is_same(), "incorrect interval types"); + +using SurfaceRect_Tuple = std::tuple>; +using SurfaceSurfaceRect_Tuple = std::tuple>; + +using PageMap = boost::icl::interval_map; + +enum class ScaleMatch { + Exact, // only accept same res scale + Upscale, // only allow higher scale than params + Ignore // accept every scaled res +}; + +struct SurfaceParams { enum class PixelFormat { // First 5 formats are shared between textures and color buffers RGBA8 = 0, @@ -68,11 +84,12 @@ struct CachedSurface { Texture = 1, Depth = 2, DepthStencil = 3, - Invalid = 4, + Fill = 4, + Invalid = 5 }; - static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) { - static const std::array bpp_table = { + static unsigned int GetFormatBpp(SurfaceParams::PixelFormat format) { + static constexpr std::array bpp_table = { 32, // RGBA8 24, // RGB8 16, // RGB5A1 @@ -93,8 +110,8 @@ struct CachedSurface { 32, // D24S8 }; - ASSERT((unsigned int)format < ARRAY_SIZE(bpp_table)); - return bpp_table[(unsigned int)format]; + ASSERT(static_cast(format) < bpp_table.size()); + return bpp_table[static_cast(format)]; } static PixelFormat PixelFormatFromTextureFormat(Pica::TexturingRegs::TextureFormat format) { @@ -162,31 +179,108 @@ struct CachedSurface { return SurfaceType::Invalid; } + /// Update the params "size", "end" and "type" from the already set "addr", "width", "height" and "pixel_format" + void UpdateParams() { + size = width * height * GetFormatBpp(pixel_format) / 8; + + if (stride == 0) + stride = width; + else + size += (stride - width) * (height - 1) * GetFormatBpp(pixel_format) / 8; + + end = addr + size; + type = GetFormatType(pixel_format); + } + + SurfaceInterval GetInterval() const { + return SurfaceInterval::right_open(addr, end); + } + + SurfaceInterval GetSubRectInterval(MathUtil::Rectangle unscaled_rect) const; + u32 GetScaledWidth() const { - return (u32)(width * res_scale_width); + 
return width * res_scale; } u32 GetScaledHeight() const { - return (u32)(height * res_scale_height); + return height * res_scale; } - PAddr addr; - u32 size; + MathUtil::Rectangle GetRect() const { + return { 0, 0, width, height }; + } - PAddr min_valid; - PAddr max_valid; + MathUtil::Rectangle GetScaledRect() const { + return { 0, 0, GetScaledWidth(), GetScaledHeight() }; + } + + u32 PixelsInBytes(u32 size) const { + return size * 8 / GetFormatBpp(pixel_format); + } + + u32 BytesInPixels(u32 pixels) const { + return pixels * GetFormatBpp(pixel_format) / 8; + } + + u32 BytesPerPixel() const { + return BytesInPixels(1); + } + + bool ExactMatch(const SurfaceParams& other_surface) const; + bool CanSubRect(const SurfaceParams& sub_surface) const; + bool CanExpand(const SurfaceParams& expanded_surface) const; + bool CanTexCopy(const SurfaceParams& texcopy_params) const; + + MathUtil::Rectangle GetSubRect(const SurfaceParams& sub_surface) const; + MathUtil::Rectangle GetScaledSubRect(const SurfaceParams& sub_surface) const; + + PAddr addr = 0; + PAddr end = 0; + u32 size = 0; + + u32 width = 0; + u32 height = 0; + u32 stride = 0; + u16 res_scale = 1; + + bool is_tiled = false; + PixelFormat pixel_format = PixelFormat::Invalid; + SurfaceType type = SurfaceType::Invalid; +}; + +struct CachedSurface : SurfaceParams { + bool CanCopy(const SurfaceParams& dest_surface) const; + + bool IsRegionValid(const SurfaceInterval& interval) const { + return (invalid_regions.find(interval) == invalid_regions.end()); + } + + bool IsRegionPartiallyValid(const SurfaceInterval& interval) const { + const auto it = invalid_regions.find(interval); + if (it == invalid_regions.end()) + return true; + return ((boost::icl::first(*it) > addr) || (boost::icl::last_next(*it) < end)); + } + + SurfaceRegions invalid_regions; + + u32 fill_size = 0; /// Number of bytes to read from fill_data + std::array fill_data; OGLTexture texture; - u32 width; - u32 height; - /// Stride between lines, in pixels. 
Only valid for images in linear format. - u32 pixel_stride = 0; - float res_scale_width = 1.f; - float res_scale_height = 1.f; - bool is_tiled; - PixelFormat pixel_format; - bool dirty; + u32 gl_bytes_per_pixel; + int gl_buffer_offset; + std::vector gl_buffer; + bool gl_buffer_dirty; + + // Read/Write data in 3DS memory to/from gl_buffer + void LoadGLBuffer(PAddr load_start, PAddr load_end); + void FlushGLBuffer(PAddr flush_start, PAddr flush_end); + + // Upload/Download data in gl_buffer in/to this surface's texture + void UploadGLTexture(); + void DownloadGLTexture(); }; class RasterizerCacheOpenGL : NonCopyable { @@ -194,46 +288,57 @@ public: RasterizerCacheOpenGL(); ~RasterizerCacheOpenGL(); - /// Blits one texture to another - void BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, - const MathUtil::Rectangle& src_rect, - const MathUtil::Rectangle& dst_rect); + /// Blit one surface's texture to another + bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle& src_rect, + const Surface& dst_surface, const MathUtil::Rectangle& dst_rect); - /// Attempt to blit one surface's texture to another - bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle& src_rect, - CachedSurface* dst_surface, const MathUtil::Rectangle& dst_rect); - - /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached) - CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale, - bool load_if_create); + /// Load a texture from 3DS memory to OpenGL and cache it (if not already cached) + Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, bool load_if_create); /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from /// 3DS memory to OpenGL and caches it (if not already cached) - CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale, - bool load_if_create, MathUtil::Rectangle& out_rect); + 
SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create); - /// Gets a surface based on the texture configuration - CachedSurface* GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config); + /// Get a surface based on the texture configuration + Surface GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config); - /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer - /// configuration - std::tuple> GetFramebufferSurfaces( - const Pica::FramebufferRegs::FramebufferConfig& config); + /// Get the color and depth surfaces based on the framebuffer configuration + SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, + const MathUtil::Rectangle& viewport_rect); - /// Attempt to get a surface that exactly matches the fill region and format - CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config); + /// Get a surface that matches the fill config + Surface GetFillSurface(const GPU::Regs::MemoryFillConfig& config); - /// Write the surface back to memory - void FlushSurface(CachedSurface* surface); + /// Get a surface that matches a "texture copy" display transfer config + SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); - /// Write any cached resources overlapping the region back to memory (if dirty) and optionally - /// invalidate them in the cache - void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate); + /// Write any cached resources overlapping the region back to memory (if dirty) + void FlushRegion(PAddr addr, u32 size, Surface flush_surface = nullptr); + + /// Mark region as being invalidated by region_owner (nullptr if 3DS memory) + void InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner); /// Flush all cached resources tracked by this cache manager void FlushAll(); private: + /// Update surface's texture for given 
region when necessary + void ValidateSurface(const Surface& surface, PAddr addr, u32 size); + + /// Create a new surface + Surface CreateSurface(const SurfaceParams& params); + + /// Register surface into the cache + void RegisterSurface(const Surface& surface); + + /// Remove surface from the cache + void UnregisterSurface(const Surface& surface); + + /// Increase/decrease the number of surfaces in pages touching the specified region + void UpdatePagesCachedCount(PAddr addr, u32 size, int delta); + SurfaceCache surface_cache; - OGLFramebuffer transfer_framebuffers[2]; + SurfaceMap dirty_regions; + PageMap cached_pages; }; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 9fe183944..62f449a35 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -41,7 +41,7 @@ struct LightSrc { }; layout (std140) uniform shader_data { - vec2 framebuffer_scale; + int framebuffer_scale; int alphatest_ref; float depth_scale; float depth_offset; diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 5770ae08f..b89bc2303 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -69,6 +69,12 @@ OpenGLState::OpenGLState() { draw.uniform_buffer = 0; draw.shader_program = 0; + scissor.enabled = false; + scissor.x = 0; + scissor.y = 0; + scissor.width = 0; + scissor.height = 0; + clip_distance = {}; } @@ -263,6 +269,22 @@ void OpenGLState::Apply() const { glUseProgram(draw.shader_program); } + // Scissor test + if (scissor.enabled != cur_state.scissor.enabled) { + if (scissor.enabled) { + glEnable(GL_SCISSOR_TEST); + } else { + glDisable(GL_SCISSOR_TEST); + } + } + + if (scissor.x != cur_state.scissor.x || + scissor.y != cur_state.scissor.y || + scissor.width != cur_state.scissor.width || + scissor.height != cur_state.scissor.height) { + 
glScissor(scissor.x, scissor.y, scissor.width, scissor.height); + } + // Clip distance for (size_t i = 0; i < clip_distance.size(); ++i) { if (clip_distance[i] != cur_state.clip_distance[i]) { diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 437fe34c4..a6bd9476e 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -124,6 +124,14 @@ public: GLuint shader_program; // GL_CURRENT_PROGRAM } draw; + struct { + bool enabled; // GL_SCISSOR_TEST + GLint x; + GLint y; + GLsizei width; + GLsizei height; + } scissor; + std::array clip_distance; // GL_CLIP_DISTANCE OpenGLState(); diff --git a/src/video_core/swrasterizer/swrasterizer.h b/src/video_core/swrasterizer/swrasterizer.h index 6d42d7409..6c524f013 100644 --- a/src/video_core/swrasterizer/swrasterizer.h +++ b/src/video_core/swrasterizer/swrasterizer.h @@ -22,6 +22,7 @@ class SWRasterizer : public RasterizerInterface { void NotifyPicaRegisterChanged(u32 id) override {} void FlushAll() override {} void FlushRegion(PAddr addr, u32 size) override {} + void InvalidateRegion(PAddr addr, u32 size) override {} void FlushAndInvalidateRegion(PAddr addr, u32 size) override {} }; }