Merge branch 'texcache-rebase' of https://github.com/Phanto-m/citra

# Conflicts:
#	src/citra_qt/configuration/configure_graphics.cpp
#	src/core/memory.cpp
#	src/tests/core/arm/arm_test_common.cpp
This commit is contained in:
citra 2017-10-19 19:01:50 +01:00
commit 5cd9f1ee80
21 changed files with 1499 additions and 1055 deletions

View File

@ -88,7 +88,7 @@ void Config::ReadValues() {
Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", true); Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", true);
Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true); Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true);
Settings::values.resolution_factor = Settings::values.resolution_factor =
(float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0); static_cast<u16>(sdl2_config->GetInteger("Renderer", "resolution_factor", 1));
Settings::values.use_vsync = sdl2_config->GetBoolean("Renderer", "use_vsync", false); Settings::values.use_vsync = sdl2_config->GetBoolean("Renderer", "use_vsync", false);
Settings::values.toggle_framelimit = Settings::values.toggle_framelimit =
sdl2_config->GetBoolean("Renderer", "toggle_framelimit", true); sdl2_config->GetBoolean("Renderer", "toggle_framelimit", true);

View File

@ -73,7 +73,8 @@ void Config::ReadValues() {
qt_config->beginGroup("Renderer"); qt_config->beginGroup("Renderer");
Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", true).toBool(); Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", true).toBool();
Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool(); Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool();
Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat(); Settings::values.resolution_factor =
static_cast<u16>(qt_config->value("resolution_factor", 1).toInt());
Settings::values.use_vsync = qt_config->value("use_vsync", false).toBool(); Settings::values.use_vsync = qt_config->value("use_vsync", false).toBool();
Settings::values.toggle_framelimit = qt_config->value("toggle_framelimit", true).toBool(); Settings::values.toggle_framelimit = qt_config->value("toggle_framelimit", true).toBool();

View File

@ -32,81 +32,11 @@ void ConfigureGraphics::showLayoutBackgroundDialog() {
} }
} }
/// Selectable internal-resolution options. The underlying value of each enumerator is cast
/// to and from the index of the resolution factor combo box, so the order must match the UI.
enum class Resolution : int {
    Auto = 0,      ///< Converted to a resolution factor of 0 by ToResolutionFactor
    Scale1x = 1,   ///< 1x scale
    Scale2x = 2,   ///< 2x scale
    Scale3x = 3,   ///< 3x scale
    Scale4x = 4,   ///< 4x scale
    Scale5x = 5,   ///< 5x scale
    Scale6x = 6,   ///< 6x scale
    Scale7x = 7,   ///< 7x scale
    Scale8x = 8,   ///< 8x scale
    Scale9x = 9,   ///< 9x scale
    Scale10x = 10, ///< 10x scale
};
/**
 * Converts a Resolution option into the floating-point resolution factor stored in the settings.
 * @param option Option to convert (typically obtained by casting a combo box index).
 * @return N for ScaleNx; 0 for Auto and for any value that is not a declared enumerator.
 */
float ToResolutionFactor(Resolution option) {
    const int index = static_cast<int>(option);
    const bool is_scale_option = index >= static_cast<int>(Resolution::Scale1x) &&
                                 index <= static_cast<int>(Resolution::Scale10x);
    // ScaleNx is declared N positions after Auto, so its underlying value is exactly N.
    return is_scale_option ? static_cast<float>(index) : 0.f;
}
/**
 * Maps a stored resolution factor back onto the matching Resolution option.
 * @param factor Resolution factor; only the exact values 0, 1, ..., 10 are recognised.
 * @return The option whose scale equals factor, or Resolution::Auto when nothing matches.
 */
Resolution FromResolutionFactor(float factor) {
    const int last_option = static_cast<int>(Resolution::Scale10x);
    // Option values are 0 (Auto) through 10 (Scale10x); compare against each one exactly.
    for (int candidate = 0; candidate <= last_option; ++candidate) {
        if (factor == static_cast<float>(candidate)) {
            return static_cast<Resolution>(candidate);
        }
    }
    return Resolution::Auto;
}
void ConfigureGraphics::setConfiguration() { void ConfigureGraphics::setConfiguration() {
ui->toggle_hw_renderer->setChecked(Settings::values.use_hw_renderer); ui->toggle_hw_renderer->setChecked(Settings::values.use_hw_renderer);
ui->resolution_factor_combobox->setEnabled(Settings::values.use_hw_renderer); ui->resolution_factor_combobox->setEnabled(Settings::values.use_hw_renderer);
ui->toggle_shader_jit->setChecked(Settings::values.use_shader_jit); ui->toggle_shader_jit->setChecked(Settings::values.use_shader_jit);
ui->resolution_factor_combobox->setCurrentIndex( ui->resolution_factor_combobox->setCurrentIndex(Settings::values.resolution_factor);
static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
ui->toggle_vsync->setChecked(Settings::values.use_vsync); ui->toggle_vsync->setChecked(Settings::values.use_vsync);
{ {
bg_color.setRgbF(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue); bg_color.setRgbF(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue);
@ -121,7 +51,7 @@ void ConfigureGraphics::applyConfiguration() {
Settings::values.use_hw_renderer = ui->toggle_hw_renderer->isChecked(); Settings::values.use_hw_renderer = ui->toggle_hw_renderer->isChecked();
Settings::values.use_shader_jit = ui->toggle_shader_jit->isChecked(); Settings::values.use_shader_jit = ui->toggle_shader_jit->isChecked();
Settings::values.resolution_factor = Settings::values.resolution_factor =
ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); static_cast<u16>(ui->resolution_factor_combobox->currentIndex());
Settings::values.use_vsync = ui->toggle_vsync->isChecked(); Settings::values.use_vsync = ui->toggle_vsync->isChecked();
Settings::values.bg_red = bg_color.redF(); Settings::values.bg_red = bg_color.redF();
Settings::values.bg_green = bg_color.greenF(); Settings::values.bg_green = bg_color.greenF();

View File

@ -16,8 +16,8 @@ static const float TOP_SCREEN_ASPECT_RATIO =
static const float BOT_SCREEN_ASPECT_RATIO = static const float BOT_SCREEN_ASPECT_RATIO =
static_cast<float>(Core::kScreenBottomHeight) / Core::kScreenBottomWidth; static_cast<float>(Core::kScreenBottomHeight) / Core::kScreenBottomWidth;
float FramebufferLayout::GetScalingRatio() const { u16 FramebufferLayout::GetScalingRatio() const {
return static_cast<float>(top_screen.GetWidth()) / Core::kScreenTopWidth; return static_cast<u16>(((top_screen.GetWidth() - 1) / Core::kScreenTopWidth) + 1);
} }
// Finds the largest size subrectangle contained in window area that is confined to the aspect ratio // Finds the largest size subrectangle contained in window area that is confined to the aspect ratio

View File

@ -21,7 +21,7 @@ struct FramebufferLayout {
* Returns the ratio of pixel size of the top screen, compared to the native size of the 3DS * Returns the ratio of pixel size of the top screen, compared to the native size of the 3DS
* screen. * screen.
*/ */
float GetScalingRatio() const; u16 GetScalingRatio() const;
}; };
/** /**

View File

@ -58,7 +58,6 @@ void VMManager::Reset() {
page_table.pointers.fill(nullptr); page_table.pointers.fill(nullptr);
page_table.attributes.fill(Memory::PageType::Unmapped); page_table.attributes.fill(Memory::PageType::Unmapped);
page_table.cached_res_count.fill(0);
UpdatePageTableForVMA(initial_vma); UpdatePageTableForVMA(initial_vma);
} }

View File

@ -476,10 +476,11 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
// TODO: Consider attempting rasterizer-accelerated surface blit if that usage is ever // TODO: Consider attempting rasterizer-accelerated surface blit if that usage is ever
// possible/likely // possible/likely
Memory::RasterizerFlushVirtualRegion(command.dma_request.source_address, Memory::RasterizerFlushVirtualRegion(command.dma_request.source_address,
command.dma_request.size, Memory::FlushMode::Flush); command.dma_request.size,
Memory::FlushMode::Flush);
Memory::RasterizerFlushVirtualRegion(command.dma_request.dest_address, Memory::RasterizerFlushVirtualRegion(command.dma_request.dest_address,
command.dma_request.size, command.dma_request.size,
Memory::FlushMode::FlushAndInvalidate); Memory::FlushMode::Invalidate);
// TODO(Subv): These memory accesses should not go through the application's memory mapping. // TODO(Subv): These memory accesses should not go through the application's memory mapping.
// They should go through the GSP module's memory mapping. // They should go through the GSP module's memory mapping.

View File

@ -96,20 +96,11 @@ static void MemoryFill(const Regs::MemoryFillConfig& config) {
u8* start = Memory::GetPhysicalPointer(start_addr); u8* start = Memory::GetPhysicalPointer(start_addr);
u8* end = Memory::GetPhysicalPointer(end_addr); u8* end = Memory::GetPhysicalPointer(end_addr);
// TODO: Consider always accelerating and returning vector of
// regions that the accelerated fill did not cover to
// reduce/eliminate the fill that the cpu has to do.
// This would also mean that the flush below is not needed.
// Fill should first flush all surfaces that touch but are
// not completely within the fill range.
// Then fill all completely covered surfaces, and return the
// regions that were between surfaces or within the touching
// ones for cpu to manually fill here.
if (VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) if (VideoCore::g_renderer->Rasterizer()->AccelerateFill(config))
return; return;
Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), Memory::RasterizerInvalidateRegion(config.GetStartAddress(),
config.GetEndAddress() - config.GetStartAddress()); config.GetEndAddress() - config.GetStartAddress());
if (config.fill_24bit) { if (config.fill_24bit) {
// fill with 24-bit values // fill with 24-bit values
@ -199,7 +190,7 @@ static void DisplayTransfer(const Regs::DisplayTransferConfig& config) {
u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size); Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size); Memory::RasterizerInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
for (u32 y = 0; y < output_height; ++y) { for (u32 y = 0; y < output_height; ++y) {
for (u32 x = 0; x < output_width; ++x) { for (u32 x = 0; x < output_width; ++x) {
@ -367,8 +358,12 @@ static void TextureCopy(const Regs::DisplayTransferConfig& config) {
size_t contiguous_output_size = size_t contiguous_output_size =
config.texture_copy.size / output_width * (output_width + output_gap); config.texture_copy.size / output_width * (output_width + output_gap);
Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), // Only need to flush output if it has a gap
static_cast<u32>(contiguous_output_size)); const auto FlushInvalidate_fn = (output_gap != 0) ?
Memory::RasterizerFlushAndInvalidateRegion :
Memory::RasterizerInvalidateRegion;
FlushInvalidate_fn(config.GetPhysicalOutputAddress(),
static_cast<u32>(contiguous_output_size));
u32 remaining_input = input_width; u32 remaining_input = input_width;
u32 remaining_output = output_width; u32 remaining_output = output_width;

View File

@ -42,7 +42,7 @@ static void MapPages(PageTable& page_table, u32 base, u32 size, u8* memory, Page
(base + size) * PAGE_SIZE); (base + size) * PAGE_SIZE);
RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE, RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE,
FlushMode::FlushAndInvalidate); FlushMode::Invalidate);
u32 end = base + size; u32 end = base + size;
while (base != end) { while (base != end) {
@ -50,7 +50,6 @@ static void MapPages(PageTable& page_table, u32 base, u32 size, u8* memory, Page
page_table.attributes[base] = type; page_table.attributes[base] = type;
page_table.pointers[base] = memory; page_table.pointers[base] = memory;
page_table.cached_res_count[base] = 0;
base += 1; base += 1;
if (memory != nullptr) if (memory != nullptr)
@ -200,7 +199,7 @@ void Write(const VAddr vaddr, const T data) {
ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr);
break; break;
case PageType::RasterizerCachedMemory: { case PageType::RasterizerCachedMemory: {
RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::FlushAndInvalidate); RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate);
std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T)); std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T));
break; break;
} }
@ -208,7 +207,7 @@ void Write(const VAddr vaddr, const T data) {
WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data); WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data);
break; break;
case PageType::RasterizerCachedSpecial: { case PageType::RasterizerCachedSpecial: {
RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::FlushAndInvalidate); RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate);
WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data); WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data);
break; break;
} }
@ -334,7 +333,7 @@ u8* GetPhysicalPointer(PAddr address) {
return target_pointer; return target_pointer;
} }
void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached) {
if (start == 0) { if (start == 0) {
return; return;
} }
@ -355,14 +354,10 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
} }
VAddr vaddr = *maybe_vaddr; VAddr vaddr = *maybe_vaddr;
u8& res_count = current_page_table->cached_res_count[vaddr >> PAGE_BITS]; PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
ASSERT_MSG(count_delta <= UINT8_MAX - res_count,
"Rasterizer resource cache counter overflow!");
ASSERT_MSG(count_delta >= -res_count, "Rasterizer resource cache counter underflow!");
// Switch page type to cached if now cached if (cached) {
if (res_count == 0) { // Switch page type to cached
PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
switch (page_type) { switch (page_type) {
case PageType::Unmapped: case PageType::Unmapped:
// It is not necessary for a process to have this region mapped into its address // It is not necessary for a process to have this region mapped into its address
@ -380,11 +375,8 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
} }
} }
res_count += count_delta; else {
// Switch page type to uncached
// Switch page type to uncached if now uncached
if (res_count == 0) {
PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
switch (page_type) { switch (page_type) {
case PageType::Unmapped: case PageType::Unmapped:
// It is not necessary for a process to have this region mapped into its address // It is not necessary for a process to have this region mapped into its address
@ -413,6 +405,12 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
} }
} }
/// Forwards an invalidation request for the given physical region to the active renderer's
/// rasterizer. Does nothing when no renderer has been created.
void RasterizerInvalidateRegion(PAddr start, u32 size) {
    if (VideoCore::g_renderer == nullptr) {
        return;
    }
    VideoCore::g_renderer->Rasterizer()->InvalidateRegion(start, size);
}
void RasterizerFlushRegion(PAddr start, u32 size) { void RasterizerFlushRegion(PAddr start, u32 size) {
if (VideoCore::g_renderer != nullptr) { if (VideoCore::g_renderer != nullptr) {
VideoCore::g_renderer->Rasterizer()->FlushRegion(start, size); VideoCore::g_renderer->Rasterizer()->FlushRegion(start, size);
@ -450,6 +448,9 @@ void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode) {
case FlushMode::Flush: case FlushMode::Flush:
rasterizer->FlushRegion(physical_start, overlap_size); rasterizer->FlushRegion(physical_start, overlap_size);
break; break;
case FlushMode::Invalidate:
rasterizer->InvalidateRegion(physical_start, overlap_size);
break;
case FlushMode::FlushAndInvalidate: case FlushMode::FlushAndInvalidate:
rasterizer->FlushAndInvalidateRegion(physical_start, overlap_size); rasterizer->FlushAndInvalidateRegion(physical_start, overlap_size);
break; break;
@ -588,7 +589,7 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
} }
case PageType::RasterizerCachedMemory: { case PageType::RasterizerCachedMemory: {
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
FlushMode::FlushAndInvalidate); FlushMode::Invalidate);
std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount); std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount);
break; break;
} }
@ -596,7 +597,7 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr); MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
DEBUG_ASSERT(handler); DEBUG_ASSERT(handler);
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
FlushMode::FlushAndInvalidate); FlushMode::Invalidate);
handler->WriteBlock(current_vaddr, src_buffer, copy_amount); handler->WriteBlock(current_vaddr, src_buffer, copy_amount);
break; break;
} }
@ -647,14 +648,14 @@ void ZeroBlock(const VAddr dest_addr, const size_t size) {
} }
case PageType::RasterizerCachedMemory: { case PageType::RasterizerCachedMemory: {
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
FlushMode::FlushAndInvalidate); FlushMode::Invalidate);
std::memset(GetPointerFromVMA(current_vaddr), 0, copy_amount); std::memset(GetPointerFromVMA(current_vaddr), 0, copy_amount);
break; break;
} }
case PageType::RasterizerCachedSpecial: { case PageType::RasterizerCachedSpecial: {
DEBUG_ASSERT(GetMMIOHandler(current_vaddr)); DEBUG_ASSERT(GetMMIOHandler(current_vaddr));
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
FlushMode::FlushAndInvalidate); FlushMode::Invalidate);
GetMMIOHandler(current_vaddr)->WriteBlock(current_vaddr, zeros.data(), copy_amount); GetMMIOHandler(current_vaddr)->WriteBlock(current_vaddr, zeros.data(), copy_amount);
break; break;
} }

View File

@ -72,12 +72,6 @@ struct PageTable {
* the corresponding entry in `pointers` MUST be set to null. * the corresponding entry in `pointers` MUST be set to null.
*/ */
std::array<PageType, PAGE_TABLE_NUM_ENTRIES> attributes; std::array<PageType, PAGE_TABLE_NUM_ENTRIES> attributes;
/**
* Indicates the number of externally cached resources touching a page that should be
* flushed before the memory is accessed
*/
std::array<u8, PAGE_TABLE_NUM_ENTRIES> cached_res_count;
}; };
/// Physical memory regions as seen from the ARM11 /// Physical memory regions as seen from the ARM11
@ -244,16 +238,20 @@ boost::optional<VAddr> PhysicalToVirtualAddress(PAddr addr);
u8* GetPhysicalPointer(PAddr address); u8* GetPhysicalPointer(PAddr address);
/** /**
* Adds the supplied value to the rasterizer resource cache counter of each * Marks each page touching the region as cached or uncached, depending on `cached`.
* page touching the region.
*/ */
void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta); void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached);
/** /**
* Flushes any externally cached rasterizer resources touching the given region. * Flushes any externally cached rasterizer resources touching the given region.
*/ */
void RasterizerFlushRegion(PAddr start, u32 size); void RasterizerFlushRegion(PAddr start, u32 size);
/**
* Invalidates any externally cached rasterizer resources touching the given region.
*/
void RasterizerInvalidateRegion(PAddr start, u32 size);
/** /**
* Flushes and invalidates any externally cached rasterizer resources touching the given region. * Flushes and invalidates any externally cached rasterizer resources touching the given region.
*/ */
@ -262,6 +260,8 @@ void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size);
enum class FlushMode { enum class FlushMode {
/// Write back modified surfaces to RAM /// Write back modified surfaces to RAM
Flush, Flush,
/// Remove region from the cache
Invalidate,
/// Write back modified surfaces to RAM, and also remove them from the cache /// Write back modified surfaces to RAM, and also remove them from the cache
FlushAndInvalidate, FlushAndInvalidate,
}; };

View File

@ -95,7 +95,7 @@ struct Values {
// Renderer // Renderer
bool use_hw_renderer; bool use_hw_renderer;
bool use_shader_jit; bool use_shader_jit;
float resolution_factor; u16 resolution_factor;
bool use_vsync; bool use_vsync;
bool toggle_framelimit; bool toggle_framelimit;

View File

@ -20,7 +20,6 @@ TestEnvironment::TestEnvironment(bool mutable_memory_)
page_table->pointers.fill(nullptr); page_table->pointers.fill(nullptr);
page_table->attributes.fill(Memory::PageType::Unmapped); page_table->attributes.fill(Memory::PageType::Unmapped);
page_table->cached_res_count.fill(0);
Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory); Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory); Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);

View File

@ -38,6 +38,9 @@ public:
/// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory
virtual void FlushRegion(PAddr addr, u32 size) = 0; virtual void FlushRegion(PAddr addr, u32 size) = 0;
/// Notify rasterizer that any caches of the specified region should be invalidated
virtual void InvalidateRegion(PAddr addr, u32 size) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory
/// and invalidated /// and invalidated
virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0; virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0;

View File

@ -8,7 +8,6 @@
#include <utility> #include <utility>
#include <glad/glad.h> #include <glad/glad.h>
#include "common/assert.h" #include "common/assert.h"
#include "common/color.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/math_util.h" #include "common/math_util.h"
#include "common/microprofile.h" #include "common/microprofile.h"
@ -23,6 +22,9 @@
#include "video_core/renderer_opengl/pica_to_gl.h" #include "video_core/renderer_opengl/pica_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/renderer_opengl/renderer_opengl.h"
using PixelFormat = SurfaceParams::PixelFormat;
using SurfaceType = SurfaceParams::SurfaceType;
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
@ -225,21 +227,59 @@ void RasterizerOpenGL::DrawTriangles() {
MICROPROFILE_SCOPE(OpenGL_Drawing); MICROPROFILE_SCOPE(OpenGL_Drawing);
const auto& regs = Pica::g_state.regs; const auto& regs = Pica::g_state.regs;
// Sync and bind the framebuffer surfaces const bool has_stencil = regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8;
CachedSurface* color_surface;
CachedSurface* depth_surface;
MathUtil::Rectangle<int> rect;
std::tie(color_surface, depth_surface, rect) =
res_cache.GetFramebufferSurfaces(regs.framebuffer.framebuffer);
const bool write_color_fb = state.color_mask.red_enabled == GL_TRUE ||
state.color_mask.green_enabled == GL_TRUE ||
state.color_mask.blue_enabled == GL_TRUE ||
state.color_mask.alpha_enabled == GL_TRUE;
const bool write_depth_fb = state.depth.write_mask == GL_TRUE ||
(has_stencil && state.stencil.write_mask != 0);
const bool using_color_fb = regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 &&
write_color_fb;
const bool using_depth_fb = regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 &&
(write_depth_fb || state.depth.test_enabled || (has_stencil && state.stencil.test_enabled));
MathUtil::Rectangle<s32> viewport_rect_unscaled{
// These registers hold half-width and half-height, so must be multiplied by 2
regs.rasterizer.viewport_corner.x, // left
regs.rasterizer.viewport_corner.y + // top
static_cast<s32>(Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() * 2),
regs.rasterizer.viewport_corner.x + // right
static_cast<s32>(Pica::float24::FromRaw(regs.rasterizer.viewport_size_x).ToFloat32() * 2),
regs.rasterizer.viewport_corner.y // bottom
};
Surface color_surface;
Surface depth_surface;
MathUtil::Rectangle<u32> surfaces_rect;
std::tie(color_surface, depth_surface, surfaces_rect) =
res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled);
const u16 res_scale = color_surface != nullptr ? color_surface->res_scale :
(depth_surface == nullptr ? 1u : depth_surface->res_scale);
MathUtil::Rectangle<u32> draw_rect{
MathUtil::Clamp(surfaces_rect.left + viewport_rect_unscaled.left * res_scale, // left
surfaces_rect.left, surfaces_rect.right),
MathUtil::Clamp(surfaces_rect.bottom + viewport_rect_unscaled.GetHeight() * res_scale, // top
surfaces_rect.bottom, surfaces_rect.top),
MathUtil::Clamp(surfaces_rect.left + viewport_rect_unscaled.GetWidth() * res_scale, // right
surfaces_rect.left, surfaces_rect.right),
MathUtil::Clamp(surfaces_rect.bottom + viewport_rect_unscaled.bottom * res_scale, // bottom
surfaces_rect.bottom, surfaces_rect.top)
};
// Bind the framebuffer surfaces
state.draw.draw_framebuffer = framebuffer.handle; state.draw.draw_framebuffer = framebuffer.handle;
state.Apply(); state.Apply();
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
color_surface != nullptr ? color_surface->texture.handle : 0, 0); color_surface != nullptr ? color_surface->texture.handle : 0, 0);
if (depth_surface != nullptr) { if (depth_surface != nullptr) {
if (regs.framebuffer.framebuffer.depth_format == if (has_stencil) {
Pica::FramebufferRegs::DepthFormat::D24S8) {
// attach both depth and stencil // attach both depth and stencil
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
depth_surface->texture.handle, 0); depth_surface->texture.handle, 0);
@ -257,38 +297,29 @@ void RasterizerOpenGL::DrawTriangles() {
} }
// Sync the viewport // Sync the viewport
// These registers hold half-width and half-height, so must be multiplied by 2
GLsizei viewport_width =
(GLsizei)Pica::float24::FromRaw(regs.rasterizer.viewport_size_x).ToFloat32() * 2;
GLsizei viewport_height =
(GLsizei)Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() * 2;
glViewport( glViewport(
(GLint)(rect.left + regs.rasterizer.viewport_corner.x * color_surface->res_scale_width), static_cast<GLint>(surfaces_rect.left + viewport_rect_unscaled.left * res_scale),
(GLint)(rect.bottom + regs.rasterizer.viewport_corner.y * color_surface->res_scale_height), static_cast<GLint>(surfaces_rect.bottom + viewport_rect_unscaled.bottom * res_scale),
(GLsizei)(viewport_width * color_surface->res_scale_width), static_cast<GLsizei>(viewport_rect_unscaled.GetWidth() * res_scale),
(GLsizei)(viewport_height * color_surface->res_scale_height)); static_cast<GLsizei>(viewport_rect_unscaled.GetHeight() * res_scale));
if (uniform_block_data.data.framebuffer_scale[0] != color_surface->res_scale_width || if (uniform_block_data.data.framebuffer_scale != res_scale) {
uniform_block_data.data.framebuffer_scale[1] != color_surface->res_scale_height) { uniform_block_data.data.framebuffer_scale = res_scale;
uniform_block_data.data.framebuffer_scale[0] = color_surface->res_scale_width;
uniform_block_data.data.framebuffer_scale[1] = color_surface->res_scale_height;
uniform_block_data.dirty = true; uniform_block_data.dirty = true;
} }
// Scissor checks are window-, not viewport-relative, which means that if the cached texture // Scissor checks are window-, not viewport-relative, which means that if the cached texture
// sub-rect changes, the scissor bounds also need to be updated. // sub-rect changes, the scissor bounds also need to be updated.
GLint scissor_x1 = static_cast<GLint>( GLint scissor_x1 = static_cast<GLint>(
rect.left + regs.rasterizer.scissor_test.x1 * color_surface->res_scale_width); surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale);
GLint scissor_y1 = static_cast<GLint>( GLint scissor_y1 = static_cast<GLint>(
rect.bottom + regs.rasterizer.scissor_test.y1 * color_surface->res_scale_height); surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale);
// x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when // x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when
// scaling or doing multisampling. // scaling or doing multisampling.
GLint scissor_x2 = static_cast<GLint>( GLint scissor_x2 = static_cast<GLint>(
rect.left + (regs.rasterizer.scissor_test.x2 + 1) * color_surface->res_scale_width); surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale);
GLint scissor_y2 = static_cast<GLint>( GLint scissor_y2 = static_cast<GLint>(
rect.bottom + (regs.rasterizer.scissor_test.y2 + 1) * color_surface->res_scale_height); surfaces_rect.bottom + (regs.rasterizer.scissor_test.y2 + 1) * res_scale);
if (uniform_block_data.data.scissor_x1 != scissor_x1 || if (uniform_block_data.data.scissor_x1 != scissor_x1 ||
uniform_block_data.data.scissor_x2 != scissor_x2 || uniform_block_data.data.scissor_x2 != scissor_x2 ||
@ -309,7 +340,7 @@ void RasterizerOpenGL::DrawTriangles() {
if (texture.enabled) { if (texture.enabled) {
texture_samplers[texture_index].SyncWithConfig(texture.config); texture_samplers[texture_index].SyncWithConfig(texture.config);
CachedSurface* surface = res_cache.GetTextureSurface(texture); Surface surface = res_cache.GetTextureSurface(texture);
if (surface != nullptr) { if (surface != nullptr) {
state.texture_units[texture_index].texture_2d = surface->texture.handle; state.texture_units[texture_index].texture_2d = surface->texture.handle;
} else { } else {
@ -378,6 +409,15 @@ void RasterizerOpenGL::DrawTriangles() {
uniform_block_data.dirty = false; uniform_block_data.dirty = false;
} }
// Viewport can have negative offsets or larger
// dimensions than our framebuffer sub-rect.
// Enable scissor test to prevent drawing
// outside of the framebuffer region
state.scissor.enabled = true;
state.scissor.x = draw_rect.left;
state.scissor.y = draw_rect.bottom;
state.scissor.width = draw_rect.GetWidth();
state.scissor.height = draw_rect.GetHeight();
state.Apply(); state.Apply();
// Draw the vertex batch // Draw the vertex batch
@ -385,16 +425,8 @@ void RasterizerOpenGL::DrawTriangles() {
GL_STREAM_DRAW); GL_STREAM_DRAW);
glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size());
// Mark framebuffer surfaces as dirty // Disable scissor test
// TODO: Restrict invalidation area to the viewport state.scissor.enabled = false;
if (color_surface != nullptr) {
color_surface->dirty = true;
res_cache.FlushRegion(color_surface->addr, color_surface->size, color_surface, true);
}
if (depth_surface != nullptr) {
depth_surface->dirty = true;
res_cache.FlushRegion(depth_surface->addr, depth_surface->size, depth_surface, true);
}
vertex_batch.clear(); vertex_batch.clear();
@ -403,6 +435,25 @@ void RasterizerOpenGL::DrawTriangles() {
state.texture_units[texture_index].texture_2d = 0; state.texture_units[texture_index].texture_2d = 0;
} }
state.Apply(); state.Apply();
// Mark framebuffer surfaces as dirty
MathUtil::Rectangle<u32> draw_rect_unscaled{
draw_rect.left / res_scale, draw_rect.top / res_scale,
draw_rect.right / res_scale, draw_rect.bottom / res_scale
};
if (color_surface != nullptr && write_color_fb) {
auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled);
res_cache.InvalidateRegion(boost::icl::first(interval),
boost::icl::length(interval),
color_surface);
}
if (depth_surface != nullptr && write_depth_fb) {
auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled);
res_cache.InvalidateRegion(boost::icl::first(interval),
boost::icl::length(interval),
depth_surface);
}
} }
void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
@ -891,227 +942,122 @@ void RasterizerOpenGL::FlushAll() {
void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement); MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.FlushRegion(addr, size, nullptr, false); res_cache.FlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.InvalidateRegion(addr, size, nullptr);
} }
void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) { void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement); MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.FlushRegion(addr, size, nullptr, true); res_cache.FlushRegion(addr, size);
res_cache.InvalidateRegion(addr, size, nullptr);
} }
bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
MICROPROFILE_SCOPE(OpenGL_Blits); MICROPROFILE_SCOPE(OpenGL_Blits);
CachedSurface src_params; SurfaceParams src_params;
src_params.addr = config.GetPhysicalInputAddress(); src_params.addr = config.GetPhysicalInputAddress();
// It's important to use the correct source input width to properly skip over parts of the input src_params.width = config.output_width;
// image which will be cropped from the output but still affect the stride of the input image. src_params.stride = config.input_width;
src_params.width = config.input_width;
// Using the output's height is fine because we don't read or skip over the remaining part of
// the image, and it allows for smaller texture cache lookup rectangles.
src_params.height = config.output_height; src_params.height = config.output_height;
src_params.is_tiled = !config.input_linear; src_params.is_tiled = !config.input_linear;
src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.input_format); src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.input_format);
src_params.UpdateParams();
CachedSurface dst_params; SurfaceParams dst_params;
dst_params.addr = config.GetPhysicalOutputAddress(); dst_params.addr = config.GetPhysicalOutputAddress();
dst_params.width = dst_params.width = config.scaling != config.NoScale ? config.output_width.Value() / 2 : config.output_width.Value();
config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value(); dst_params.height = config.scaling == config.ScaleXY ? config.output_height.Value() / 2 : config.output_height.Value();
dst_params.height =
config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value();
dst_params.is_tiled = config.input_linear != config.dont_swizzle; dst_params.is_tiled = config.input_linear != config.dont_swizzle;
dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format); dst_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.output_format);
dst_params.UpdateParams();
MathUtil::Rectangle<int> src_rect; MathUtil::Rectangle<u32> src_rect;
CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect); Surface src_surface;
std::tie(src_surface, src_rect) = res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true);
if (src_surface == nullptr) { if (src_surface == nullptr)
return false; return false;
}
// Adjust the source rectangle to take into account parts of the input lines being cropped dst_params.res_scale = src_surface->res_scale;
if (config.input_width > config.output_width) {
src_rect.right -= static_cast<int>((config.input_width - config.output_width) *
src_surface->res_scale_width);
}
// Require destination surface to have same resolution scale as source to preserve scaling MathUtil::Rectangle<u32> dst_rect;
dst_params.res_scale_width = src_surface->res_scale_width; Surface dst_surface;
dst_params.res_scale_height = src_surface->res_scale_height; std::tie(dst_surface, dst_rect) = res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, false);
if (dst_surface == nullptr)
MathUtil::Rectangle<int> dst_rect;
CachedSurface* dst_surface = res_cache.GetSurfaceRect(dst_params, true, false, dst_rect);
if (dst_surface == nullptr) {
return false; return false;
}
// Don't accelerate if the src and dst surfaces are the same if (config.flip_vertically)
if (src_surface == dst_surface) { std::swap(src_rect.top, src_rect.bottom);
if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect))
return false; return false;
}
if (config.flip_vertically) { res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface);
std::swap(dst_rect.top, dst_rect.bottom);
}
if (!res_cache.TryBlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) {
return false;
}
u32 dst_size = dst_params.width * dst_params.height *
CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8;
dst_surface->dirty = true;
res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true);
return true; return true;
} }
/**
 * Tries to perform a "texture copy" display transfer with cached surfaces instead of touching
 * emulated memory directly.
 *
 * @param config Display transfer registers; only the texture_copy fields and the physical
 *               input/output addresses are read.
 * @return true when the copy was handled here (including the trivial zero-size case), false
 *         when it could not be accelerated. NOTE(review): the caller presumably falls back to
 *         a non-accelerated copy on false - confirm.
 */
bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) {
    // The register fields are scaled by 16 and treated as byte counts from here on.
    const u32 input_width = config.texture_copy.input_width * 16;
    const u32 input_gap = config.texture_copy.input_gap * 16;
    const u32 output_width = config.texture_copy.output_width * 16;
    const u32 output_gap = config.texture_copy.output_gap * 16;

    // Nothing to copy: report success without touching the cache.
    if (config.texture_copy.size == 0)
        return true;

    // input_width == 0 must be rejected before the modulo below, otherwise a config with
    // both widths set to zero divides by zero.
    if (input_width == 0 || input_width != output_width ||
        config.texture_copy.size % input_width != 0)
        return false;

    SurfaceParams src_params;
    src_params.addr = config.GetPhysicalInputAddress();
    src_params.stride = input_width + input_gap; // stride in bytes
    src_params.width = input_width;              // width in bytes
    src_params.height = config.texture_copy.size / input_width;
    src_params.size = ((src_params.height - 1) * src_params.stride) + src_params.width;
    src_params.end = src_params.addr + src_params.size;

    MathUtil::Rectangle<u32> src_rect;
    Surface src_surface;
    std::tie(src_surface, src_rect) = res_cache.GetTexCopySurface(src_params);
    if (src_surface == nullptr)
        return false;

    // The output gap has to be a whole number of pixels of the source format and, for tiled
    // surfaces, a multiple of 64 pixels.
    if ((output_gap * 8) % SurfaceParams::GetFormatBpp(src_surface->pixel_format) != 0 ||
        (src_surface->is_tiled && src_surface->PixelsInBytes(output_gap) % 64 != 0))
        return false;

    // Start from the source surface's parameters and rescale the byte based width/stride by
    // the ratio between the source surface's stride and the byte stride of the request.
    SurfaceParams dst_params = *src_surface;
    dst_params.addr = config.GetPhysicalOutputAddress();
    dst_params.stride = (output_width + output_gap) * src_surface->stride / src_params.stride;
    dst_params.width = output_width * src_surface->stride / src_params.stride;
    dst_params.height = src_surface->is_tiled ? src_params.height * 8 : src_params.height;
    dst_params.res_scale = src_surface->res_scale;
    dst_params.UpdateParams();

    // Since we are going to invalidate the gap if there is one, we will have to load it first
    const bool load_gap = output_gap != 0;

    MathUtil::Rectangle<u32> dst_rect;
    Surface dst_surface;
    std::tie(dst_surface, dst_rect) =
        res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, load_gap);
    // Check the destination here (the source was already validated above); a null
    // destination would otherwise be handed to BlitSurfaces and InvalidateRegion.
    if (dst_surface == nullptr)
        return false;

    if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect))
        return false;

    res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface);
    return true;
}
bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) {
MICROPROFILE_SCOPE(OpenGL_Blits); Surface dst_surface = res_cache.GetFillSurface(config);
using PixelFormat = CachedSurface::PixelFormat; if (dst_surface == nullptr)
using SurfaceType = CachedSurface::SurfaceType;
CachedSurface* dst_surface = res_cache.TryGetFillSurface(config);
if (dst_surface == nullptr) {
return false; return false;
}
OpenGLState cur_state = OpenGLState::GetCurState(); res_cache.InvalidateRegion(dst_surface->addr, dst_surface->size, dst_surface);
SurfaceType dst_type = CachedSurface::GetFormatType(dst_surface->pixel_format);
GLuint old_fb = cur_state.draw.draw_framebuffer;
cur_state.draw.draw_framebuffer = framebuffer.handle;
// TODO: When scissor test is implemented, need to disable scissor test in cur_state here so
// Clear call isn't affected
cur_state.Apply();
if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) {
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
dst_surface->texture.handle, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0);
GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f};
// TODO: Handle additional pixel format and fill value size combinations to accelerate more
// cases
// For instance, checking if fill value's bytes/bits repeat to allow filling
// I8/A8/I4/A4/...
// Currently only handles formats that are multiples of the fill value size
if (config.fill_24bit) {
switch (dst_surface->pixel_format) {
case PixelFormat::RGB8:
color_values[0] = config.value_24bit_r / 255.0f;
color_values[1] = config.value_24bit_g / 255.0f;
color_values[2] = config.value_24bit_b / 255.0f;
break;
default:
return false;
}
} else if (config.fill_32bit) {
u32 value = config.value_32bit;
switch (dst_surface->pixel_format) {
case PixelFormat::RGBA8:
color_values[0] = (value >> 24) / 255.0f;
color_values[1] = ((value >> 16) & 0xFF) / 255.0f;
color_values[2] = ((value >> 8) & 0xFF) / 255.0f;
color_values[3] = (value & 0xFF) / 255.0f;
break;
default:
return false;
}
} else {
u16 value_16bit = config.value_16bit.Value();
Math::Vec4<u8> color;
switch (dst_surface->pixel_format) {
case PixelFormat::RGBA8:
color_values[0] = (value_16bit >> 8) / 255.0f;
color_values[1] = (value_16bit & 0xFF) / 255.0f;
color_values[2] = color_values[0];
color_values[3] = color_values[1];
break;
case PixelFormat::RGB5A1:
color = Color::DecodeRGB5A1((const u8*)&value_16bit);
color_values[0] = color[0] / 31.0f;
color_values[1] = color[1] / 31.0f;
color_values[2] = color[2] / 31.0f;
color_values[3] = color[3];
break;
case PixelFormat::RGB565:
color = Color::DecodeRGB565((const u8*)&value_16bit);
color_values[0] = color[0] / 31.0f;
color_values[1] = color[1] / 63.0f;
color_values[2] = color[2] / 31.0f;
break;
case PixelFormat::RGBA4:
color = Color::DecodeRGBA4((const u8*)&value_16bit);
color_values[0] = color[0] / 15.0f;
color_values[1] = color[1] / 15.0f;
color_values[2] = color[2] / 15.0f;
color_values[3] = color[3] / 15.0f;
break;
case PixelFormat::IA8:
case PixelFormat::RG8:
color_values[0] = (value_16bit >> 8) / 255.0f;
color_values[1] = (value_16bit & 0xFF) / 255.0f;
break;
default:
return false;
}
}
cur_state.color_mask.red_enabled = GL_TRUE;
cur_state.color_mask.green_enabled = GL_TRUE;
cur_state.color_mask.blue_enabled = GL_TRUE;
cur_state.color_mask.alpha_enabled = GL_TRUE;
cur_state.Apply();
glClearBufferfv(GL_COLOR, 0, color_values);
} else if (dst_type == SurfaceType::Depth) {
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
dst_surface->texture.handle, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
GLfloat value_float;
if (dst_surface->pixel_format == CachedSurface::PixelFormat::D16) {
value_float = config.value_32bit / 65535.0f; // 2^16 - 1
} else if (dst_surface->pixel_format == CachedSurface::PixelFormat::D24) {
value_float = config.value_32bit / 16777215.0f; // 2^24 - 1
}
cur_state.depth.write_mask = GL_TRUE;
cur_state.Apply();
glClearBufferfv(GL_DEPTH, 0, &value_float);
} else if (dst_type == SurfaceType::DepthStencil) {
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
dst_surface->texture.handle, 0);
GLfloat value_float = (config.value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1
GLint value_int = (config.value_32bit >> 24);
cur_state.depth.write_mask = GL_TRUE;
cur_state.stencil.write_mask = 0xFF;
cur_state.Apply();
glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int);
}
cur_state.draw.draw_framebuffer = old_fb;
// TODO: Return scissor test to previous value when scissor test is implemented
cur_state.Apply();
dst_surface->dirty = true;
res_cache.FlushRegion(dst_surface->addr, dst_surface->size, dst_surface, true);
return true; return true;
} }
@ -1123,16 +1069,18 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con
} }
MICROPROFILE_SCOPE(OpenGL_CacheManagement); MICROPROFILE_SCOPE(OpenGL_CacheManagement);
CachedSurface src_params; SurfaceParams src_params;
src_params.addr = framebuffer_addr; src_params.addr = framebuffer_addr;
src_params.width = config.width; src_params.width = std::min(config.width.Value(), pixel_stride);
src_params.height = config.height; src_params.height = config.height;
src_params.pixel_stride = pixel_stride; src_params.stride = pixel_stride;
src_params.is_tiled = false; src_params.is_tiled = false;
src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.color_format); src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.color_format);
src_params.UpdateParams();
MathUtil::Rectangle<int> src_rect; MathUtil::Rectangle<u32> src_rect;
CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect); Surface src_surface;
std::tie(src_surface, src_rect) = res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true);
if (src_surface == nullptr) { if (src_surface == nullptr) {
return false; return false;

View File

@ -43,6 +43,7 @@ public:
void NotifyPicaRegisterChanged(u32 id) override; void NotifyPicaRegisterChanged(u32 id) override;
void FlushAll() override; void FlushAll() override;
void FlushRegion(PAddr addr, u32 size) override; void FlushRegion(PAddr addr, u32 size) override;
void InvalidateRegion(PAddr addr, u32 size) override;
void FlushAndInvalidateRegion(PAddr addr, u32 size) override; void FlushAndInvalidateRegion(PAddr addr, u32 size) override;
bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override;
bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override; bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override;
@ -135,7 +136,7 @@ private:
// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
// Not following that rule will cause problems on some AMD drivers. // Not following that rule will cause problems on some AMD drivers.
struct UniformData { struct UniformData {
alignas(8) GLvec2 framebuffer_scale; GLint framebuffer_scale;
GLint alphatest_ref; GLint alphatest_ref;
GLfloat depth_scale; GLfloat depth_scale;
GLfloat depth_offset; GLfloat depth_offset;
@ -155,7 +156,7 @@ private:
}; };
static_assert( static_assert(
sizeof(UniformData) == 0x470, sizeof(UniformData) == 0x460,
"The size of the UniformData structure has changed, update the structure in the shader"); "The size of the UniformData structure has changed, update the structure in the shader");
static_assert(sizeof(UniformData) < 16384, static_assert(sizeof(UniformData) < 16384,
"UniformData structure must be less than 16kb as per the OpenGL spec"); "UniformData structure must be less than 16kb as per the OpenGL spec");

File diff suppressed because it is too large Load Diff

View File

@ -12,6 +12,7 @@
#pragma GCC diagnostic push #pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-local-typedef" #pragma GCC diagnostic ignored "-Wunused-local-typedef"
#endif #endif
#include <boost/icl/interval_set.hpp>
#include <boost/icl/interval_map.hpp> #include <boost/icl/interval_map.hpp>
#ifdef __GNUC__ #ifdef __GNUC__
#pragma GCC diagnostic pop #pragma GCC diagnostic pop
@ -20,21 +21,36 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/common_funcs.h" #include "common/common_funcs.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/math_util.h"
#include "core/hw/gpu.h" #include "core/hw/gpu.h"
#include "video_core/regs_framebuffer.h" #include "video_core/regs_framebuffer.h"
#include "video_core/regs_texturing.h" #include "video_core/regs_texturing.h"
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h"
namespace MathUtil {
template <class T>
struct Rectangle;
}
struct CachedSurface; struct CachedSurface;
using Surface = std::shared_ptr<CachedSurface>;
using SurfaceSet = std::set<Surface>;
using SurfaceCache = boost::icl::interval_map<PAddr, std::set<std::shared_ptr<CachedSurface>>>; using SurfaceRegions = boost::icl::interval_set<PAddr>;
using SurfaceMap = boost::icl::interval_map<PAddr, Surface>;
using SurfaceCache = boost::icl::interval_map<PAddr, SurfaceSet>;
struct CachedSurface { using SurfaceInterval = SurfaceCache::interval_type;
static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() &&
std::is_same<SurfaceMap::interval_type, SurfaceCache::interval_type>(), "incorrect interval types");
using SurfaceRect_Tuple = std::tuple<Surface, MathUtil::Rectangle<u32>>;
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
using PageMap = boost::icl::interval_map<u32, int>;
/// Policy describing how the resolution scale of a cached surface has to relate to the
/// resolution scale of the requested surface parameters during a cache lookup.
/// NOTE(review): usage inferred from the `match_res_scale` parameters of GetSurface /
/// GetSurfaceSubRect - confirm against the cache implementation.
enum class ScaleMatch {
Exact, // only accept same res scale
Upscale, // only allow higher scale than params
Ignore // accept every scaled res
};
struct SurfaceParams {
enum class PixelFormat { enum class PixelFormat {
// First 5 formats are shared between textures and color buffers // First 5 formats are shared between textures and color buffers
RGBA8 = 0, RGBA8 = 0,
@ -68,11 +84,12 @@ struct CachedSurface {
Texture = 1, Texture = 1,
Depth = 2, Depth = 2,
DepthStencil = 3, DepthStencil = 3,
Invalid = 4, Fill = 4,
Invalid = 5
}; };
static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) { static unsigned int GetFormatBpp(SurfaceParams::PixelFormat format) {
static const std::array<unsigned int, 18> bpp_table = { static constexpr std::array<unsigned int, 18> bpp_table = {
32, // RGBA8 32, // RGBA8
24, // RGB8 24, // RGB8
16, // RGB5A1 16, // RGB5A1
@ -93,8 +110,8 @@ struct CachedSurface {
32, // D24S8 32, // D24S8
}; };
ASSERT((unsigned int)format < ARRAY_SIZE(bpp_table)); ASSERT(static_cast<size_t>(format) < bpp_table.size());
return bpp_table[(unsigned int)format]; return bpp_table[static_cast<size_t>(format)];
} }
static PixelFormat PixelFormatFromTextureFormat(Pica::TexturingRegs::TextureFormat format) { static PixelFormat PixelFormatFromTextureFormat(Pica::TexturingRegs::TextureFormat format) {
@ -162,31 +179,108 @@ struct CachedSurface {
return SurfaceType::Invalid; return SurfaceType::Invalid;
} }
/// Derive "size", "end" and "type" from the already set "addr", "width", "height" and
/// "pixel_format". A "stride" of 0 is replaced by "width"; for any other stride the bytes of
/// the (stride - width) gap following each of the first (height - 1) rows are added to "size".
void UpdateParams() {
    const unsigned int bits_per_pixel = GetFormatBpp(pixel_format);

    size = width * height * bits_per_pixel / 8;
    if (stride != 0) {
        size += (stride - width) * (height - 1) * bits_per_pixel / 8;
    } else {
        stride = width;
    }

    end = addr + size;
    type = GetFormatType(pixel_format);
}
/// Returns the right-open address interval [addr, end) built from this surface's parameters.
SurfaceInterval GetInterval() const {
return SurfaceInterval::right_open(addr, end);
}
SurfaceInterval GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const;
u32 GetScaledWidth() const { u32 GetScaledWidth() const {
return (u32)(width * res_scale_width); return width * res_scale;
} }
u32 GetScaledHeight() const { u32 GetScaledHeight() const {
return (u32)(height * res_scale_height); return height * res_scale;
} }
PAddr addr; MathUtil::Rectangle<u32> GetRect() const {
u32 size; return { 0, 0, width, height };
}
PAddr min_valid; MathUtil::Rectangle<u32> GetScaledRect() const {
PAddr max_valid; return { 0, 0, GetScaledWidth(), GetScaledHeight() };
}
/// Converts a byte count into a pixel count for this surface's pixel_format.
/// Uses integer division, so a partial trailing pixel is dropped.
/// Note that the parameter shadows the "size" member inside this function.
u32 PixelsInBytes(u32 size) const {
return size * 8 / GetFormatBpp(pixel_format);
}
/// Converts a pixel count into a byte count for this surface's pixel_format.
/// Uses integer division, so the result is rounded down to whole bytes.
u32 BytesInPixels(u32 pixels) const {
return pixels * GetFormatBpp(pixel_format) / 8;
}
/// Size of a single pixel in whole bytes; evaluates to 0 for any format whose
/// GetFormatBpp value is below 8 because BytesInPixels rounds down.
u32 BytesPerPixel() const {
return BytesInPixels(1);
}
bool ExactMatch(const SurfaceParams& other_surface) const;
bool CanSubRect(const SurfaceParams& sub_surface) const;
bool CanExpand(const SurfaceParams& expanded_surface) const;
bool CanTexCopy(const SurfaceParams& texcopy_params) const;
MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const;
MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const;
PAddr addr = 0;
PAddr end = 0;
u32 size = 0;
u32 width = 0;
u32 height = 0;
u32 stride = 0;
u16 res_scale = 1;
bool is_tiled = false;
PixelFormat pixel_format = PixelFormat::Invalid;
SurfaceType type = SurfaceType::Invalid;
};
struct CachedSurface : SurfaceParams {
bool CanCopy(const SurfaceParams& dest_surface) const;
/// Returns true when looking up `interval` in invalid_regions finds no entry.
/// NOTE(review): relies on interval_set::find(interval) matching any stored interval that
/// overlaps `interval` - confirm against the Boost.ICL version in use.
bool IsRegionValid(const SurfaceInterval& interval) const {
return (invalid_regions.find(interval) == invalid_regions.end());
}
/// Returns true when no invalid region is found for `interval`, or when the invalid region
/// that was found starts after this surface's "addr" or stops before its "end".
bool IsRegionPartiallyValid(const SurfaceInterval& interval) const {
    const auto invalid_it = invalid_regions.find(interval);
    if (invalid_it == invalid_regions.end()) {
        return true;
    }

    // The bounds are compared against the surface itself, not against `interval`
    if (boost::icl::first(*invalid_it) > addr) {
        return true;
    }
    return boost::icl::last_next(*invalid_it) < end;
}
SurfaceRegions invalid_regions;
u32 fill_size = 0; /// Number of bytes to read from fill_data
std::array<u8, 4> fill_data;
OGLTexture texture; OGLTexture texture;
u32 width;
u32 height;
/// Stride between lines, in pixels. Only valid for images in linear format.
u32 pixel_stride = 0;
float res_scale_width = 1.f;
float res_scale_height = 1.f;
bool is_tiled; u32 gl_bytes_per_pixel;
PixelFormat pixel_format; int gl_buffer_offset;
bool dirty; std::vector<u8> gl_buffer;
bool gl_buffer_dirty;
// Read/Write data in 3DS memory to/from gl_buffer
void LoadGLBuffer(PAddr load_start, PAddr load_end);
void FlushGLBuffer(PAddr flush_start, PAddr flush_end);
// Upload/Download data in gl_buffer in/to this surface's texture
void UploadGLTexture();
void DownloadGLTexture();
}; };
class RasterizerCacheOpenGL : NonCopyable { class RasterizerCacheOpenGL : NonCopyable {
@ -194,46 +288,57 @@ public:
RasterizerCacheOpenGL(); RasterizerCacheOpenGL();
~RasterizerCacheOpenGL(); ~RasterizerCacheOpenGL();
/// Blits one texture to another /// Blit one surface's texture to another
void BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle<u32>& src_rect,
const MathUtil::Rectangle<int>& src_rect, const Surface& dst_surface, const MathUtil::Rectangle<u32>& dst_rect);
const MathUtil::Rectangle<int>& dst_rect);
/// Attempt to blit one surface's texture to another /// Load a texture from 3DS memory to OpenGL and cache it (if not already cached)
bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, bool load_if_create);
CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect);
/// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached)
CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale,
bool load_if_create);
/// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from
/// 3DS memory to OpenGL and caches it (if not already cached) /// 3DS memory to OpenGL and caches it (if not already cached)
CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale, SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale,
bool load_if_create, MathUtil::Rectangle<int>& out_rect); bool load_if_create);
/// Gets a surface based on the texture configuration /// Get a surface based on the texture configuration
CachedSurface* GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config); Surface GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config);
/// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer /// Get the color and depth surfaces based on the framebuffer configuration
/// configuration SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> GetFramebufferSurfaces( const MathUtil::Rectangle<s32>& viewport_rect);
const Pica::FramebufferRegs::FramebufferConfig& config);
/// Attempt to get a surface that exactly matches the fill region and format /// Get a surface that matches the fill config
CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config); Surface GetFillSurface(const GPU::Regs::MemoryFillConfig& config);
/// Write the surface back to memory /// Get a surface that matches a "texture copy" display transfer config
void FlushSurface(CachedSurface* surface); SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params);
/// Write any cached resources overlapping the region back to memory (if dirty) and optionally /// Write any cached resources overlapping the region back to memory (if dirty)
/// invalidate them in the cache void FlushRegion(PAddr addr, u32 size, Surface flush_surface = nullptr);
void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate);
/// Mark region as being invalidated by region_owner (nullptr if 3DS memory)
void InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner);
/// Flush all cached resources tracked by this cache manager /// Flush all cached resources tracked by this cache manager
void FlushAll(); void FlushAll();
private: private:
/// Update surface's texture for given region when necessary
void ValidateSurface(const Surface& surface, PAddr addr, u32 size);
/// Create a new surface
Surface CreateSurface(const SurfaceParams& params);
/// Register surface into the cache
void RegisterSurface(const Surface& surface);
/// Remove surface from the cache
void UnregisterSurface(const Surface& surface);
/// Increase/decrease the number of surfaces in the pages touching the specified region
void UpdatePagesCachedCount(PAddr addr, u32 size, int delta);
SurfaceCache surface_cache; SurfaceCache surface_cache;
OGLFramebuffer transfer_framebuffers[2]; SurfaceMap dirty_regions;
PageMap cached_pages;
}; };

View File

@ -41,7 +41,7 @@ struct LightSrc {
}; };
layout (std140) uniform shader_data { layout (std140) uniform shader_data {
vec2 framebuffer_scale; int framebuffer_scale;
int alphatest_ref; int alphatest_ref;
float depth_scale; float depth_scale;
float depth_offset; float depth_offset;

View File

@ -69,6 +69,12 @@ OpenGLState::OpenGLState() {
draw.uniform_buffer = 0; draw.uniform_buffer = 0;
draw.shader_program = 0; draw.shader_program = 0;
scissor.enabled = false;
scissor.x = 0;
scissor.y = 0;
scissor.width = 0;
scissor.height = 0;
clip_distance = {}; clip_distance = {};
} }
@ -263,6 +269,22 @@ void OpenGLState::Apply() const {
glUseProgram(draw.shader_program); glUseProgram(draw.shader_program);
} }
// Scissor test
if (scissor.enabled != cur_state.scissor.enabled) {
if (scissor.enabled) {
glEnable(GL_SCISSOR_TEST);
} else {
glDisable(GL_SCISSOR_TEST);
}
}
if (scissor.x != cur_state.scissor.x ||
scissor.y != cur_state.scissor.y ||
scissor.width != cur_state.scissor.width ||
scissor.height != cur_state.scissor.height) {
glScissor(scissor.x, scissor.y, scissor.width, scissor.height);
}
// Clip distance // Clip distance
for (size_t i = 0; i < clip_distance.size(); ++i) { for (size_t i = 0; i < clip_distance.size(); ++i) {
if (clip_distance[i] != cur_state.clip_distance[i]) { if (clip_distance[i] != cur_state.clip_distance[i]) {

View File

@ -124,6 +124,14 @@ public:
GLuint shader_program; // GL_CURRENT_PROGRAM GLuint shader_program; // GL_CURRENT_PROGRAM
} draw; } draw;
// Scissor test state. OpenGLState::Apply toggles GL_SCISSOR_TEST from "enabled" and passes
// x, y, width and height to glScissor in that order.
struct {
bool enabled; // GL_SCISSOR_TEST
GLint x; // first argument of glScissor
GLint y; // second argument of glScissor
GLsizei width; // third argument of glScissor
GLsizei height; // fourth argument of glScissor
} scissor;
std::array<bool, 2> clip_distance; // GL_CLIP_DISTANCE std::array<bool, 2> clip_distance; // GL_CLIP_DISTANCE
OpenGLState(); OpenGLState();

View File

@ -22,6 +22,7 @@ class SWRasterizer : public RasterizerInterface {
void NotifyPicaRegisterChanged(u32 id) override {} void NotifyPicaRegisterChanged(u32 id) override {}
void FlushAll() override {} void FlushAll() override {}
void FlushRegion(PAddr addr, u32 size) override {} void FlushRegion(PAddr addr, u32 size) override {}
void InvalidateRegion(PAddr addr, u32 size) override {}
void FlushAndInvalidateRegion(PAddr addr, u32 size) override {} void FlushAndInvalidateRegion(PAddr addr, u32 size) override {}
}; };
} }