Rasterizer cache refactor v2 (#6479)

* rasterizer_cache: Switch to template

* Eliminates all opengl references in the rasterizer cache headers
  thus completing the backend abstraction

* rasterizer_cache: Switch to page table

* Surface storage isn't particularly interval sensitive so we can use a page table to make it faster

* rasterizer_cache: Move sampler management out of rasterizer cache

* rasterizer_cache: Remove shared_ptr usage

* Switches to yuzu's slot vector for improved memory locality.

* rasterizer_cache: Rework reinterpretation lookup

* citra_qt: Per game texture filter

* rasterizer_cache: Log additional settings

* gl_texture_runtime: Resolve shadow map comment

* rasterizer_cache: Don't use float for viewport

* gl_texture_runtime: Fix custom allocation recycling

* rasterizer_cache: Minor cleanups

* Cleanup texture cubes when all the faces have been unregistered from the cache

* custom_tex_manager: Allow multiple hash mappings per texture

* code: Move slot vector to common

* rasterizer_cache: Prevent texture cube crashes

* rasterizer_cache: Improve mipmap validation

* CanSubRect now works properly when validating multi-level surfaces, for example Dark Moon validates a 4 level surface from a 3 level one and it works

* gl_blit_handler: Unbind sampler on reinterpretation
This commit is contained in:
GPUCode 2023-05-07 02:34:28 +03:00 committed by GitHub
parent 322d7a8287
commit 2e655f73b8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
32 changed files with 2238 additions and 1927 deletions

View File

@ -208,12 +208,13 @@ void ConfigureEnhancements::SetupPerGameUI() {
ConfigurationShared::SetColoredComboBox(
ui->resolution_factor_combobox, ui->widget_resolution,
static_cast<u32>(Settings::values.resolution_factor.GetValue(true)));
static_cast<int>(Settings::values.resolution_factor.GetValue(true)));
ConfigurationShared::SetColoredComboBox(ui->texture_filter_combobox, ui->widget_texture_filter,
0);
ConfigurationShared::SetColoredComboBox(
ui->texture_filter_combobox, ui->widget_texture_filter,
static_cast<int>(Settings::values.texture_filter.GetValue(true)));
ConfigurationShared::SetColoredComboBox(
ui->layout_combobox, ui->widget_layout,
static_cast<u32>(Settings::values.layout_option.GetValue(true)));
static_cast<int>(Settings::values.layout_option.GetValue(true)));
}

View File

@ -100,6 +100,7 @@ add_library(citra_common STATIC
scope_exit.h
settings.cpp
settings.h
slot_vector.h
serialization/atomic.h
serialization/boost_discrete_interval.hpp
serialization/boost_flat_set.h

View File

@ -163,6 +163,8 @@ void LogSettings() {
log_setting("Layout_LargeScreenProportion", values.large_screen_proportion.GetValue());
log_setting("Utility_DumpTextures", values.dump_textures.GetValue());
log_setting("Utility_CustomTextures", values.custom_textures.GetValue());
log_setting("Utility_PreloadTextures", values.preload_textures.GetValue());
log_setting("Utility_AsyncCustomLoading", values.async_custom_loading.GetValue());
log_setting("Utility_UseDiskShaderCache", values.use_disk_shader_cache.GetValue());
log_setting("Audio_Emulation", GetAudioEmulationName(values.audio_emulation.GetValue()));
log_setting("Audio_OutputType", values.output_type.GetValue());

154
src/common/slot_vector.h Normal file
View File

@ -0,0 +1,154 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <bit>
#include <compare>
#include <numeric>
#include <type_traits>
#include <utility>
#include <vector>
#include "common/assert.h"
#include "common/common_types.h"
namespace Common {
struct SlotId {
static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max();
constexpr auto operator<=>(const SlotId&) const noexcept = default;
constexpr explicit operator bool() const noexcept {
return index != INVALID_INDEX;
}
u32 index = INVALID_INDEX;
};
template <class T>
class SlotVector {
public:
~SlotVector() noexcept {
size_t index = 0;
for (u64 bits : stored_bitset) {
for (size_t bit = 0; bits; ++bit, bits >>= 1) {
if ((bits & 1) != 0) {
values[index + bit].object.~T();
}
}
index += 64;
}
delete[] values;
}
[[nodiscard]] T& operator[](SlotId id) noexcept {
ValidateIndex(id);
return values[id.index].object;
}
[[nodiscard]] const T& operator[](SlotId id) const noexcept {
ValidateIndex(id);
return values[id.index].object;
}
template <typename... Args>
[[nodiscard]] SlotId insert(Args&&... args) noexcept {
const u32 index = FreeValueIndex();
new (&values[index].object) T(std::forward<Args>(args)...);
SetStorageBit(index);
return SlotId{index};
}
void erase(SlotId id) noexcept {
values[id.index].object.~T();
free_list.push_back(id.index);
ResetStorageBit(id.index);
}
private:
struct NonTrivialDummy {
NonTrivialDummy() noexcept {}
};
union Entry {
Entry() noexcept : dummy{} {}
~Entry() noexcept {}
NonTrivialDummy dummy;
T object;
};
void SetStorageBit(u32 index) noexcept {
stored_bitset[index / 64] |= u64(1) << (index % 64);
}
void ResetStorageBit(u32 index) noexcept {
stored_bitset[index / 64] &= ~(u64(1) << (index % 64));
}
bool ReadStorageBit(u32 index) noexcept {
return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0;
}
void ValidateIndex(SlotId id) const noexcept {
DEBUG_ASSERT(id);
DEBUG_ASSERT(id.index / 64 < stored_bitset.size());
DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0);
}
[[nodiscard]] u32 FreeValueIndex() noexcept {
if (free_list.empty()) {
Reserve(values_capacity ? (values_capacity << 1) : 1);
}
const u32 free_index = free_list.back();
free_list.pop_back();
return free_index;
}
void Reserve(size_t new_capacity) noexcept {
Entry* const new_values = new Entry[new_capacity];
size_t index = 0;
for (u64 bits : stored_bitset) {
for (size_t bit = 0; bits; ++bit, bits >>= 1) {
const size_t i = index + bit;
if ((bits & 1) == 0) {
continue;
}
T& old_value = values[i].object;
new (&new_values[i].object) T(std::move(old_value));
old_value.~T();
}
index += 64;
}
stored_bitset.resize((new_capacity + 63) / 64);
const size_t old_free_size = free_list.size();
free_list.resize(old_free_size + (new_capacity - values_capacity));
std::iota(free_list.begin() + old_free_size, free_list.end(),
static_cast<u32>(values_capacity));
delete[] values;
values = new_values;
values_capacity = new_capacity;
}
Entry* values = nullptr;
size_t values_capacity = 0;
std::vector<u64> stored_bitset;
std::vector<u32> free_list;
};
} // namespace Common
template <>
struct std::hash<Common::SlotId> {
size_t operator()(const Common::SlotId& id) const noexcept {
return std::hash<u32>{}(id.index);
}
};

View File

@ -40,6 +40,8 @@ add_library(video_core STATIC
rasterizer_cache/pixel_format.h
rasterizer_cache/rasterizer_cache.cpp
rasterizer_cache/rasterizer_cache.h
rasterizer_cache/rasterizer_cache_base.h
rasterizer_cache/sampler_params.h
rasterizer_cache/surface_base.cpp
rasterizer_cache/surface_base.h
rasterizer_cache/surface_params.cpp
@ -53,10 +55,9 @@ add_library(video_core STATIC
renderer_opengl/gl_blit_helper.h
renderer_opengl/gl_driver.cpp
renderer_opengl/gl_driver.h
renderer_opengl/gl_format_reinterpreter.cpp
renderer_opengl/gl_format_reinterpreter.h
renderer_opengl/gl_rasterizer.cpp
renderer_opengl/gl_rasterizer.h
renderer_opengl/gl_rasterizer_cache.cpp
renderer_opengl/gl_resource_manager.cpp
renderer_opengl/gl_resource_manager.h
renderer_opengl/gl_shader_decompiler.cpp
@ -126,7 +127,7 @@ target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC citra_common citra_core)
target_link_libraries(video_core PRIVATE glad json-headers dds-ktx nihstro-headers Boost::serialization)
target_link_libraries(video_core PRIVATE Boost::serialization dds-ktx glad json-headers nihstro-headers tsl::robin_map)
set_target_properties(video_core PROPERTIES INTERPROCEDURAL_OPTIMIZATION ${ENABLE_LTO})
if ("x86_64" IN_LIST ARCHITECTURE)

View File

@ -13,6 +13,7 @@
#include "core/frontend/image_interface.h"
#include "video_core/custom_textures/custom_tex_manager.h"
#include "video_core/rasterizer_cache/surface_params.h"
#include "video_core/rasterizer_cache/utils.h"
namespace VideoCore {
@ -21,7 +22,7 @@ namespace {
MICROPROFILE_DEFINE(CustomTexManager_TickFrame, "CustomTexManager", "TickFrame",
MP_RGB(54, 16, 32));
constexpr std::size_t MAX_UPLOADS_PER_TICK = 16;
constexpr std::size_t MAX_UPLOADS_PER_TICK = 8;
bool IsPow2(u32 value) {
return value != 0 && (value & (value - 1)) == 0;
@ -111,11 +112,14 @@ void CustomTexManager::FindCustomTextures() {
if (!ParseFilename(file, texture)) {
continue;
}
auto& material = material_map[texture->hash];
if (!material) {
material = std::make_unique<Material>();
for (const u64 hash : texture->hashes) {
auto& material = material_map[hash];
if (!material) {
material = std::make_unique<Material>();
}
material->hash = hash;
material->AddMapTexture(texture);
}
material->AddMapTexture(texture);
}
textures_loaded = true;
}
@ -145,21 +149,25 @@ bool CustomTexManager::ParseFilename(const FileUtil::FSTEntry& file, CustomTextu
parts.pop_back();
}
// First check if the path is mapped directly to a hash
// before trying to parse the texture filename.
// First look if this file is mapped to any number of hashes.
std::vector<u64>& hashes = texture->hashes;
const auto it = path_to_hash_map.find(file.virtualName);
if (it != path_to_hash_map.end()) {
texture->hash = it->second;
} else {
u32 width;
u32 height;
u32 format;
unsigned long long hash{};
if (std::sscanf(parts.back().c_str(), "tex1_%ux%u_%llX_%u", &width, &height, &hash,
&format) != 4) {
return false;
}
texture->hash = hash;
hashes = it->second;
}
// It's also possible for pack creators to retain the default texture name
// still map the texture to another hash. Support that as well.
u32 width;
u32 height;
u32 format;
unsigned long long hash{};
const bool is_parsed = std::sscanf(parts.back().c_str(), "tex1_%ux%u_%llX_%u", &width, &height,
&hash, &format) == 4;
const bool is_mapped =
!hashes.empty() && std::find(hashes.begin(), hashes.end(), hash) != hashes.end();
if (is_parsed && !is_mapped) {
hashes.push_back(hash);
}
texture->path = file.physicalName;
@ -181,9 +189,9 @@ void CustomTexManager::WriteConfig() {
json["description"] = "A graphics pack";
auto& options = json["options"];
options["skip_mipmap"] = skip_mipmap;
options["flip_png_files"] = flip_png_files;
options["use_new_hash"] = use_new_hash;
options["skip_mipmap"] = false;
options["flip_png_files"] = true;
options["use_new_hash"] = true;
FileUtil::IOFile file{pack_config, "w"};
const std::string output = json.dump(4);
@ -311,7 +319,7 @@ void CustomTexManager::ReadConfig(const std::string& load_path) {
return;
}
nlohmann::json json = nlohmann::json::parse(config);
nlohmann::json json = nlohmann::json::parse(config, nullptr, false, true);
const auto& options = json["options"];
skip_mipmap = options["skip_mipmap"].get<bool>();
@ -330,13 +338,7 @@ void CustomTexManager::ReadConfig(const std::string& load_path) {
const auto parse = [&](const std::string& file) {
const std::string filename{FileUtil::GetFilename(file)};
auto [it, new_hash] = path_to_hash_map.try_emplace(filename);
if (!new_hash) {
LOG_ERROR(Render,
"File {} with key {} already exists and is mapped to {:#016X}, skipping",
file, material.key(), path_to_hash_map[filename]);
return;
}
it->second = hash;
it->second.push_back(hash);
};
const auto value = material.value();
if (value.is_string()) {

View File

@ -81,7 +81,7 @@ private:
Frontend::ImageInterface& image_interface;
std::unordered_set<u64> dumped_textures;
std::unordered_map<u64, std::unique_ptr<Material>> material_map;
std::unordered_map<std::string, u64> path_to_hash_map;
std::unordered_map<std::string, std::vector<u64>> path_to_hash_map;
std::vector<std::unique_ptr<CustomTexture>> custom_textures;
std::list<AsyncUpload> async_uploads;
std::unique_ptr<Common::ThreadWorker> workers;

View File

@ -55,6 +55,11 @@ CustomTexture::CustomTexture(Frontend::ImageInterface& image_interface_)
CustomTexture::~CustomTexture() = default;
void CustomTexture::LoadFromDisk(bool flip_png) {
std::scoped_lock lock{decode_mutex};
if (IsLoaded()) {
return;
}
FileUtil::IOFile file{path, "rb"};
std::vector<u8> input(file.GetSize());
if (file.ReadBytes(input.data(), input.size()) != input.size()) {
@ -71,7 +76,6 @@ void CustomTexture::LoadFromDisk(bool flip_png) {
break;
default:
LOG_ERROR(Render, "Unknown file format {}", file_format);
return;
}
}
@ -102,8 +106,7 @@ void Material::LoadFromDisk(bool flip_png) noexcept {
}
texture->LoadFromDisk(flip_png);
size += texture->data.size();
LOG_DEBUG(Render, "Loading {} map {} with hash {:#016X}", MapTypeName(texture->type),
texture->path, texture->hash);
LOG_DEBUG(Render, "Loading {} map {}", MapTypeName(texture->type), texture->path);
}
if (!textures[0]) {
LOG_ERROR(Render, "Unable to create material without color texture!");
@ -121,7 +124,7 @@ void Material::LoadFromDisk(bool flip_png) noexcept {
LOG_ERROR(Render,
"{} map {} of material with hash {:#016X} has dimentions {}x{} "
"which do not match the color texture dimentions {}x{}",
MapTypeName(texture->type), texture->path, texture->hash, texture->width,
MapTypeName(texture->type), texture->path, hash, texture->width,
texture->height, width, height);
state = DecodeState::Failed;
return;

View File

@ -6,6 +6,7 @@
#include <array>
#include <atomic>
#include <mutex>
#include <span>
#include <string>
#include <vector>
@ -39,7 +40,7 @@ public:
void LoadFromDisk(bool flip_png);
[[nodiscard]] bool IsParsed() const noexcept {
return file_format != CustomFileFormat::None && hash != 0;
return file_format != CustomFileFormat::None && !hashes.empty();
}
[[nodiscard]] bool IsLoaded() const noexcept {
@ -56,7 +57,8 @@ public:
std::string path;
u32 width;
u32 height;
u64 hash;
std::vector<u64> hashes;
std::mutex decode_mutex;
CustomPixelFormat format;
CustomFileFormat file_format;
std::vector<u8> data;
@ -67,6 +69,7 @@ struct Material {
u32 width;
u32 height;
u64 size;
u64 hash;
CustomPixelFormat format;
std::array<CustomTexture*, MAX_MAPS> textures;
std::atomic<DecodeState> state{};

View File

@ -4,29 +4,18 @@
//? #version 430 core
layout(location = 0) in mediump vec2 dst_coord;
layout(location = 0) in mediump vec2 tex_coord;
layout(location = 0) out lowp vec4 frag_color;
layout(binding = 0) uniform highp sampler2D depth;
layout(binding = 1) uniform lowp usampler2D stencil;
uniform mediump ivec2 dst_size;
uniform mediump ivec2 src_size;
uniform mediump ivec2 src_offset;
void main() {
mediump ivec2 tex_coord;
if (src_size == dst_size) {
tex_coord = ivec2(dst_coord);
} else {
highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x);
mediump int y = tex_index / src_size.x;
tex_coord = ivec2(tex_index - y * src_size.x, y);
}
tex_coord -= src_offset;
mediump vec2 coord = tex_coord * vec2(textureSize(depth, 0));
mediump ivec2 tex_icoord = ivec2(coord);
highp uint depth_val =
uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0));
lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x;
uint(texelFetch(depth, tex_icoord, 0).x * (exp2(32.0) - 1.0));
lowp uint stencil_val = texelFetch(stencil, tex_icoord, 0).x;
highp uvec4 components =
uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu);
frag_color = vec4(components) / (exp2(8.0) - 1.0);

View File

@ -6,7 +6,7 @@
layout(location = 0) out vec2 dst_coord;
uniform mediump ivec2 dst_size;
layout(location = 0) uniform mediump ivec2 dst_size;
const vec2 vertices[4] =
vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));

View File

@ -4,26 +4,15 @@
//? #version 430 core
layout(location = 0) in mediump vec2 dst_coord;
layout(location = 0) in mediump vec2 tex_coord;
layout(location = 0) out lowp vec4 frag_color;
layout(binding = 0) uniform lowp sampler2D source;
uniform mediump ivec2 dst_size;
uniform mediump ivec2 src_size;
uniform mediump ivec2 src_offset;
void main() {
mediump ivec2 tex_coord;
if (src_size == dst_size) {
tex_coord = ivec2(dst_coord);
} else {
highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x);
mediump int y = tex_index / src_size.x;
tex_coord = ivec2(tex_index - y * src_size.x, y);
}
tex_coord -= src_offset;
lowp ivec4 rgba4 = ivec4(texelFetch(source, tex_coord, 0) * (exp2(4.0) - 1.0));
mediump vec2 coord = tex_coord * vec2(textureSize(source, 0));
mediump ivec2 tex_icoord = ivec2(coord);
lowp ivec4 rgba4 = ivec4(texelFetch(source, tex_icoord, 0) * (exp2(4.0) - 1.0));
lowp ivec3 rgb5 =
((rgba4.rgb << ivec3(1, 2, 3)) | (rgba4.gba >> ivec3(3, 2, 1))) & 0x1F;
frag_color = vec4(vec3(rgb5) / (exp2(5.0) - 1.0), rgba4.a & 0x01);

View File

@ -10,9 +10,9 @@ namespace VideoCore {
FramebufferBase::FramebufferBase() = default;
FramebufferBase::FramebufferBase(const Pica::Regs& regs, const SurfaceBase* const color,
u32 color_level, const SurfaceBase* const depth_stencil,
u32 depth_level, Common::Rectangle<u32> surfaces_rect) {
FramebufferBase::FramebufferBase(const Pica::Regs& regs, const SurfaceBase* color, u32 color_level,
const SurfaceBase* depth_stencil, u32 depth_level,
Common::Rectangle<u32> surfaces_rect) {
res_scale = color ? color->res_scale : (depth_stencil ? depth_stencil->res_scale : 1u);
// Determine the draw rectangle (render area + scissor)
@ -31,10 +31,10 @@ FramebufferBase::FramebufferBase(const Pica::Regs& regs, const SurfaceBase* cons
surfaces_rect.bottom, surfaces_rect.top);
// Update viewport
viewport.x = static_cast<f32>(surfaces_rect.left + viewport_rect.left * res_scale);
viewport.y = static_cast<f32>(surfaces_rect.bottom + viewport_rect.bottom * res_scale);
viewport.width = static_cast<f32>(viewport_rect.GetWidth() * res_scale);
viewport.height = static_cast<f32>(viewport_rect.GetHeight() * res_scale);
viewport.x = static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale;
viewport.y = static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale;
viewport.width = static_cast<s32>(viewport_rect.GetWidth() * res_scale);
viewport.height = static_cast<s32>(viewport_rect.GetHeight() * res_scale);
// Scissor checks are window-, not viewport-relative, which means that if the cached texture
// sub-rect changes, the scissor bounds also need to be updated.

View File

@ -16,10 +16,10 @@ namespace VideoCore {
class SurfaceBase;
struct ViewportInfo {
f32 x;
f32 y;
f32 width;
f32 height;
s32 x;
s32 y;
s32 width;
s32 height;
};
/**
@ -29,8 +29,8 @@ struct ViewportInfo {
class FramebufferBase {
public:
FramebufferBase();
FramebufferBase(const Pica::Regs& regs, const SurfaceBase* const color, u32 color_level,
const SurfaceBase* const depth_stencil, u32 depth_level,
FramebufferBase(const Pica::Regs& regs, const SurfaceBase* color, u32 color_level,
const SurfaceBase* depth_stencil, u32 depth_level,
Common::Rectangle<u32> surfaces_rect);
SurfaceParams ColorParams() const noexcept {
@ -66,6 +66,7 @@ protected:
switch (type) {
case VideoCore::SurfaceType::Color:
return 0;
case VideoCore::SurfaceType::Depth:
case VideoCore::SurfaceType::DepthStencil:
return 1;
default:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,229 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <functional>
#include <optional>
#include <unordered_map>
#include <vector>
#include <boost/icl/interval_map.hpp>
#include <tsl/robin_map.h>
#include "video_core/rasterizer_cache/sampler_params.h"
#include "video_core/rasterizer_cache/surface_base.h"
namespace Memory {
class MemorySystem;
}
namespace Pica {
struct Regs;
}
namespace Pica::Texture {
struct TextureInfo;
}
namespace VideoCore {
enum class ScaleMatch {
Exact, ///< Only accept same res scale
Upscale, ///< Only allow higher scale than params
Ignore ///< Accept every scaled res
};
enum class MatchFlags {
Exact = 1 << 0, ///< Surface perfectly matches params
SubRect = 1 << 1, ///< Surface encompasses params
Copy = 1 << 2, ///< Surface that can be used as a copy source
Expand = 1 << 3, ///< Surface that can expand params
TexCopy = 1 << 4, ///< Surface that will match a display transfer "texture copy" parameters
Reinterpret = 1 << 5, ///< Surface might have different pixel format.
};
DECLARE_ENUM_FLAG_OPERATORS(MatchFlags);
class CustomTexManager;
class RendererBase;
template <class T>
class RasterizerCache {
/// Address shift for caching surfaces into a hash table
static constexpr u64 CITRA_PAGEBITS = 18;
using Runtime = typename T::Runtime;
using Sampler = typename T::Sampler;
using Surface = typename T::Surface;
using Framebuffer = typename T::Framebuffer;
using SurfaceMap = boost::icl::interval_map<PAddr, SurfaceId, boost::icl::partial_absorber,
std::less, boost::icl::inplace_plus,
boost::icl::inter_section, SurfaceInterval>;
using SurfaceRect_Tuple = std::pair<SurfaceId, Common::Rectangle<u32>>;
using PageMap = boost::icl::interval_map<u32, int>;
struct RenderTargets {
SurfaceId color_id;
SurfaceId depth_id;
};
struct TextureCube {
SurfaceId surface_id;
std::array<SurfaceId, 6> face_ids;
std::array<u64, 6> ticks;
};
public:
explicit RasterizerCache(Memory::MemorySystem& memory, CustomTexManager& custom_tex_manager,
Runtime& runtime, Pica::Regs& regs, RendererBase& renderer);
~RasterizerCache();
/// Notify the cache that a new frame has been queued
void TickFrame();
/// Perform hardware accelerated texture copy according to the provided configuration
bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config);
/// Perform hardware accelerated display transfer according to the provided configuration
bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config);
/// Perform hardware accelerated memory fill according to the provided configuration
bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config);
/// Returns a reference to the surface object assigned to surface_id
Surface& GetSurface(SurfaceId surface_id);
/// Returns a reference to the sampler object matching the provided configuration
Sampler& GetSampler(const Pica::TexturingRegs::TextureConfig& config);
Sampler& GetSampler(SamplerId sampler_id);
/// Copy one surface's region to another
void CopySurface(Surface& src_surface, Surface& dst_surface, SurfaceInterval copy_interval);
/// Load a texture from 3DS memory to OpenGL and cache it (if not already cached)
SurfaceId GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale,
bool load_if_create);
/// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from
/// 3DS memory to OpenGL and caches it (if not already cached)
SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale,
bool load_if_create);
/// Get a surface based on the texture configuration
Surface& GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config);
SurfaceId GetTextureSurface(const Pica::Texture::TextureInfo& info, u32 max_level = 0);
/// Get a texture cube based on the texture configuration
Surface& GetTextureCube(const TextureCubeConfig& config);
/// Get the color and depth surfaces based on the framebuffer configuration
Framebuffer GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb);
/// Marks the draw rectangle defined in framebuffer as invalid
void InvalidateFramebuffer(const Framebuffer& framebuffer);
/// Get a surface that matches a "texture copy" display transfer config
SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params);
/// Write any cached resources overlapping the region back to memory (if dirty)
void FlushRegion(PAddr addr, u32 size, SurfaceId flush_surface = {});
/// Mark region as being invalidated by region_owner (nullptr if 3DS memory)
void InvalidateRegion(PAddr addr, u32 size, SurfaceId region_owner = {});
/// Flush all cached resources tracked by this cache manager
void FlushAll();
/// Clear all cached resources tracked by this cache manager
void ClearAll(bool flush);
private:
/// Iterate over all page indices in a range
template <typename Func>
void ForEachPage(PAddr addr, size_t size, Func&& func) {
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
const u64 page_end = (addr + size - 1) >> CITRA_PAGEBITS;
for (u64 page = addr >> CITRA_PAGEBITS; page <= page_end; ++page) {
if constexpr (RETURNS_BOOL) {
if (func(page)) {
break;
}
} else {
func(page);
}
}
}
/// Iterates over all the surfaces in a region calling func
template <typename Func>
void ForEachSurfaceInRegion(PAddr addr, size_t size, Func&& func);
/// Get the best surface match (and its match type) for the given flags
template <MatchFlags find_flags>
SurfaceId FindMatch(const SurfaceParams& params, ScaleMatch match_scale_type,
std::optional<SurfaceInterval> validate_interval = std::nullopt);
/// Transfers ownership of a memory region from src_surface to dest_surface
void DuplicateSurface(SurfaceId src_id, SurfaceId dst_id);
/// Update surface's texture for given region when necessary
void ValidateSurface(SurfaceId surface, PAddr addr, u32 size);
/// Copies pixel data in interval from the guest VRAM to the host GPU surface
void UploadSurface(Surface& surface, SurfaceInterval interval);
/// Uploads a custom texture identified with hash to the target surface
bool UploadCustomSurface(SurfaceId surface_id, SurfaceInterval interval);
/// Copies pixel data in interval from the host GPU surface to the guest VRAM
void DownloadSurface(Surface& surface, SurfaceInterval interval);
/// Downloads a fill surface to guest VRAM
void DownloadFillSurface(Surface& surface, SurfaceInterval interval);
/// Attempt to find a reinterpretable surface in the cache and use it to copy for validation
bool ValidateByReinterpretation(Surface& surface, SurfaceParams params,
const SurfaceInterval& interval);
/// Return true if a surface with an invalid pixel format exists at the interval
bool IntervalHasInvalidPixelFormat(const SurfaceParams& params, SurfaceInterval interval);
/// Create a new surface
SurfaceId CreateSurface(const SurfaceParams& params);
/// Register surface into the cache
void RegisterSurface(SurfaceId surface);
/// Remove surface from the cache
void UnregisterSurface(SurfaceId surface);
/// Unregisters all surfaces from the cache
void UnregisterAll();
/// Increase/decrease the number of surface in pages touching the specified region
void UpdatePagesCachedCount(PAddr addr, u32 size, int delta);
private:
Memory::MemorySystem& memory;
CustomTexManager& custom_tex_manager;
Runtime& runtime;
Pica::Regs& regs;
RendererBase& renderer;
std::unordered_map<TextureCubeConfig, TextureCube> texture_cube_cache;
tsl::robin_pg_map<u64, std::vector<SurfaceId>, Common::IdentityHash<u64>> page_table;
std::unordered_map<SamplerParams, SamplerId> samplers;
Common::SlotVector<Surface> slot_surfaces;
Common::SlotVector<Sampler> slot_samplers;
SurfaceMap dirty_regions;
PageMap cached_pages;
std::vector<SurfaceId> remove_surfaces;
u32 resolution_scale_factor;
RenderTargets render_targets;
bool use_filter;
bool dump_textures;
bool use_custom_textures;
};
} // namespace VideoCore

View File

@ -0,0 +1,43 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <compare>
#include "common/hash.h"
#include "video_core/regs_texturing.h"
namespace VideoCore {
struct SamplerParams {
using TextureConfig = Pica::TexturingRegs::TextureConfig;
TextureConfig::TextureFilter mag_filter;
TextureConfig::TextureFilter min_filter;
TextureConfig::TextureFilter mip_filter;
TextureConfig::WrapMode wrap_s;
TextureConfig::WrapMode wrap_t;
u32 border_color = 0;
u32 lod_min = 0;
u32 lod_max = 0;
s32 lod_bias = 0;
auto operator<=>(const SamplerParams&) const noexcept = default;
const u64 Hash() const {
return Common::ComputeHash64(this, sizeof(SamplerParams));
}
};
static_assert(std::has_unique_object_representations_v<SamplerParams>,
"SamplerParams is not suitable for hashing");
} // namespace VideoCore
namespace std {
template <>
struct hash<VideoCore::SamplerParams> {
std::size_t operator()(const VideoCore::SamplerParams& params) const noexcept {
return params.Hash();
}
};
} // namespace std

View File

@ -45,13 +45,16 @@ bool SurfaceBase::CanFill(const SurfaceParams& dest_surface, SurfaceInterval fil
}
bool SurfaceBase::CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const {
SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval);
const SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval);
ASSERT(subrect_params.GetInterval() == copy_interval);
if (CanSubRect(subrect_params))
return true;
if (CanFill(dest_surface, copy_interval))
if (CanSubRect(subrect_params)) {
return true;
}
if (CanFill(dest_surface, copy_interval)) {
return true;
}
return false;
}
@ -102,6 +105,23 @@ SurfaceInterval SurfaceBase::GetCopyableInterval(const SurfaceParams& params) co
return result;
}
Extent SurfaceBase::RealExtent(bool scaled) {
const bool is_custom = IsCustom();
u32 real_width = width;
u32 real_height = height;
if (is_custom) {
real_width = material->width;
real_height = material->height;
} else if (scaled) {
real_width = GetScaledWidth();
real_height = GetScaledHeight();
}
return Extent{
.width = real_width,
.height = real_height,
};
}
bool SurfaceBase::HasNormalMap() const noexcept {
return material && material->Map(MapType::Normal) != nullptr;
}

View File

@ -6,6 +6,7 @@
#include <boost/icl/interval_set.hpp>
#include "video_core/rasterizer_cache/surface_params.h"
#include "video_core/rasterizer_cache/utils.h"
namespace VideoCore {
@ -13,6 +14,15 @@ using SurfaceRegions = boost::icl::interval_set<PAddr, std::less, SurfaceInterva
struct Material;
enum class SurfaceFlagBits : u32 {
Registered = 1 << 0, ///< Surface is registed in the rasterizer cache.
Picked = 1 << 1, ///< Surface has been picked when searching for a match.
Tracked = 1 << 2, ///< Surface is part of a texture cube and should be tracked.
Custom = 1 << 3, ///< Surface texture has been replaced with a custom texture.
ShadowMap = 1 << 4, ///< Surface is used during shadow rendering.
};
DECLARE_ENUM_FLAG_OPERATORS(SurfaceFlagBits);
class SurfaceBase : public SurfaceParams {
public:
SurfaceBase(const SurfaceParams& params);
@ -30,19 +40,27 @@ public:
/// Returns the clear value used to validate another surface from this fill surface
ClearValue MakeClearValue(PAddr copy_addr, PixelFormat dst_format);
/// Returns the internal surface extent.
Extent RealExtent(bool scaled = true);
/// Returns true if the surface contains a custom material with a normal map.
bool HasNormalMap() const noexcept;
bool Overlaps(PAddr overlap_addr, size_t overlap_size) const noexcept {
const PAddr overlap_end = overlap_addr + static_cast<PAddr>(overlap_size);
return addr < overlap_end && overlap_addr < end;
}
u64 ModificationTick() const noexcept {
return modification_tick;
}
bool IsCustom() const noexcept {
return is_custom && custom_format != CustomPixelFormat::Invalid;
return True(flags & SurfaceFlagBits::Custom) && custom_format != CustomPixelFormat::Invalid;
}
bool IsRegionValid(SurfaceInterval interval) const {
return (invalid_regions.find(interval) == invalid_regions.end());
return invalid_regions.find(interval) == invalid_regions.end();
}
void MarkValid(SurfaceInterval interval) {
@ -65,8 +83,7 @@ private:
std::array<u8, 4> MakeFillBuffer(PAddr copy_addr);
public:
bool registered = false;
bool is_custom = false;
SurfaceFlagBits flags{};
const Material* material = nullptr;
SurfaceRegions invalid_regions;
u32 fill_size = 0;

View File

@ -15,14 +15,23 @@ bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const {
}
bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const {
const u32 level = LevelOf(sub_surface.addr);
return sub_surface.addr >= addr && sub_surface.end <= end &&
sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid &&
sub_surface.is_tiled == is_tiled &&
(sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
(sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) &&
(sub_surface.addr - mipmap_offsets[level]) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
(sub_surface.stride == (stride >> level) ||
sub_surface.height <= (is_tiled ? 8u : 1u)) &&
GetSubRect(sub_surface).right <= stride;
}
bool SurfaceParams::CanReinterpret(const SurfaceParams& other_surface) {
return other_surface.addr >= addr && other_surface.end <= end &&
pixel_format != PixelFormat::Invalid && GetFormatBpp() == other_surface.GetFormatBpp() &&
other_surface.is_tiled == is_tiled &&
(other_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0;
}
bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const {
return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format &&
addr <= expanded_surface.end && expanded_surface.addr <= end &&
@ -206,7 +215,9 @@ SurfaceInterval SurfaceParams::LevelInterval(u32 level) const {
}
u32 SurfaceParams::LevelOf(PAddr level_addr) const {
ASSERT(level_addr >= addr && level_addr <= end);
if (level_addr < addr || level_addr > end) {
return 0;
}
u32 level = levels - 1;
while (mipmap_offsets[level] > level_addr) {

View File

@ -4,11 +4,15 @@
#pragma once
#include <boost/icl/right_open_interval.hpp>
#include "common/math_util.h"
#include "video_core/custom_textures/custom_format.h"
#include "video_core/rasterizer_cache/utils.h"
#include "video_core/rasterizer_cache/pixel_format.h"
namespace VideoCore {
using SurfaceInterval = boost::icl::right_open_interval<PAddr>;
constexpr std::size_t MAX_PICA_LEVELS = 8;
class SurfaceParams {
@ -19,6 +23,9 @@ public:
/// Returns true if sub_surface is a subrect of params
bool CanSubRect(const SurfaceParams& sub_surface) const;
/// Returns true if other_surface can be used for reinterpretion.
bool CanReinterpret(const SurfaceParams& other_surface);
/// Returns true if params can be expanded to match expanded_surface
bool CanExpand(const SurfaceParams& expanded_surface) const;

View File

@ -4,28 +4,31 @@
#pragma once
#include <compare>
#include <span>
#include <boost/icl/right_open_interval.hpp>
#include "common/hash.h"
#include "common/math_util.h"
#include "common/slot_vector.h"
#include "common/vector_math.h"
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/regs_texturing.h"
namespace VideoCore {
using SurfaceInterval = boost::icl::right_open_interval<PAddr>;
using SurfaceId = Common::SlotId;
using SamplerId = Common::SlotId;
/// Fake surface ID for null surfaces
constexpr SurfaceId NULL_SURFACE_ID{0};
/// Fake surface ID for null cube surfaces
constexpr SurfaceId NULL_SURFACE_CUBE_ID{1};
/// Fake sampler ID for null samplers
constexpr SamplerId NULL_SAMPLER_ID{0};
struct Offset {
constexpr auto operator<=>(const Offset&) const noexcept = default;
u32 x = 0;
u32 y = 0;
};
struct Extent {
constexpr auto operator<=>(const Extent&) const noexcept = default;
u32 width = 1;
u32 height = 1;
};
@ -71,9 +74,9 @@ struct BufferTextureCopy {
};
struct StagingData {
u32 size = 0;
std::span<u8> mapped{};
u64 buffer_offset = 0;
u32 size;
std::span<u8> mapped;
u64 buffer_offset;
};
struct TextureCubeConfig {

View File

@ -2,12 +2,16 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/scope_exit.h"
#include "common/settings.h"
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/renderer_opengl/gl_blit_helper.h"
#include "video_core/renderer_opengl/gl_driver.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_runtime.h"
#include "video_core/host_shaders/format_reinterpreter/d24s8_to_rgba8_frag.h"
#include "video_core/host_shaders/format_reinterpreter/rgba4_to_rgb5a1_frag.h"
#include "video_core/host_shaders/full_screen_triangle_vert.h"
#include "video_core/host_shaders/texture_filtering/bicubic_frag.h"
#include "video_core/host_shaders/texture_filtering/nearest_neighbor_frag.h"
@ -49,8 +53,8 @@ OGLProgram CreateProgram(std::string_view frag) {
} // Anonymous namespace
BlitHelper::BlitHelper(TextureRuntime& runtime_)
: runtime{runtime_}, linear_sampler{CreateSampler(GL_LINEAR)},
BlitHelper::BlitHelper(const Driver& driver_)
: driver{driver_}, linear_sampler{CreateSampler(GL_LINEAR)},
nearest_sampler{CreateSampler(GL_NEAREST)}, bicubic_program{CreateProgram(
HostShaders::BICUBIC_FRAG)},
nearest_program{CreateProgram(HostShaders::NEAREST_NEIGHBOR_FRAG)},
@ -58,34 +62,104 @@ BlitHelper::BlitHelper(TextureRuntime& runtime_)
xbrz_program{CreateProgram(HostShaders::XBRZ_FREESCALE_FRAG)},
gradient_x_program{CreateProgram(HostShaders::X_GRADIENT_FRAG)},
gradient_y_program{CreateProgram(HostShaders::Y_GRADIENT_FRAG)},
refine_program{CreateProgram(HostShaders::REFINE_FRAG)} {
refine_program{CreateProgram(HostShaders::REFINE_FRAG)},
d24s8_to_rgba8{CreateProgram(HostShaders::D24S8_TO_RGBA8_FRAG)},
rgba4_to_rgb5a1{CreateProgram(HostShaders::RGBA4_TO_RGB5A1_FRAG)} {
vao.Create();
filter_fbo.Create();
draw_fbo.Create();
state.draw.vertex_array = vao.handle;
for (u32 i = 0; i < 3; i++) {
state.texture_units[i].sampler = i == 2 ? nearest_sampler.handle : linear_sampler.handle;
}
if (driver.IsOpenGLES()) {
LOG_INFO(Render_OpenGL,
"Texture views are unsupported, reinterpretation will do intermediate copy");
temp_tex.Create();
use_texture_view = false;
}
}
BlitHelper::~BlitHelper() = default;
bool BlitHelper::ConvertDS24S8ToRGBA8(Surface& source, Surface& dest,
const VideoCore::TextureBlit& blit) {
OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
state.texture_units[0].texture_2d = source.Handle();
state.texture_units[0].sampler = 0;
state.texture_units[1].sampler = 0;
if (use_texture_view) {
temp_tex.Create();
glActiveTexture(GL_TEXTURE1);
glTextureView(temp_tex.handle, GL_TEXTURE_2D, source.Handle(), GL_DEPTH24_STENCIL8, 0, 1, 0,
1);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
} else if (blit.src_rect.top > temp_rect.top || blit.src_rect.right > temp_rect.right) {
temp_tex.Release();
temp_tex.Create();
state.texture_units[1].texture_2d = temp_tex.handle;
state.Apply();
glActiveTexture(GL_TEXTURE1);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH24_STENCIL8, blit.src_rect.right,
blit.src_rect.top);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
temp_rect = blit.src_rect;
}
state.texture_units[1].texture_2d = temp_tex.handle;
state.Apply();
glActiveTexture(GL_TEXTURE1);
if (!use_texture_view) {
glCopyImageSubData(source.Handle(), GL_TEXTURE_2D, 0, blit.src_rect.left,
blit.src_rect.bottom, 0, temp_tex.handle, GL_TEXTURE_2D, 0,
blit.src_rect.left, blit.src_rect.bottom, 0, blit.src_rect.GetWidth(),
blit.src_rect.GetHeight(), 1);
}
glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX);
SetParams(d24s8_to_rgba8, source.RealExtent(), blit.src_rect);
Draw(d24s8_to_rgba8, dest.Handle(), draw_fbo.handle, 0, blit.dst_rect);
if (use_texture_view) {
temp_tex.Release();
}
// Restore the sampler handles
state.texture_units[0].sampler = linear_sampler.handle;
state.texture_units[1].sampler = linear_sampler.handle;
return true;
}
bool BlitHelper::ConvertRGBA4ToRGB5A1(Surface& source, Surface& dest,
const VideoCore::TextureBlit& blit) {
OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
state.texture_units[0].texture_2d = source.Handle();
SetParams(rgba4_to_rgb5a1, source.RealExtent(), blit.src_rect);
Draw(rgba4_to_rgb5a1, dest.Handle(), draw_fbo.handle, 0, blit.dst_rect);
return true;
}
bool BlitHelper::Filter(Surface& surface, const VideoCore::TextureBlit& blit) {
// Filtering to depth stencil surfaces isn't supported.
if (surface.type == SurfaceType::Depth || surface.type == SurfaceType::DepthStencil) {
const auto filter = Settings::values.texture_filter.GetValue();
const bool is_depth =
surface.type == SurfaceType::Depth || surface.type == SurfaceType::DepthStencil;
if (filter == Settings::TextureFilter::None || is_depth) {
return false;
}
// Avoid filtering for mipmaps as the result often looks terrible.
if (blit.src_level != 0) {
return true;
}
const OpenGLState prev_state = OpenGLState::GetCurState();
state.texture_units[0].texture_2d = surface.Handle(0);
const auto filter{Settings::values.texture_filter.GetValue()};
switch (filter) {
case TextureFilter::None:
break;
case TextureFilter::Anime4K:
FilterAnime4K(surface, blit);
break;
@ -101,15 +175,19 @@ bool BlitHelper::Filter(Surface& surface, const VideoCore::TextureBlit& blit) {
case TextureFilter::xBRZ:
FilterXbrz(surface, blit);
break;
default:
LOG_ERROR(Render_OpenGL, "Unknown texture filter {}", filter);
}
prev_state.Apply();
return true;
}
void BlitHelper::FilterAnime4K(Surface& surface, const VideoCore::TextureBlit& blit) {
static constexpr u8 internal_scale_factor = 2;
const OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
const auto& tuple = surface.Tuple();
const u32 src_width = blit.src_rect.GetWidth();
const u32 src_height = blit.src_rect.GetHeight();
@ -149,7 +227,7 @@ void BlitHelper::FilterAnime4K(Surface& surface, const VideoCore::TextureBlit& b
Draw(gradient_y_program, LUMAD.tex.handle, LUMAD.fbo.handle, 0, temp_rect);
// refine pass
Draw(refine_program, surface.Handle(), filter_fbo.handle, blit.dst_level, blit.dst_rect);
Draw(refine_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect);
// These will have handles from the previous texture that was filtered, reset them to avoid
// binding invalid textures.
@ -160,25 +238,36 @@ void BlitHelper::FilterAnime4K(Surface& surface, const VideoCore::TextureBlit& b
}
void BlitHelper::FilterBicubic(Surface& surface, const VideoCore::TextureBlit& blit) {
SetParams(bicubic_program, surface.Extent(), blit.src_rect);
Draw(bicubic_program, surface.Handle(), filter_fbo.handle, blit.dst_level, blit.dst_rect);
const OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
state.texture_units[0].texture_2d = surface.Handle(0);
SetParams(bicubic_program, surface.RealExtent(false), blit.src_rect);
Draw(bicubic_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect);
}
void BlitHelper::FilterNearest(Surface& surface, const VideoCore::TextureBlit& blit) {
const OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
state.texture_units[2].texture_2d = surface.Handle(0);
SetParams(nearest_program, surface.Extent(), blit.src_rect);
Draw(nearest_program, surface.Handle(), filter_fbo.handle, blit.dst_level, blit.dst_rect);
SetParams(nearest_program, surface.RealExtent(false), blit.src_rect);
Draw(nearest_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect);
}
void BlitHelper::FilterScaleForce(Surface& surface, const VideoCore::TextureBlit& blit) {
SetParams(scale_force_program, surface.Extent(), blit.src_rect);
Draw(scale_force_program, surface.Handle(), filter_fbo.handle, blit.dst_level, blit.dst_rect);
const OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
state.texture_units[0].texture_2d = surface.Handle(0);
SetParams(scale_force_program, surface.RealExtent(false), blit.src_rect);
Draw(scale_force_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect);
}
void BlitHelper::FilterXbrz(Surface& surface, const VideoCore::TextureBlit& blit) {
const OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
state.texture_units[0].texture_2d = surface.Handle(0);
glProgramUniform1f(xbrz_program.handle, 2, static_cast<GLfloat>(surface.res_scale));
SetParams(xbrz_program, surface.Extent(), blit.src_rect);
Draw(xbrz_program, surface.Handle(), filter_fbo.handle, blit.dst_level, blit.dst_rect);
SetParams(xbrz_program, surface.RealExtent(false), blit.src_rect);
Draw(xbrz_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect);
}
void BlitHelper::SetParams(OGLProgram& program, const VideoCore::Extent& src_extent,
@ -206,7 +295,7 @@ void BlitHelper::Draw(OGLProgram& program, GLuint dst_tex, GLuint dst_fbo, u32 d
dst_level);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
glDrawArrays(GL_TRIANGLES, 0, 3);
}
} // namespace OpenGL

View File

@ -15,16 +15,20 @@ struct TextureBlit;
namespace OpenGL {
class TextureRuntime;
class Driver;
class Surface;
class BlitHelper {
public:
BlitHelper(TextureRuntime& runtime);
explicit BlitHelper(const Driver& driver);
~BlitHelper();
bool Filter(Surface& surface, const VideoCore::TextureBlit& blit);
bool ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit);
bool ConvertRGBA4ToRGB5A1(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit);
private:
void FilterAnime4K(Surface& surface, const VideoCore::TextureBlit& blit);
@ -43,10 +47,10 @@ private:
Common::Rectangle<u32> dst_rect);
private:
TextureRuntime& runtime;
const Driver& driver;
OGLVertexArray vao;
OpenGLState state;
OGLFramebuffer filter_fbo;
OGLFramebuffer draw_fbo;
OGLSampler linear_sampler;
OGLSampler nearest_sampler;
@ -57,6 +61,12 @@ private:
OGLProgram gradient_x_program;
OGLProgram gradient_y_program;
OGLProgram refine_program;
OGLProgram d24s8_to_rgba8;
OGLProgram rgba4_to_rgb5a1;
OGLTexture temp_tex;
Common::Rectangle<u32> temp_rect{};
bool use_texture_view{true};
};
} // namespace OpenGL

View File

@ -1,134 +0,0 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/scope_exit.h"
#include "video_core/renderer_opengl/gl_format_reinterpreter.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_runtime.h"
#include "video_core/host_shaders/format_reinterpreter/d24s8_to_rgba8_frag.h"
#include "video_core/host_shaders/format_reinterpreter/fullscreen_quad_vert.h"
#include "video_core/host_shaders/format_reinterpreter/rgba4_to_rgb5a1_frag.h"
namespace OpenGL {
RGBA4toRGB5A1::RGBA4toRGB5A1() {
program.Create(HostShaders::FULLSCREEN_QUAD_VERT, HostShaders::RGBA4_TO_RGB5A1_FRAG);
dst_size_loc = glGetUniformLocation(program.handle, "dst_size");
src_size_loc = glGetUniformLocation(program.handle, "src_size");
src_offset_loc = glGetUniformLocation(program.handle, "src_offset");
vao.Create();
}
void RGBA4toRGB5A1::Reinterpret(Surface& source, Common::Rectangle<u32> src_rect, Surface& dest,
Common::Rectangle<u32> dst_rect) {
OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
OpenGLState state;
state.texture_units[0].texture_2d = source.Handle();
state.draw.draw_framebuffer = draw_fbo.handle;
state.draw.shader_program = program.handle;
state.draw.vertex_array = vao.handle;
state.viewport = {static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.bottom),
static_cast<GLsizei>(dst_rect.GetWidth()),
static_cast<GLsizei>(dst_rect.GetHeight())};
state.Apply();
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dest.Handle(),
0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
glUniform2i(dst_size_loc, dst_rect.GetWidth(), dst_rect.GetHeight());
glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight());
glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
}
ShaderD24S8toRGBA8::ShaderD24S8toRGBA8() {
program.Create(HostShaders::FULLSCREEN_QUAD_VERT, HostShaders::D24S8_TO_RGBA8_FRAG);
dst_size_loc = glGetUniformLocation(program.handle, "dst_size");
src_size_loc = glGetUniformLocation(program.handle, "src_size");
src_offset_loc = glGetUniformLocation(program.handle, "src_offset");
vao.Create();
auto state = OpenGLState::GetCurState();
auto cur_program = state.draw.shader_program;
state.draw.shader_program = program.handle;
state.Apply();
glUniform1i(glGetUniformLocation(program.handle, "stencil"), 1);
state.draw.shader_program = cur_program;
state.Apply();
// Nvidia seem to be the only one to support D24S8 views, at least on windows
// so for everyone else it will do an intermediate copy before running through the shader
std::string_view vendor{reinterpret_cast<const char*>(glGetString(GL_VENDOR))};
if (vendor.find("NVIDIA") != vendor.npos) {
use_texture_view = true;
} else {
LOG_INFO(Render_OpenGL,
"Texture views are unsupported, reinterpretation will do intermediate copy");
temp_tex.Create();
}
}
void ShaderD24S8toRGBA8::Reinterpret(Surface& source, Common::Rectangle<u32> src_rect,
Surface& dest, Common::Rectangle<u32> dst_rect) {
OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
OpenGLState state;
state.texture_units[0].texture_2d = source.Handle();
if (use_texture_view) {
temp_tex.Create();
glActiveTexture(GL_TEXTURE1);
glTextureView(temp_tex.handle, GL_TEXTURE_2D, source.Handle(), GL_DEPTH24_STENCIL8, 0, 1, 0,
1);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
} else if (src_rect.top > temp_rect.top || src_rect.right > temp_rect.right) {
temp_tex.Release();
temp_tex.Create();
state.texture_units[1].texture_2d = temp_tex.handle;
state.Apply();
glActiveTexture(GL_TEXTURE1);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH24_STENCIL8, src_rect.right, src_rect.top);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
temp_rect = src_rect;
}
state.texture_units[1].texture_2d = temp_tex.handle;
state.draw.draw_framebuffer = draw_fbo.handle;
state.draw.shader_program = program.handle;
state.draw.vertex_array = vao.handle;
state.viewport = {static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.bottom),
static_cast<GLsizei>(dst_rect.GetWidth()),
static_cast<GLsizei>(dst_rect.GetHeight())};
state.Apply();
glActiveTexture(GL_TEXTURE1);
if (!use_texture_view) {
glCopyImageSubData(source.Handle(), GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0,
temp_tex.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0,
src_rect.GetWidth(), src_rect.GetHeight(), 1);
}
glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dest.Handle(),
0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
glUniform2i(dst_size_loc, dst_rect.GetWidth(), dst_rect.GetHeight());
glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight());
glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
if (use_texture_view) {
temp_tex.Release();
}
}
} // namespace OpenGL

View File

@ -1,76 +0,0 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/math_util.h"
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
class Surface;
class FormatReinterpreterBase {
public:
FormatReinterpreterBase() {
read_fbo.Create();
draw_fbo.Create();
}
virtual ~FormatReinterpreterBase() = default;
virtual VideoCore::PixelFormat GetSourceFormat() const = 0;
virtual void Reinterpret(Surface& source, Common::Rectangle<u32> src_rect, Surface& dest,
Common::Rectangle<u32> dst_rect) = 0;
protected:
OGLFramebuffer read_fbo;
OGLFramebuffer draw_fbo;
};
using ReinterpreterList = std::vector<std::unique_ptr<FormatReinterpreterBase>>;
class RGBA4toRGB5A1 final : public FormatReinterpreterBase {
public:
RGBA4toRGB5A1();
VideoCore::PixelFormat GetSourceFormat() const override {
return VideoCore::PixelFormat::RGBA4;
}
void Reinterpret(Surface& source, Common::Rectangle<u32> src_rect, Surface& dest,
Common::Rectangle<u32> dst_rect) override;
private:
OGLProgram program;
GLint dst_size_loc{-1};
GLint src_size_loc{-1};
GLint src_offset_loc{-1};
OGLVertexArray vao;
};
class ShaderD24S8toRGBA8 final : public FormatReinterpreterBase {
public:
ShaderD24S8toRGBA8();
VideoCore::PixelFormat GetSourceFormat() const override {
return VideoCore::PixelFormat::D24S8;
}
void Reinterpret(Surface& source, Common::Rectangle<u32> src_rect, Surface& dest,
Common::Rectangle<u32> dst_rect) override;
private:
bool use_texture_view{};
OGLProgram program{};
GLint dst_size_loc{-1};
GLint src_size_loc{-1};
GLint src_offset_loc{-1};
OGLVertexArray vao{};
OGLTexture temp_tex{};
Common::Rectangle<u32> temp_rect{0, 0, 0, 0};
};
} // namespace OpenGL

View File

@ -25,7 +25,7 @@ MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(255, 128,
MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(OpenGL_GS, "OpenGL", "Geometry Shader Setup", MP_RGB(128, 192, 128));
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
MICROPROFILE_DEFINE(OpenGL_Display, "OpenGL", "Display", MP_RGB(128, 128, 192));
using VideoCore::SurfaceType;
@ -97,16 +97,6 @@ RasterizerOpenGL::RasterizerOpenGL(Memory::MemorySystem& memory,
u8 framebuffer_data[4] = {0, 0, 0, 1};
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
// Create sampler objects
for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
}
// Create cubemap texture and sampler objects
texture_cube_sampler.Create();
state.texture_cube_unit.sampler = texture_cube_sampler.sampler.handle;
// Generate VAO
sw_vao.Create();
hw_vao.Create();
@ -251,14 +241,14 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset,
}
}
PAddr data_addr =
const PAddr data_addr =
base_address + loader.data_offset + (vs_input_index_min * loader.byte_count);
u32 vertex_num = vs_input_index_max - vs_input_index_min + 1;
u32 data_size = loader.byte_count * vertex_num;
const u32 vertex_num = vs_input_index_max - vs_input_index_min + 1;
const u32 data_size = loader.byte_count * vertex_num;
res_cache.FlushRegion(data_addr, data_size, nullptr);
std::memcpy(array_ptr, VideoCore::g_memory->GetPhysicalPointer(data_addr), data_size);
res_cache.FlushRegion(data_addr, data_size);
std::memcpy(array_ptr, memory.GetPhysicalPointer(data_addr), data_size);
array_ptr += data_size;
buffer_offset += data_size;
@ -287,8 +277,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset,
bool RasterizerOpenGL::SetupVertexShader() {
MICROPROFILE_SCOPE(OpenGL_VS);
return shader_program_manager->UseProgrammableVertexShader(Pica::g_state.regs,
Pica::g_state.vs);
return shader_program_manager->UseProgrammableVertexShader(regs, Pica::g_state.vs);
}
bool RasterizerOpenGL::SetupGeometryShader() {
@ -400,8 +389,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
const Framebuffer framebuffer =
res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb);
const bool has_color = framebuffer.HasAttachment(SurfaceType::Color);
const bool has_depth_stencil = framebuffer.HasAttachment(SurfaceType::DepthStencil);
if (!has_color && (shadow_rendering || !has_depth_stencil)) {
if (!has_color && shadow_rendering) {
return true;
}
@ -520,8 +508,9 @@ void RasterizerOpenGL::SyncTextureUnits(const Framebuffer& framebuffer) {
if (texture_index == 0) {
switch (texture.config.type.Value()) {
case TextureType::Shadow2D: {
auto surface = res_cache.GetTextureSurface(texture);
state.image_shadow_texture_px = surface->Handle();
Surface& surface = res_cache.GetTextureSurface(texture);
surface.flags |= VideoCore::SurfaceFlagBits::ShadowMap;
state.image_shadow_texture_px = surface.Handle();
continue;
}
case TextureType::ShadowCube: {
@ -538,22 +527,14 @@ void RasterizerOpenGL::SyncTextureUnits(const Framebuffer& framebuffer) {
}
// Sync texture unit sampler
texture_samplers[texture_index].SyncWithConfig(texture.config);
Sampler& sampler = res_cache.GetSampler(texture.config);
state.texture_units[texture_index].sampler = sampler.Handle();
// Bind the texture provided by the rasterizer cache
auto surface = res_cache.GetTextureSurface(texture);
if (!surface) {
// Can occur when texture addr is null or its memory is unmapped/invalid
// HACK: In this case, the correct behaviour for the PICA is to use the last
// rendered colour. But because this would be impractical to implement, the
// next best alternative is to use a clear texture, essentially skipping
// the geometry in question.
// For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn
// on the male character's face, which in the OpenGL default appear black.
state.texture_units[texture_index].texture_2d = default_texture;
} else if (!IsFeedbackLoop(texture_index, framebuffer, *surface)) {
BindMaterial(texture_index, *surface);
state.texture_units[texture_index].texture_2d = surface->Handle();
Surface& surface = res_cache.GetTextureSurface(texture);
if (!IsFeedbackLoop(texture_index, framebuffer, surface)) {
BindMaterial(texture_index, surface);
state.texture_units[texture_index].texture_2d = surface.Handle();
}
}
}
@ -570,8 +551,10 @@ void RasterizerOpenGL::BindShadowCube(const Pica::TexturingRegs::FullTextureConf
const u32 binding = static_cast<u32>(face);
info.physical_address = regs.texturing.GetCubePhysicalAddress(face);
auto surface = res_cache.GetTextureSurface(info);
state.image_shadow_texture[binding] = surface->Handle();
VideoCore::SurfaceId surface_id = res_cache.GetTextureSurface(info);
Surface& surface = res_cache.GetSurface(surface_id);
surface.flags |= VideoCore::SurfaceFlagBits::ShadowMap;
state.image_shadow_texture[binding] = surface.Handle();
}
}
@ -589,10 +572,11 @@ void RasterizerOpenGL::BindTextureCube(const Pica::TexturingRegs::FullTextureCon
.format = texture.format,
};
auto surface = res_cache.GetTextureCube(config);
texture_cube_sampler.SyncWithConfig(texture.config);
Surface& surface = res_cache.GetTextureCube(config);
Sampler& sampler = res_cache.GetSampler(texture.config);
state.texture_cube_unit.texture_cube = surface->Handle();
state.texture_cube_unit.texture_cube = surface.Handle();
state.texture_cube_unit.sampler = sampler.Handle();
state.texture_units[0].texture_2d = 0;
}
@ -608,7 +592,7 @@ void RasterizerOpenGL::BindMaterial(u32 texture_index, Surface& surface) {
glBindSampler(unit.id, sampler);
};
const GLuint sampler = texture_samplers[texture_index].sampler.handle;
const GLuint sampler = state.texture_units[texture_index].sampler;
if (surface.HasNormalMap()) {
if (regs.lighting.disable) {
LOG_WARNING(Render_OpenGL, "Custom normal map used but scene has no light enabled");
@ -726,24 +710,20 @@ void RasterizerOpenGL::NotifyFixedFunctionPicaRegisterChanged(u32 id) {
}
void RasterizerOpenGL::FlushAll() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.FlushAll();
}
void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.FlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.InvalidateRegion(addr, size, nullptr);
res_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.FlushRegion(addr, size);
res_cache.InvalidateRegion(addr, size, nullptr);
res_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::ClearAll(bool flush) {
@ -768,7 +748,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con
if (framebuffer_addr == 0) {
return false;
}
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
MICROPROFILE_SCOPE(OpenGL_Display);
VideoCore::SurfaceParams src_params;
src_params.addr = framebuffer_addr;
@ -779,85 +759,27 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con
src_params.pixel_format = VideoCore::PixelFormatFromGPUPixelFormat(config.color_format);
src_params.UpdateParams();
auto [src_surface, src_rect] =
const auto [src_surface_id, src_rect] =
res_cache.GetSurfaceSubRect(src_params, VideoCore::ScaleMatch::Ignore, true);
if (src_surface == nullptr) {
if (!src_surface_id) {
return false;
}
const u32 scaled_width = src_surface->GetScaledWidth();
const u32 scaled_height = src_surface->GetScaledHeight();
const Surface& src_surface = res_cache.GetSurface(src_surface_id);
const u32 scaled_width = src_surface.GetScaledWidth();
const u32 scaled_height = src_surface.GetScaledHeight();
screen_info.display_texcoords = Common::Rectangle<float>(
(float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width,
(float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width);
screen_info.display_texture = src_surface->Handle();
screen_info.display_texture = src_surface.Handle();
return true;
}
void RasterizerOpenGL::SamplerInfo::Create() {
sampler.Create();
mag_filter = min_filter = mip_filter = TextureConfig::Linear;
wrap_s = wrap_t = TextureConfig::Repeat;
border_color = 0;
lod_min = lod_max = 0;
// default is 1000 and -1000
// Other attributes have correct defaults
glSamplerParameterf(sampler.handle, GL_TEXTURE_MAX_LOD, static_cast<float>(lod_max));
glSamplerParameterf(sampler.handle, GL_TEXTURE_MIN_LOD, static_cast<float>(lod_min));
}
void RasterizerOpenGL::SamplerInfo::SyncWithConfig(
const Pica::TexturingRegs::TextureConfig& config) {
GLuint s = sampler.handle;
if (mag_filter != config.mag_filter) {
mag_filter = config.mag_filter;
glSamplerParameteri(s, GL_TEXTURE_MAG_FILTER, PicaToGL::TextureMagFilterMode(mag_filter));
}
if (min_filter != config.min_filter || mip_filter != config.mip_filter) {
min_filter = config.min_filter;
mip_filter = config.mip_filter;
glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER,
PicaToGL::TextureMinFilterMode(min_filter, mip_filter));
}
if (wrap_s != config.wrap_s) {
wrap_s = config.wrap_s;
glSamplerParameteri(s, GL_TEXTURE_WRAP_S, PicaToGL::WrapMode(wrap_s));
}
if (wrap_t != config.wrap_t) {
wrap_t = config.wrap_t;
glSamplerParameteri(s, GL_TEXTURE_WRAP_T, PicaToGL::WrapMode(wrap_t));
}
if (wrap_s == TextureConfig::ClampToBorder || wrap_t == TextureConfig::ClampToBorder) {
if (border_color != config.border_color.raw) {
border_color = config.border_color.raw;
auto gl_color = PicaToGL::ColorRGBA8(border_color);
glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, gl_color.AsArray());
}
}
if (lod_min != config.lod.min_level) {
lod_min = config.lod.min_level;
glSamplerParameterf(s, GL_TEXTURE_MIN_LOD, static_cast<float>(lod_min));
}
if (lod_max != config.lod.max_level) {
lod_max = config.lod.max_level;
glSamplerParameterf(s, GL_TEXTURE_MAX_LOD, static_cast<float>(lod_max));
}
}
void RasterizerOpenGL::SyncClipEnabled() {
state.clip_distance[1] = Pica::g_state.regs.rasterizer.clip_enable != 0;
state.clip_distance[1] = regs.rasterizer.clip_enable != 0;
}
void RasterizerOpenGL::SyncCullMode() {
@ -885,7 +807,7 @@ void RasterizerOpenGL::SyncCullMode() {
}
void RasterizerOpenGL::SyncBlendEnabled() {
state.blend.enabled = (Pica::g_state.regs.framebuffer.output_merger.alphablend_enable == 1);
state.blend.enabled = (regs.framebuffer.output_merger.alphablend_enable == 1);
}
void RasterizerOpenGL::SyncBlendFuncs() {
@ -904,8 +826,7 @@ void RasterizerOpenGL::SyncBlendFuncs() {
}
void RasterizerOpenGL::SyncBlendColor() {
auto blend_color =
PicaToGL::ColorRGBA8(Pica::g_state.regs.framebuffer.output_merger.blend_const.raw);
auto blend_color = PicaToGL::ColorRGBA8(regs.framebuffer.output_merger.blend_const.raw);
state.blend.color.red = blend_color[0];
state.blend.color.green = blend_color[1];
state.blend.color.blue = blend_color[2];

View File

@ -55,28 +55,6 @@ private:
void SyncFixedState() override;
void NotifyFixedFunctionPicaRegisterChanged(u32 id) override;
struct SamplerInfo {
using TextureConfig = Pica::TexturingRegs::TextureConfig;
OGLSampler sampler;
/// Creates the sampler object, initializing its state so that it's in sync with the
/// SamplerInfo struct.
void Create();
/// Syncs the sampler object with the config, updating any necessary state.
void SyncWithConfig(const TextureConfig& config);
private:
TextureConfig::TextureFilter mag_filter;
TextureConfig::TextureFilter min_filter;
TextureConfig::TextureFilter mip_filter;
TextureConfig::WrapMode wrap_s;
TextureConfig::WrapMode wrap_t;
u32 border_color;
u32 lod_min;
u32 lod_max;
};
/// Syncs the clip enabled status to match the PICA register
void SyncClipEnabled();
@ -156,14 +134,13 @@ private:
OpenGLState state;
GLuint default_texture;
TextureRuntime runtime;
VideoCore::RasterizerCache res_cache;
RasterizerCache res_cache;
std::unique_ptr<ShaderProgramManager> shader_program_manager;
OGLVertexArray sw_vao; // VAO for software shader draw
OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw
std::array<bool, 16> hw_vao_enabled_attributes{};
std::array<SamplerInfo, 3> texture_samplers;
GLsizeiptr texture_buffer_size;
OGLStreamBuffer vertex_buffer;
OGLStreamBuffer uniform_buffer;
@ -175,8 +152,6 @@ private:
std::size_t uniform_size_aligned_vs;
std::size_t uniform_size_aligned_fs;
SamplerInfo texture_cube_sampler;
OGLTexture texture_buffer_lut_lf;
OGLTexture texture_buffer_lut_rg;
OGLTexture texture_buffer_lut_rgba;

View File

@ -0,0 +1,10 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/rasterizer_cache/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_texture_runtime.h"
namespace VideoCore {
template class RasterizerCache<OpenGL::Traits>;
} // namespace VideoCore

View File

@ -9,6 +9,7 @@
#include "video_core/renderer_opengl/gl_driver.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_runtime.h"
#include "video_core/renderer_opengl/pica_to_gl.h"
namespace OpenGL {
@ -16,6 +17,7 @@ namespace {
using VideoCore::MapType;
using VideoCore::PixelFormat;
using VideoCore::SurfaceFlagBits;
using VideoCore::SurfaceType;
using VideoCore::TextureType;
@ -116,20 +118,11 @@ struct FramebufferInfo {
} // Anonymous namespace
TextureRuntime::TextureRuntime(const Driver& driver_, VideoCore::RendererBase& renderer)
: driver{driver_}, blit_helper{*this} {
: driver{driver_}, blit_helper{driver} {
for (std::size_t i = 0; i < draw_fbos.size(); ++i) {
draw_fbos[i].Create();
read_fbos[i].Create();
}
auto add_reinterpreter = [this](PixelFormat dest,
std::unique_ptr<FormatReinterpreterBase>&& obj) {
const u32 dst_index = static_cast<u32>(dest);
return reinterpreters[dst_index].push_back(std::move(obj));
};
add_reinterpreter(PixelFormat::RGBA8, std::make_unique<ShaderD24S8toRGBA8>());
add_reinterpreter(PixelFormat::RGB5A1, std::make_unique<RGBA4toRGB5A1>());
}
TextureRuntime::~TextureRuntime() = default;
@ -241,14 +234,30 @@ Allocation TextureRuntime::Allocate(const VideoCore::SurfaceParams& params,
.height = params.height,
.levels = params.levels,
.res_scale = params.res_scale,
.is_custom = is_custom,
};
}
bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear) {
const auto prev_state = OpenGLState::GetCurState();
bool TextureRuntime::Reinterpret(Surface& source, Surface& dest,
const VideoCore::TextureBlit& blit) {
const PixelFormat src_format = source.pixel_format;
const PixelFormat dst_format = dest.pixel_format;
ASSERT_MSG(src_format != dst_format, "Reinterpretation with the same format is invalid");
if (src_format == PixelFormat::D24S8 && dst_format == PixelFormat::RGBA8) {
blit_helper.ConvertDS24S8ToRGBA8(source, dest, blit);
} else if (src_format == PixelFormat::RGBA4 && dst_format == PixelFormat::RGB5A1) {
blit_helper.ConvertRGBA4ToRGB5A1(source, dest, blit);
} else {
LOG_WARNING(Render_OpenGL, "Unimplemented reinterpretation {} -> {}",
VideoCore::PixelFormatAsString(src_format),
VideoCore::PixelFormatAsString(dst_format));
return false;
}
return true;
}
// Setup scissor rectangle according to the clear rectangle
OpenGLState state;
bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear) {
OpenGLState state = OpenGLState::GetCurState();
state.scissor.enabled = true;
state.scissor.x = clear.texture_rect.left;
state.scissor.y = clear.texture_rect.bottom;
@ -257,42 +266,27 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
state.draw.draw_framebuffer = draw_fbos[FboIndex(surface.type)].handle;
state.Apply();
surface.Attach(GL_DRAW_FRAMEBUFFER, clear.texture_level, 0);
switch (surface.type) {
case SurfaceType::Color:
case SurfaceType::Texture:
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
surface.Handle(), clear.texture_level);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0);
state.color_mask.red_enabled = true;
state.color_mask.green_enabled = true;
state.color_mask.blue_enabled = true;
state.color_mask.alpha_enabled = true;
state.Apply();
glClearBufferfv(GL_COLOR, 0, clear.value.color.AsArray());
break;
case SurfaceType::Depth:
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
surface.Handle(), clear.texture_level);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
state.depth.write_mask = GL_TRUE;
state.Apply();
glClearBufferfv(GL_DEPTH, 0, &clear.value.depth);
break;
case SurfaceType::DepthStencil:
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
surface.Handle(), clear.texture_level);
state.depth.write_mask = GL_TRUE;
state.stencil.write_mask = -1;
state.Apply();
glClearBufferfi(GL_DEPTH_STENCIL, 0, clear.value.depth, clear.value.stencil);
break;
default:
@ -300,7 +294,6 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
return false;
}
prev_state.Apply();
return true;
}
@ -329,13 +322,12 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest,
source.Attach(GL_READ_FRAMEBUFFER, blit.src_level, blit.src_layer);
dest.Attach(GL_DRAW_FRAMEBUFFER, blit.dst_level, blit.dst_layer);
// TODO (wwylele): use GL_NEAREST for shadow map texture
// Note: shadow map is treated as RGBA8 format in PICA, as well as in the rasterizer cache, but
// doing linear intepolation componentwise would cause incorrect value. However, for a
// well-programmed game this code path should be rarely executed for shadow map with
// inconsistent scale.
// doing linear intepolation componentwise would cause incorrect value.
const GLbitfield buffer_mask = MakeBufferMask(source.type);
const GLenum filter = buffer_mask == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST;
const bool is_shadow_map = True(source.flags & SurfaceFlagBits::ShadowMap);
const GLenum filter =
buffer_mask == GL_COLOR_BUFFER_BIT && !is_shadow_map ? GL_LINEAR : GL_NEAREST;
glBlitFramebuffer(blit.src_rect.left, blit.src_rect.bottom, blit.src_rect.right,
blit.src_rect.top, blit.dst_rect.left, blit.dst_rect.bottom,
blit.dst_rect.right, blit.dst_rect.top, buffer_mask, filter);
@ -359,11 +351,6 @@ void TextureRuntime::GenerateMipmaps(Surface& surface) {
}
}
const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations(
PixelFormat dest_format) const {
return reinterpreters[static_cast<u32>(dest_format)];
}
Surface::Surface(TextureRuntime& runtime_, const VideoCore::SurfaceParams& params)
: SurfaceBase{params}, driver{&runtime_.GetDriver()}, runtime{&runtime_} {
if (pixel_format == PixelFormat::Invalid) {
@ -422,15 +409,19 @@ void Surface::UploadCustom(const VideoCore::Material* material, u32 level) {
glActiveTexture(GL_TEXTURE0);
glPixelStorei(GL_UNPACK_ROW_LENGTH, width);
glBindTexture(GL_TEXTURE_2D, Handle(0));
if (VideoCore::IsCustomFormatCompressed(custom_format)) {
const GLsizei image_size = static_cast<GLsizei>(color->data.size());
glCompressedTexSubImage2D(GL_TEXTURE_2D, level, 0, 0, width, height, tuple.format,
image_size, color->data.data());
} else {
glTexSubImage2D(GL_TEXTURE_2D, level, 0, 0, width, height, tuple.format, tuple.type,
color->data.data());
}
const auto upload = [&](u32 index, VideoCore::CustomTexture* texture) {
glBindTexture(GL_TEXTURE_2D, Handle(index));
if (VideoCore::IsCustomFormatCompressed(custom_format)) {
const GLsizei image_size = static_cast<GLsizei>(texture->data.size());
glCompressedTexSubImage2D(GL_TEXTURE_2D, level, 0, 0, width, height, tuple.format,
image_size, texture->data.data());
} else {
glTexSubImage2D(GL_TEXTURE_2D, level, 0, 0, width, height, tuple.format, tuple.type,
texture->data.data());
}
};
upload(0, color);
const VideoCore::TextureBlit blit = {
.src_rect = filter_rect,
@ -444,15 +435,7 @@ void Surface::UploadCustom(const VideoCore::Material* material, u32 level) {
if (!texture) {
continue;
}
glBindTexture(GL_TEXTURE_2D, Handle(i + 1));
if (VideoCore::IsCustomFormatCompressed(custom_format)) {
const GLsizei image_size = static_cast<GLsizei>(texture->data.size());
glCompressedTexSubImage2D(GL_TEXTURE_2D, level, 0, 0, width, height, tuple.format,
image_size, texture->data.data());
} else {
glTexSubImage2D(GL_TEXTURE_2D, level, 0, 0, width, height, tuple.format, tuple.type,
texture->data.data());
}
upload(i + 1, texture);
}
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
@ -572,7 +555,6 @@ bool Surface::Swap(const VideoCore::Material* mat) {
GetScaledWidth(), GetScaledHeight(), VideoCore::PixelFormatAsString(pixel_format),
addr, width, height, VideoCore::CustomPixelFormatAsString(format));
is_custom = true;
custom_format = format;
material = mat;
@ -614,13 +596,13 @@ HostTextureTag Surface::MakeTag() const noexcept {
.res_scale = alloc.res_scale,
.tuple = alloc.tuple,
.type = texture_type,
.is_custom = is_custom,
.is_custom = alloc.is_custom,
.has_normal = HasNormalMap(),
};
}
Framebuffer::Framebuffer(TextureRuntime& runtime, Surface* const color, u32 color_level,
Surface* const depth_stencil, u32 depth_level, const Pica::Regs& regs,
Framebuffer::Framebuffer(TextureRuntime& runtime, const Surface* color, u32 color_level,
const Surface* depth_stencil, u32 depth_level, const Pica::Regs& regs,
Common::Rectangle<u32> surfaces_rect)
: VideoCore::FramebufferBase{regs, color, color_level,
depth_stencil, depth_level, surfaces_rect} {
@ -692,4 +674,30 @@ Framebuffer::Framebuffer(TextureRuntime& runtime, Surface* const color, u32 colo
Framebuffer::~Framebuffer() = default;
Sampler::Sampler(TextureRuntime&, VideoCore::SamplerParams params) {
const GLenum mag_filter = PicaToGL::TextureMagFilterMode(params.mag_filter);
const GLenum min_filter = PicaToGL::TextureMinFilterMode(params.min_filter, params.mip_filter);
const GLenum wrap_s = PicaToGL::WrapMode(params.wrap_s);
const GLenum wrap_t = PicaToGL::WrapMode(params.wrap_t);
const Common::Vec4f gl_color = PicaToGL::ColorRGBA8(params.border_color);
const float lod_min = params.lod_min;
const float lod_max = params.lod_max;
sampler.Create();
const GLuint handle = sampler.handle;
glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag_filter);
glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min_filter);
glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, wrap_s);
glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, wrap_t);
glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, gl_color.AsArray());
glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, lod_min);
glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, lod_max);
}
Sampler::~Sampler() = default;
} // namespace OpenGL

View File

@ -5,9 +5,8 @@
#pragma once
#include "video_core/rasterizer_cache/framebuffer_base.h"
#include "video_core/rasterizer_cache/surface_base.h"
#include "video_core/rasterizer_cache/rasterizer_cache_base.h"
#include "video_core/renderer_opengl/gl_blit_helper.h"
#include "video_core/renderer_opengl/gl_format_reinterpreter.h"
namespace VideoCore {
struct Material;
@ -60,6 +59,7 @@ struct Allocation {
u32 height;
u32 levels;
u32 res_scale;
bool is_custom;
operator bool() const noexcept {
return textures[0].handle;
@ -76,7 +76,6 @@ class Driver;
class TextureRuntime {
friend class Surface;
friend class Framebuffer;
friend class BlitHelper;
public:
explicit TextureRuntime(const Driver& driver, VideoCore::RendererBase& renderer);
@ -95,12 +94,8 @@ public:
const FormatTuple& GetFormatTuple(VideoCore::PixelFormat pixel_format) const;
const FormatTuple& GetFormatTuple(VideoCore::CustomPixelFormat pixel_format);
/// Takes back ownership of the allocation for recycling
void Recycle(const HostTextureTag tag, Allocation&& alloc);
/// Allocates a texture with the specified dimentions and format
Allocation Allocate(const VideoCore::SurfaceParams& params,
const VideoCore::Material* material = nullptr);
/// Attempts to reinterpret a rectangle of source to another rectangle of dest
bool Reinterpret(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit);
/// Fills the rectangle of the texture with the clear value provided
bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear);
@ -114,10 +109,14 @@ public:
/// Generates mipmaps for all the available levels of the texture
void GenerateMipmaps(Surface& surface);
/// Returns all source formats that support reinterpretation to the dest format
const ReinterpreterList& GetPossibleReinterpretations(VideoCore::PixelFormat dest_format) const;
private:
/// Takes back ownership of the allocation for recycling
void Recycle(const HostTextureTag tag, Allocation&& alloc);
/// Allocates a texture with the specified dimentions and format
Allocation Allocate(const VideoCore::SurfaceParams& params,
const VideoCore::Material* material = nullptr);
/// Returns the OpenGL driver class
const Driver& GetDriver() const {
return driver;
@ -127,7 +126,6 @@ private:
const Driver& driver;
BlitHelper blit_helper;
std::vector<u8> staging_buffer;
std::array<ReinterpreterList, VideoCore::PIXEL_FORMAT_COUNT> reinterpreters;
std::unordered_multimap<HostTextureTag, Allocation, HostTextureTag::Hash> alloc_cache;
std::unordered_map<u64, OGLFramebuffer, Common::IdentityHash<u64>> framebuffer_cache;
std::array<OGLFramebuffer, 3> draw_fbos;
@ -145,24 +143,14 @@ public:
Surface(Surface&& o) noexcept = default;
Surface& operator=(Surface&& o) noexcept = default;
/// Returns the surface image handle at the provided index.
GLuint Handle(u32 index = 1) const noexcept {
[[nodiscard]] GLuint Handle(u32 index = 1) const noexcept {
return alloc.handles[index];
}
/// Returns the tuple of the surface allocation.
const FormatTuple& Tuple() const noexcept {
[[nodiscard]] const FormatTuple& Tuple() const noexcept {
return alloc.tuple;
}
/// Returns the extent of the underlying surface allocation
VideoCore::Extent Extent() const noexcept {
return {
.width = alloc.width,
.height = alloc.height,
};
}
/// Uploads pixel data in staging to a rectangle region of the surface texture
void Upload(const VideoCore::BufferTextureCopy& upload, const VideoCore::StagingData& staging);
@ -201,8 +189,8 @@ private:
class Framebuffer : public VideoCore::FramebufferBase {
public:
explicit Framebuffer(TextureRuntime& runtime, Surface* const color, u32 color_level,
Surface* const depth_stencil, u32 depth_level, const Pica::Regs& regs,
explicit Framebuffer(TextureRuntime& runtime, const Surface* color, u32 color_level,
const Surface* depth_stencil, u32 depth_level, const Pica::Regs& regs,
Common::Rectangle<u32> surfaces_rect);
~Framebuffer();
@ -223,4 +211,32 @@ private:
GLuint handle{};
};
class Sampler {
public:
explicit Sampler(TextureRuntime&, VideoCore::SamplerParams params);
~Sampler();
Sampler(const Sampler&) = delete;
Sampler& operator=(const Sampler&) = delete;
Sampler(Sampler&&) = default;
Sampler& operator=(Sampler&&) = default;
[[nodiscard]] GLuint Handle() const noexcept {
return sampler.handle;
}
private:
OGLSampler sampler;
};
struct Traits {
using Runtime = OpenGL::TextureRuntime;
using Sampler = OpenGL::Sampler;
using Surface = OpenGL::Surface;
using Framebuffer = OpenGL::Framebuffer;
};
using RasterizerCache = VideoCore::RasterizerCache<Traits>;
} // namespace OpenGL