rasterizer_cache: Fixes to (unaligned) texture downloads (#6697)

* rasterizer_cache: Header cleanup

* gl_texture_runtime: Fix incorrect stride in single scanline downloads

* texture_codec: Fix unaligned texture downloads
This commit is contained in:
GPUCode 2023-07-18 17:31:31 +03:00 committed by GitHub
parent 700c00f021
commit e783b0d4a9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 105 additions and 75 deletions

View File

@ -42,11 +42,13 @@ add_library(video_core STATIC
rasterizer_cache/rasterizer_cache.h
rasterizer_cache/rasterizer_cache_base.h
rasterizer_cache/sampler_params.h
rasterizer_cache/slot_id.h
rasterizer_cache/surface_base.cpp
rasterizer_cache/surface_base.h
rasterizer_cache/surface_params.cpp
rasterizer_cache/surface_params.h
rasterizer_cache/texture_codec.h
rasterizer_cache/texture_cube.h
rasterizer_cache/utils.cpp
rasterizer_cache/utils.h
renderer_opengl/frame_dumper_opengl.cpp

View File

@ -14,6 +14,7 @@
#include "core/memory.h"
#include "video_core/custom_textures/custom_tex_manager.h"
#include "video_core/rasterizer_cache/rasterizer_cache_base.h"
#include "video_core/rasterizer_cache/surface_base.h"
#include "video_core/regs.h"
#include "video_core/renderer_base.h"
#include "video_core/texture/texture_decode.h"
@ -1212,7 +1213,7 @@ void RasterizerCache<T>::ClearAll(bool flush) {
// Remove the whole cache without really looking at it.
cached_pages -= flush_interval;
dirty_regions -= SurfaceInterval(0x0, 0xFFFFFFFF);
dirty_regions.clear();
page_table.clear();
remove_surfaces.clear();
}

View File

@ -11,7 +11,8 @@
#include <boost/icl/interval_map.hpp>
#include <tsl/robin_map.h>
#include "video_core/rasterizer_cache/sampler_params.h"
#include "video_core/rasterizer_cache/surface_base.h"
#include "video_core/rasterizer_cache/surface_params.h"
#include "video_core/rasterizer_cache/texture_cube.h"
namespace Memory {
class MemorySystem;
@ -70,12 +71,6 @@ class RasterizerCache {
SurfaceId depth_id;
};
/// Groups one surface id with six per-face surface ids and six 64-bit tick values.
/// NOTE(review): presumably `surface_id` is the assembled cube texture, `face_ids` are the
/// source surfaces of its faces and `ticks` records when each face was last copied — confirm
/// against the code that fills this struct.
struct TextureCube {
SurfaceId surface_id;
std::array<SurfaceId, 6> face_ids;
std::array<u64, 6> ticks;
};
public:
explicit RasterizerCache(Memory::MemorySystem& memory, CustomTexManager& custom_tex_manager,
Runtime& runtime, Pica::Regs& regs, RendererBase& renderer);

View File

@ -0,0 +1,21 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once

#include "common/slot_vector.h"

namespace VideoCore {

/// Identifier type for surfaces; an alias of Common::SlotId.
using SurfaceId = Common::SlotId;
/// Identifier type for samplers; an alias of Common::SlotId.
using SamplerId = Common::SlotId;

// The constants below are `inline` so that every translation unit including this header
// refers to the same object instead of getting its own internal-linkage copy.

/// Fake surface ID for null surfaces
inline constexpr SurfaceId NULL_SURFACE_ID{0};
/// Fake surface ID for null cube surfaces
inline constexpr SurfaceId NULL_SURFACE_CUBE_ID{1};
/// Fake sampler ID for null samplers
inline constexpr SamplerId NULL_SAMPLER_ID{0};

} // namespace VideoCore

View File

@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <bit>
#include <span>
@ -264,6 +265,7 @@ static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 en
const u32 aligned_down_start_offset = Common::AlignDown(start_offset, tile_size);
const u32 aligned_start_offset = Common::AlignUp(start_offset, tile_size);
const u32 aligned_end_offset = Common::AlignDown(end_offset, tile_size);
const u32 begin_pixel_index = aligned_down_start_offset * 8 / GetFormatBpp(format);
ASSERT(!morton_to_linear ||
(aligned_start_offset == start_offset && aligned_end_offset == end_offset));
@ -271,12 +273,12 @@ static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 en
// In OpenGL the texture origin is in the bottom left corner as opposed to other
// APIs that have it at the top left. To avoid flipping texture coordinates in
// the shader we read/write the linear buffer from the bottom up
u32 linear_offset = ((height - 8) * width) * aligned_bytes_per_pixel;
u32 x = (begin_pixel_index % (width * 8)) / 8;
u32 y = (begin_pixel_index / (width * 8)) * 8;
u32 linear_offset = ((height - 8 - y) * width + x) * aligned_bytes_per_pixel;
u32 tiled_offset = 0;
u32 x = 0;
u32 y = 0;
const auto LinearNextTile = [&] {
const auto linear_next_tile = [&] {
x = (x + 8) % width;
linear_offset += 8 * aligned_bytes_per_pixel;
if (!x) {
@ -300,7 +302,7 @@ static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 en
std::min(aligned_start_offset, end_offset) - start_offset);
tiled_offset += aligned_start_offset - start_offset;
LinearNextTile();
linear_next_tile();
}
// If the copy spans multiple tiles, copy the fully aligned tiles in between.
@ -313,7 +315,7 @@ static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 en
auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size);
MortonCopyTile<morton_to_linear, format, converted>(width, tiled_data, linear_data);
tiled_offset += tile_size;
LinearNextTile();
linear_next_tile();
}
}

View File

@ -0,0 +1,52 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstring>
#include "common/hash.h"
#include "video_core/rasterizer_cache/slot_id.h"
#include "video_core/regs_texturing.h"
namespace VideoCore {
/// Groups one surface id with six per-face surface ids and six 64-bit tick values.
/// NOTE(review): presumably `surface_id` is the assembled cube texture, `face_ids` are the
/// source surfaces of its faces and `ticks` records when each face was last copied — confirm
/// against the code that fills this struct.
struct TextureCube {
SurfaceId surface_id;
std::array<SurfaceId, 6> face_ids;
std::array<u64, 6> ticks;
};
/// Lookup key for a cube texture: six PAddr values named after the cube faces
/// (+x, -x, +y, -y, +z, -z) together with a width, a level count and a texture format.
///
/// Equality and hashing both operate on the raw bytes of the object, so they stay
/// consistent with each other.
/// NOTE(review): bytewise comparison is only sound while the struct has no padding bytes;
/// presumably every member is 32 bits wide — confirm before adding members of another size.
struct TextureCubeConfig {
    PAddr px;
    PAddr nx;
    PAddr py;
    PAddr ny;
    PAddr pz;
    PAddr nz;
    u32 width;
    u32 levels;
    Pica::TexturingRegs::TextureFormat format;

    /// Returns true when both configurations are bytewise identical.
    bool operator==(const TextureCubeConfig& rhs) const {
        return std::memcmp(this, &rhs, sizeof(TextureCubeConfig)) == 0;
    }

    /// Exact negation of operator==; defined through it so the two can never disagree.
    bool operator!=(const TextureCubeConfig& rhs) const {
        return !(*this == rhs);
    }

    /// Returns a 64-bit hash computed over the raw bytes of this object.
    /// Returned by plain value: a top-level const on a by-value return has no effect.
    u64 Hash() const {
        return Common::ComputeHash64(this, sizeof(TextureCubeConfig));
    }
};
} // namespace VideoCore
namespace std {

/// std::hash specialization for VideoCore::TextureCubeConfig.
/// Forwards to TextureCubeConfig::Hash() and converts the result to std::size_t.
template <>
struct hash<VideoCore::TextureCubeConfig> {
    std::size_t operator()(const VideoCore::TextureCubeConfig& config) const noexcept {
        const auto digest = config.Hash();
        return static_cast<std::size_t>(digest);
    }
};

} // namespace std

View File

@ -5,24 +5,11 @@
#pragma once
#include <span>
#include "common/hash.h"
#include "common/math_util.h"
#include "common/slot_vector.h"
#include "common/vector_math.h"
#include "video_core/regs_texturing.h"
namespace VideoCore {
using SurfaceId = Common::SlotId;
using SamplerId = Common::SlotId;
/// Fake surface ID for null surfaces
constexpr SurfaceId NULL_SURFACE_ID{0};
/// Fake surface ID for null cube surfaces
constexpr SurfaceId NULL_SURFACE_CUBE_ID{1};
/// Fake sampler ID for null samplers
constexpr SamplerId NULL_SAMPLER_ID{0};
struct Offset {
u32 x = 0;
u32 y = 0;
@ -79,30 +66,6 @@ struct StagingData {
std::span<u8> mapped;
};
/// Lookup key for a cube texture: six PAddr values named after the cube faces
/// (+x, -x, +y, -y, +z, -z) together with a width, a level count and a texture format.
///
/// Equality and hashing both operate on the raw bytes of the object, so they stay
/// consistent with each other.
/// NOTE(review): bytewise comparison is only sound while the struct has no padding bytes;
/// presumably every member is 32 bits wide — confirm before adding members of another size.
struct TextureCubeConfig {
    PAddr px;
    PAddr nx;
    PAddr py;
    PAddr ny;
    PAddr pz;
    PAddr nz;
    u32 width;
    u32 levels;
    Pica::TexturingRegs::TextureFormat format;

    /// Returns true when both configurations are bytewise identical.
    bool operator==(const TextureCubeConfig& rhs) const {
        return std::memcmp(this, &rhs, sizeof(TextureCubeConfig)) == 0;
    }

    /// Exact negation of operator==; defined through it so the two can never disagree.
    bool operator!=(const TextureCubeConfig& rhs) const {
        return !(*this == rhs);
    }

    /// Returns a 64-bit hash computed over the raw bytes of this object.
    /// Returned by plain value: a top-level const on a by-value return has no effect.
    u64 Hash() const {
        return Common::ComputeHash64(this, sizeof(TextureCubeConfig));
    }
};
class SurfaceParams;
u32 MipLevels(u32 width, u32 height, u32 max_level);
@ -134,12 +97,3 @@ void DecodeTexture(const SurfaceParams& surface_info, PAddr start_addr, PAddr en
std::span<u8> source, std::span<u8> dest, bool convert = false);
} // namespace VideoCore
namespace std {

/// std::hash specialization for VideoCore::TextureCubeConfig.
/// Forwards to TextureCubeConfig::Hash() and converts the result to std::size_t.
template <>
struct hash<VideoCore::TextureCubeConfig> {
    std::size_t operator()(const VideoCore::TextureCubeConfig& config) const noexcept {
        const auto digest = config.Hash();
        return static_cast<std::size_t>(digest);
    }
};

} // namespace std

View File

@ -484,20 +484,19 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download,
bool Surface::DownloadWithoutFbo(const VideoCore::BufferTextureCopy& download,
const VideoCore::StagingData& staging) {
const bool is_full_download = download.texture_rect == GetRect();
const bool has_sub_image = driver->HasArbGetTextureSubImage();
if (driver->IsOpenGLES() || (!is_full_download && !has_sub_image)) {
if (driver->IsOpenGLES()) {
return false;
}
const GLuint old_tex = OpenGLState::GetCurState().texture_units[0].texture_2d;
const auto& tuple = runtime->GetFormatTuple(pixel_format);
const u32 unscaled_width = download.texture_rect.GetWidth();
glActiveTexture(GL_TEXTURE0);
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride));
glPixelStorei(GL_PACK_ROW_LENGTH, unscaled_width);
SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });
// Prefer glGetTextureSubImage in most cases since it's the fastest and most convenient option
const bool is_full_download = download.texture_rect == GetRect();
const bool has_sub_image = driver->HasArbGetTextureSubImage();
if (has_sub_image) {
const GLsizei buf_size = static_cast<GLsizei>(staging.mapped.size());
glGetTextureSubImage(Handle(0), download.texture_level, download.texture_rect.left,
@ -505,16 +504,19 @@ bool Surface::DownloadWithoutFbo(const VideoCore::BufferTextureCopy& download,
download.texture_rect.GetHeight(), 1, tuple.format, tuple.type,
buf_size, staging.mapped.data());
return true;
}
} else if (is_full_download) {
// This should only trigger for full texture downloads on older Intel drivers
// that only support OpenGL up to version 4.3
glBindTexture(GL_TEXTURE_2D, Handle(0));
OpenGLState state = OpenGLState::GetCurState();
state.texture_units[0].texture_2d = Handle(0);
state.Apply();
glGetTexImage(GL_TEXTURE_2D, download.texture_level, tuple.format, tuple.type,
staging.mapped.data());
glBindTexture(GL_TEXTURE_2D, old_tex);
return true;
}
return false;
}
void Surface::Attach(GLenum target, u32 level, u32 layer, bool scaled) {

View File

@ -6,6 +6,7 @@
#include "video_core/rasterizer_cache/framebuffer_base.h"
#include "video_core/rasterizer_cache/rasterizer_cache_base.h"
#include "video_core/rasterizer_cache/surface_base.h"
#include "video_core/renderer_opengl/gl_blit_helper.h"
namespace VideoCore {