mirror of
https://github.com/citra-emu/citra.git
synced 2024-11-25 23:20:15 +00:00
Integrated the texture submodule into gl backend
This commit is contained in:
parent
1a4c8d510d
commit
30f0d1dbf4
@ -407,6 +407,11 @@ inline float Vec3<float>::Normalize() {
|
||||
return length;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline unsigned int Vec3<unsigned char>::ToRGB() const {
|
||||
return (z << 16) | (y << 8) | x;
|
||||
}
|
||||
|
||||
typedef Vec3<float> Vec3f;
|
||||
|
||||
template <typename T>
|
||||
@ -611,6 +616,11 @@ public:
|
||||
#undef _DEFINE_SWIZZLER3
|
||||
};
|
||||
|
||||
template <>
|
||||
inline unsigned int Vec4<unsigned char>::ToRGBA() const {
|
||||
return (w << 24) | (z << 16) | (y << 8) | x;
|
||||
}
|
||||
|
||||
template <typename T, typename V>
|
||||
Vec4<decltype(V{} * T{})> operator*(const V& f, const Vec4<T>& vec) {
|
||||
return MakeVec(f * vec.x, f * vec.y, f * vec.z, f * vec.w);
|
||||
|
@ -1,14 +1,14 @@
|
||||
set(SRCS
|
||||
texture/internal/morton.cpp
|
||||
texture/internal/etc1.cpp
|
||||
texture/codec.cpp
|
||||
texture/internal/codecs.cpp
|
||||
renderer_opengl/gl_rasterizer.cpp
|
||||
renderer_opengl/gl_rasterizer_cache.cpp
|
||||
renderer_opengl/gl_shader_gen.cpp
|
||||
renderer_opengl/gl_shader_util.cpp
|
||||
renderer_opengl/gl_state.cpp
|
||||
renderer_opengl/renderer_opengl.cpp
|
||||
texture/internal/morton.cpp
|
||||
texture/internal/etc1.cpp
|
||||
texture/internal/codecs.cpp
|
||||
texture/codec.cpp
|
||||
debug_utils/debug_utils.cpp
|
||||
clipper.cpp
|
||||
command_processor.cpp
|
||||
@ -25,12 +25,6 @@ set(SRCS
|
||||
|
||||
set(HEADERS
|
||||
debug_utils/debug_utils.h
|
||||
texture/internal/texture_utils.h
|
||||
texture/internal/morton.h
|
||||
texture/internal/etc1.h
|
||||
texture/codec.h
|
||||
texture/formats.h
|
||||
texture/internal/codecs.h
|
||||
renderer_opengl/gl_rasterizer.h
|
||||
renderer_opengl/gl_rasterizer_cache.h
|
||||
renderer_opengl/gl_resource_manager.h
|
||||
@ -39,6 +33,12 @@ set(HEADERS
|
||||
renderer_opengl/gl_state.h
|
||||
renderer_opengl/pica_to_gl.h
|
||||
renderer_opengl/renderer_opengl.h
|
||||
texture/internal/texture_utils.h
|
||||
texture/internal/morton.h
|
||||
texture/internal/etc1.h
|
||||
texture/internal/codecs.h
|
||||
texture/codec.h
|
||||
texture/formats.h
|
||||
clipper.h
|
||||
command_processor.h
|
||||
gpu_debugger.h
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "video_core/renderer_opengl/gl_shader_util.h"
|
||||
#include "video_core/renderer_opengl/pica_to_gl.h"
|
||||
#include "video_core/renderer_opengl/renderer_opengl.h"
|
||||
#include "video_core/texture/formats.h"
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
|
||||
@ -716,7 +717,6 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) {
|
||||
|
||||
bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Blits);
|
||||
using PixelFormat = CachedSurface::PixelFormat;
|
||||
using SurfaceType = CachedSurface::SurfaceType;
|
||||
|
||||
CachedSurface src_params;
|
||||
@ -728,7 +728,7 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe
|
||||
// the image, and it allows for smaller texture cache lookup rectangles.
|
||||
src_params.height = config.output_height;
|
||||
src_params.is_tiled = !config.input_linear;
|
||||
src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.input_format);
|
||||
src_params.pixel_format = Pica::Texture::Format::FromGPUPixelFormat(config.input_format);
|
||||
|
||||
CachedSurface dst_params;
|
||||
dst_params.addr = config.GetPhysicalOutputAddress();
|
||||
@ -737,7 +737,7 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe
|
||||
dst_params.height =
|
||||
config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value();
|
||||
dst_params.is_tiled = config.input_linear != config.dont_swizzle;
|
||||
dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format);
|
||||
dst_params.pixel_format = Pica::Texture::Format::FromGPUPixelFormat(config.output_format);
|
||||
|
||||
MathUtil::Rectangle<int> src_rect;
|
||||
CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect);
|
||||
@ -776,7 +776,7 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe
|
||||
}
|
||||
|
||||
u32 dst_size = dst_params.width * dst_params.height *
|
||||
CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8;
|
||||
Pica::Texture::Format::GetBpp(dst_params.pixel_format) / 8;
|
||||
dst_surface->dirty = true;
|
||||
res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true);
|
||||
return true;
|
||||
@ -789,7 +789,6 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon
|
||||
|
||||
bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Blits);
|
||||
using PixelFormat = CachedSurface::PixelFormat;
|
||||
using SurfaceType = CachedSurface::SurfaceType;
|
||||
|
||||
CachedSurface* dst_surface = res_cache.TryGetFillSurface(config);
|
||||
@ -824,7 +823,7 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config)
|
||||
|
||||
if (config.fill_24bit) {
|
||||
switch (dst_surface->pixel_format) {
|
||||
case PixelFormat::RGB8:
|
||||
case Pica::Texture::Format::Type::RGB8:
|
||||
color_values[0] = config.value_24bit_r / 255.0f;
|
||||
color_values[1] = config.value_24bit_g / 255.0f;
|
||||
color_values[2] = config.value_24bit_b / 255.0f;
|
||||
@ -836,7 +835,7 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config)
|
||||
u32 value = config.value_32bit;
|
||||
|
||||
switch (dst_surface->pixel_format) {
|
||||
case PixelFormat::RGBA8:
|
||||
case Pica::Texture::Format::Type::RGBA8:
|
||||
color_values[0] = (value >> 24) / 255.0f;
|
||||
color_values[1] = ((value >> 16) & 0xFF) / 255.0f;
|
||||
color_values[2] = ((value >> 8) & 0xFF) / 255.0f;
|
||||
@ -850,34 +849,34 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config)
|
||||
Math::Vec4<u8> color;
|
||||
|
||||
switch (dst_surface->pixel_format) {
|
||||
case PixelFormat::RGBA8:
|
||||
case Pica::Texture::Format::Type::RGBA8:
|
||||
color_values[0] = (value_16bit >> 8) / 255.0f;
|
||||
color_values[1] = (value_16bit & 0xFF) / 255.0f;
|
||||
color_values[2] = color_values[0];
|
||||
color_values[3] = color_values[1];
|
||||
break;
|
||||
case PixelFormat::RGB5A1:
|
||||
case Pica::Texture::Format::Type::RGB5A1:
|
||||
color = Color::DecodeRGB5A1((const u8*)&value_16bit);
|
||||
color_values[0] = color[0] / 31.0f;
|
||||
color_values[1] = color[1] / 31.0f;
|
||||
color_values[2] = color[2] / 31.0f;
|
||||
color_values[3] = color[3];
|
||||
break;
|
||||
case PixelFormat::RGB565:
|
||||
case Pica::Texture::Format::Type::RGB565:
|
||||
color = Color::DecodeRGB565((const u8*)&value_16bit);
|
||||
color_values[0] = color[0] / 31.0f;
|
||||
color_values[1] = color[1] / 63.0f;
|
||||
color_values[2] = color[2] / 31.0f;
|
||||
break;
|
||||
case PixelFormat::RGBA4:
|
||||
case Pica::Texture::Format::Type::RGBA4:
|
||||
color = Color::DecodeRGBA4((const u8*)&value_16bit);
|
||||
color_values[0] = color[0] / 15.0f;
|
||||
color_values[1] = color[1] / 15.0f;
|
||||
color_values[2] = color[2] / 15.0f;
|
||||
color_values[3] = color[3] / 15.0f;
|
||||
break;
|
||||
case PixelFormat::IA8:
|
||||
case PixelFormat::RG8:
|
||||
case Pica::Texture::Format::Type::IA8:
|
||||
case Pica::Texture::Format::Type::RG8:
|
||||
color_values[0] = (value_16bit >> 8) / 255.0f;
|
||||
color_values[1] = (value_16bit & 0xFF) / 255.0f;
|
||||
break;
|
||||
@ -899,9 +898,9 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config)
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
|
||||
|
||||
GLfloat value_float;
|
||||
if (dst_surface->pixel_format == CachedSurface::PixelFormat::D16) {
|
||||
if (dst_surface->pixel_format == Pica::Texture::Format::Type::D16) {
|
||||
value_float = config.value_32bit / 65535.0f; // 2^16 - 1
|
||||
} else if (dst_surface->pixel_format == CachedSurface::PixelFormat::D24) {
|
||||
} else if (dst_surface->pixel_format == Pica::Texture::Format::Type::D24) {
|
||||
value_float = config.value_32bit / 16777215.0f; // 2^24 - 1
|
||||
}
|
||||
|
||||
@ -945,7 +944,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con
|
||||
src_params.height = config.height;
|
||||
src_params.pixel_stride = pixel_stride;
|
||||
src_params.is_tiled = false;
|
||||
src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.color_format);
|
||||
src_params.pixel_format = Pica::Texture::Format::FromGPUPixelFormat(config.color_format);
|
||||
|
||||
MathUtil::Rectangle<int> src_rect;
|
||||
CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect);
|
||||
|
@ -21,6 +21,8 @@
|
||||
#include "video_core/pica_state.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_state.h"
|
||||
#include "video_core/texture/codec.h"
|
||||
#include "video_core/texture/formats.h"
|
||||
#include "video_core/utils.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
@ -30,21 +32,48 @@ struct FormatTuple {
|
||||
GLenum type;
|
||||
};
|
||||
|
||||
static const std::array<FormatTuple, 5> fb_format_tuples = {{
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, // RGBA8
|
||||
{GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE}, // RGB8
|
||||
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1
|
||||
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565
|
||||
{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4
|
||||
}};
|
||||
|
||||
static const std::array<FormatTuple, 4> depth_format_tuples = {{
|
||||
static const std::array<FormatTuple, 18> format_tuples = {{
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, // RGBA8
|
||||
{GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE}, // RGB8
|
||||
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1
|
||||
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565
|
||||
{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // IA8
|
||||
{GL_RG8, GL_RG8, GL_UNSIGNED_BYTE}, // RG8
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // I8
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // A8
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // IA4
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // I4
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // A4
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // ETC1
|
||||
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // ETC1A4
|
||||
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16
|
||||
{},
|
||||
{GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24
|
||||
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8
|
||||
}};
|
||||
|
||||
// Per-pixel-format flag, indexed by the numeric value of the surface's pixel_format.
// Callers pass the negated flag to the texture codec's RGBA-transform switches, and the
// surface download path uses 4 bytes per pixel for every format flagged false here.
static const std::array<bool, 18> native_format = {{
    /*  0  RGBA8  */ true,
    /*  1  RGB8   */ true,
    /*  2  RGB5A1 */ true,
    /*  3  RGB565 */ true,
    /*  4  RGBA4  */ true,
    /*  5  IA8    */ false,
    /*  6  RG8    */ true,
    /*  7  I8     */ false,
    /*  8  A8     */ false,
    /*  9  IA4    */ false,
    /* 10  I4     */ false,
    /* 11  A4     */ false,
    /* 12  ETC1   */ false,
    /* 13  ETC1A4 */ false,
    /* 14  D16    */ true,
    /* 15  (none) */ false,
    /* 16  D24    */ false,
    /* 17  D24S8  */ false,
}};
|
||||
|
||||
RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
|
||||
transfer_framebuffers[0].Create();
|
||||
transfer_framebuffers[1].Create();
|
||||
@ -54,55 +83,6 @@ RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
|
||||
FlushAll();
|
||||
}
|
||||
|
||||
static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height,
|
||||
u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data,
|
||||
u8* gl_data, bool morton_to_gl) {
|
||||
using PixelFormat = CachedSurface::PixelFormat;
|
||||
|
||||
u8* data_ptrs[2];
|
||||
u32 depth_stencil_shifts[2] = {24, 8};
|
||||
|
||||
if (morton_to_gl) {
|
||||
std::swap(depth_stencil_shifts[0], depth_stencil_shifts[1]);
|
||||
}
|
||||
|
||||
if (pixel_format == PixelFormat::D24S8) {
|
||||
for (unsigned y = 0; y < height; ++y) {
|
||||
for (unsigned x = 0; x < width; ++x) {
|
||||
const u32 coarse_y = y & ~7;
|
||||
u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
|
||||
coarse_y * width * bytes_per_pixel;
|
||||
u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
|
||||
|
||||
data_ptrs[morton_to_gl] = morton_data + morton_offset;
|
||||
data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
|
||||
|
||||
// Swap depth and stencil value ordering since 3DS does not match OpenGL
|
||||
u32 depth_stencil;
|
||||
memcpy(&depth_stencil, data_ptrs[1], sizeof(u32));
|
||||
depth_stencil = (depth_stencil << depth_stencil_shifts[0]) |
|
||||
(depth_stencil >> depth_stencil_shifts[1]);
|
||||
|
||||
memcpy(data_ptrs[0], &depth_stencil, sizeof(u32));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (unsigned y = 0; y < height; ++y) {
|
||||
for (unsigned x = 0; x < width; ++x) {
|
||||
const u32 coarse_y = y & ~7;
|
||||
u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
|
||||
coarse_y * width * bytes_per_pixel;
|
||||
u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
|
||||
|
||||
data_ptrs[morton_to_gl] = morton_data + morton_offset;
|
||||
data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
|
||||
|
||||
memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex,
|
||||
CachedSurface::SurfaceType type,
|
||||
const MathUtil::Rectangle<int>& src_rect,
|
||||
@ -184,7 +164,7 @@ bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface,
|
||||
return true;
|
||||
}
|
||||
|
||||
static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format,
|
||||
static void AllocateSurfaceTexture(GLuint texture, Pica::Texture::Format::Type pixel_format,
|
||||
u32 width, u32 height) {
|
||||
// Allocate an uninitialized texture of appropriate size and format for the surface
|
||||
using SurfaceType = CachedSurface::SurfaceType;
|
||||
@ -199,17 +179,8 @@ static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pi
|
||||
|
||||
SurfaceType type = CachedSurface::GetFormatType(pixel_format);
|
||||
|
||||
FormatTuple tuple;
|
||||
if (type == SurfaceType::Color) {
|
||||
ASSERT((size_t)pixel_format < fb_format_tuples.size());
|
||||
tuple = fb_format_tuples[(unsigned int)pixel_format];
|
||||
} else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
|
||||
size_t tuple_idx = (size_t)pixel_format - 14;
|
||||
ASSERT(tuple_idx < depth_format_tuples.size());
|
||||
tuple = depth_format_tuples[tuple_idx];
|
||||
} else {
|
||||
tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE};
|
||||
}
|
||||
ASSERT((size_t)pixel_format < format_tuples.size());
|
||||
FormatTuple tuple = format_tuples[(unsigned int)pixel_format];
|
||||
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0, tuple.format,
|
||||
tuple.type, nullptr);
|
||||
@ -227,7 +198,7 @@ static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pi
|
||||
MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192));
|
||||
CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale,
|
||||
bool load_if_create) {
|
||||
using PixelFormat = CachedSurface::PixelFormat;
|
||||
using PixelFormat = Pica::Texture::Format::Type;
|
||||
using SurfaceType = CachedSurface::SurfaceType;
|
||||
|
||||
if (params.addr == 0) {
|
||||
@ -235,7 +206,7 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo
|
||||
}
|
||||
|
||||
u32 params_size =
|
||||
params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
|
||||
params.width * params.height * Pica::Texture::Format::GetBpp(params.pixel_format) / 8;
|
||||
|
||||
// Check for an exact match in existing surfaces
|
||||
CachedSurface* best_exact_surface = nullptr;
|
||||
@ -320,72 +291,36 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo
|
||||
|
||||
if (!new_surface->is_tiled) {
|
||||
// TODO: Ensure this will always be a color format, not a depth or other format
|
||||
ASSERT((size_t)new_surface->pixel_format < fb_format_tuples.size());
|
||||
const FormatTuple& tuple = fb_format_tuples[(unsigned int)params.pixel_format];
|
||||
// ASSERT((size_t)new_surface->pixel_format < format_tuples.size());
|
||||
const FormatTuple& tuple = format_tuples[(unsigned int)params.pixel_format];
|
||||
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)new_surface->pixel_stride);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0,
|
||||
tuple.format, tuple.type, texture_src_data);
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
|
||||
} else {
|
||||
SurfaceType type = CachedSurface::GetFormatType(new_surface->pixel_format);
|
||||
if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) {
|
||||
FormatTuple tuple;
|
||||
if ((size_t)params.pixel_format < fb_format_tuples.size()) {
|
||||
tuple = fb_format_tuples[(unsigned int)params.pixel_format];
|
||||
} else {
|
||||
// Texture
|
||||
tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE};
|
||||
}
|
||||
|
||||
std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height);
|
||||
|
||||
Pica::DebugUtils::TextureInfo tex_info;
|
||||
tex_info.width = params.width;
|
||||
tex_info.height = params.height;
|
||||
tex_info.stride =
|
||||
params.width * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
|
||||
tex_info.format = (Pica::Regs::TextureFormat)params.pixel_format;
|
||||
tex_info.physical_address = params.addr;
|
||||
|
||||
for (unsigned y = 0; y < params.height; ++y) {
|
||||
for (unsigned x = 0; x < params.width; ++x) {
|
||||
tex_buffer[x + params.width * y] = Pica::DebugUtils::LookupTexture(
|
||||
texture_src_data, x, params.height - 1 - y, tex_info);
|
||||
}
|
||||
}
|
||||
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height,
|
||||
0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data());
|
||||
} else {
|
||||
// Depth/Stencil formats need special treatment since they aren't sampleable using
|
||||
// LookupTexture and can't use RGBA format
|
||||
size_t tuple_idx = (size_t)params.pixel_format - 14;
|
||||
ASSERT(tuple_idx < depth_format_tuples.size());
|
||||
const FormatTuple& tuple = depth_format_tuples[tuple_idx];
|
||||
|
||||
u32 bytes_per_pixel = CachedSurface::GetFormatBpp(params.pixel_format) / 8;
|
||||
|
||||
// OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
|
||||
bool use_4bpp = (params.pixel_format == PixelFormat::D24);
|
||||
|
||||
u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
|
||||
|
||||
std::vector<u8> temp_fb_depth_buffer(params.width * params.height *
|
||||
gl_bytes_per_pixel);
|
||||
|
||||
u8* temp_fb_depth_buffer_ptr =
|
||||
use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data();
|
||||
|
||||
MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel,
|
||||
gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr,
|
||||
true);
|
||||
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height,
|
||||
0, tuple.format, tuple.type, temp_fb_depth_buffer.data());
|
||||
}
|
||||
const FormatTuple& tuple = format_tuples[(unsigned int)params.pixel_format];
|
||||
std::unique_ptr<Pica::Texture::Codec> tmp = Pica::Texture::CodecFactory::build(
|
||||
// clang-format off
|
||||
params.pixel_format, texture_src_data, params.width, params.height
|
||||
// clang-format on
|
||||
);
|
||||
Pica::Texture::Codec* codec = tmp.get();
|
||||
codec->configTiling(true, 8); // use 32 instead of 8 if the image is tiled
|
||||
// in blocks of 32x32
|
||||
codec->configRGBATransform(!native_format[(unsigned int)params.pixel_format]);
|
||||
codec->decode();
|
||||
std::unique_ptr<u8[]> decoded_texture = codec->transferInternalBuffer();
|
||||
u32 bytes = codec->getInternalBytesPerPixel();
|
||||
if (bytes == 3)
|
||||
bytes = 1;
|
||||
else if (bytes != 2)
|
||||
bytes = 4;
|
||||
glPixelStorei(GL_UNPACK_ALIGNMENT, bytes);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0,
|
||||
tuple.format, tuple.type, decoded_texture.get());
|
||||
glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
|
||||
}
|
||||
|
||||
// If not 1x scale, blit 1x texture to a new scaled texture and replace texture in surface
|
||||
if (new_surface->res_scale_width != 1.f || new_surface->res_scale_height != 1.f) {
|
||||
OGLTexture scaled_texture;
|
||||
@ -430,7 +365,7 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params
|
||||
}
|
||||
|
||||
u32 total_pixels = params.width * params.height;
|
||||
u32 params_size = total_pixels * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
|
||||
u32 params_size = total_pixels * Pica::Texture::Format::GetBpp(params.pixel_format) / 8;
|
||||
|
||||
// Attempt to find encompassing surfaces
|
||||
CachedSurface* best_subrect_surface = nullptr;
|
||||
@ -467,7 +402,7 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params
|
||||
// Return the best subrect surface if found
|
||||
if (best_subrect_surface != nullptr) {
|
||||
unsigned int bytes_per_pixel =
|
||||
(CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8);
|
||||
(Pica::Texture::Format::GetBpp(best_subrect_surface->pixel_format) / 8);
|
||||
|
||||
int x0, y0;
|
||||
|
||||
@ -521,7 +456,7 @@ CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(
|
||||
params.width = info.width;
|
||||
params.height = info.height;
|
||||
params.is_tiled = true;
|
||||
params.pixel_format = CachedSurface::PixelFormatFromTextureFormat(info.format);
|
||||
params.pixel_format = Pica::Texture::Format::FromTextureFormat(info.format);
|
||||
return GetSurface(params, false, true);
|
||||
}
|
||||
|
||||
@ -574,10 +509,10 @@ RasterizerCacheOpenGL::GetFramebufferSurfaces(const Pica::Regs::FramebufferConfi
|
||||
}
|
||||
|
||||
color_params.addr = config.GetColorBufferPhysicalAddress();
|
||||
color_params.pixel_format = CachedSurface::PixelFormatFromColorFormat(config.color_format);
|
||||
color_params.pixel_format = Pica::Texture::Format::FromColorFormat(config.color_format);
|
||||
|
||||
depth_params.addr = config.GetDepthBufferPhysicalAddress();
|
||||
depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format);
|
||||
depth_params.pixel_format = Pica::Texture::Format::FromDepthFormat(config.depth_format);
|
||||
|
||||
MathUtil::Rectangle<int> color_rect;
|
||||
CachedSurface* color_surface =
|
||||
@ -648,9 +583,9 @@ CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryF
|
||||
CachedSurface* surface = it2->get();
|
||||
|
||||
if (surface->addr == config.GetStartAddress() &&
|
||||
CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value &&
|
||||
Pica::Texture::Format::GetBpp(surface->pixel_format) == bits_per_value &&
|
||||
(surface->width * surface->height *
|
||||
CachedSurface::GetFormatBpp(surface->pixel_format) / 8) ==
|
||||
Pica::Texture::Format::GetBpp(surface->pixel_format) / 8) ==
|
||||
(config.GetEndAddress() - config.GetStartAddress())) {
|
||||
return surface;
|
||||
}
|
||||
@ -662,7 +597,6 @@ CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryF
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_SurfaceDownload, "OpenGL", "Surface Download", MP_RGB(128, 192, 64));
|
||||
void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) {
|
||||
using PixelFormat = CachedSurface::PixelFormat;
|
||||
using SurfaceType = CachedSurface::SurfaceType;
|
||||
|
||||
if (!surface->dirty) {
|
||||
@ -703,53 +637,32 @@ void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) {
|
||||
|
||||
if (!surface->is_tiled) {
|
||||
// TODO: Ensure this will always be a color format, not a depth or other format
|
||||
ASSERT((size_t)surface->pixel_format < fb_format_tuples.size());
|
||||
const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format];
|
||||
// ASSERT((size_t)surface->pixel_format < fb_format_tuples.size());
|
||||
const FormatTuple& tuple = format_tuples[(unsigned int)surface->pixel_format];
|
||||
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, (GLint)surface->pixel_stride);
|
||||
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, dst_buffer);
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
||||
} else {
|
||||
SurfaceType type = CachedSurface::GetFormatType(surface->pixel_format);
|
||||
if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) {
|
||||
ASSERT((size_t)surface->pixel_format < fb_format_tuples.size());
|
||||
const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format];
|
||||
const FormatTuple& tuple = format_tuples[(u32)surface->pixel_format];
|
||||
u32 bytes_per_pixel = Pica::Texture::Format::GetBpp(surface->pixel_format) / 8;
|
||||
if (!native_format[(u32)surface->pixel_format])
|
||||
bytes_per_pixel = 4;
|
||||
std::vector<u8> temp_gl_buffer(surface->width * surface->height * bytes_per_pixel);
|
||||
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
|
||||
|
||||
u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8;
|
||||
|
||||
std::vector<u8> temp_gl_buffer(surface->width * surface->height * bytes_per_pixel);
|
||||
|
||||
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
|
||||
|
||||
// Directly copy pixels. Internal OpenGL color formats are consistent so no conversion
|
||||
// is necessary.
|
||||
MortonCopyPixels(surface->pixel_format, surface->width, surface->height,
|
||||
bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(),
|
||||
false);
|
||||
} else {
|
||||
// Depth/Stencil formats need special treatment since they aren't sampleable using
|
||||
// LookupTexture and can't use RGBA format
|
||||
size_t tuple_idx = (size_t)surface->pixel_format - 14;
|
||||
ASSERT(tuple_idx < depth_format_tuples.size());
|
||||
const FormatTuple& tuple = depth_format_tuples[tuple_idx];
|
||||
|
||||
u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8;
|
||||
|
||||
// OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
|
||||
bool use_4bpp = (surface->pixel_format == PixelFormat::D24);
|
||||
|
||||
u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
|
||||
|
||||
std::vector<u8> temp_gl_buffer(surface->width * surface->height * gl_bytes_per_pixel);
|
||||
|
||||
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
|
||||
|
||||
u8* temp_gl_buffer_ptr = use_4bpp ? temp_gl_buffer.data() + 1 : temp_gl_buffer.data();
|
||||
|
||||
MortonCopyPixels(surface->pixel_format, surface->width, surface->height,
|
||||
bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr,
|
||||
false);
|
||||
}
|
||||
std::unique_ptr<Pica::Texture::Codec> tmp = Pica::Texture::CodecFactory::build(
|
||||
// clang-format off
|
||||
surface->pixel_format, temp_gl_buffer.data(), surface->width, surface->height
|
||||
// clang-format on
|
||||
);
|
||||
Pica::Texture::Codec* codec = tmp.get();
|
||||
codec->configTiling(true, 8); // use 32 instead of 8 if the image is tiled
|
||||
// in blocks of 32x32
|
||||
codec->configRGBATransform(!native_format[(u32)surface->pixel_format]);
|
||||
codec->configPreConvertedRGBA(!native_format[(u32)surface->pixel_format]);
|
||||
codec->setExternalBuffer(dst_buffer);
|
||||
codec->encode();
|
||||
}
|
||||
|
||||
surface->dirty = false;
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "core/hw/gpu.h"
|
||||
#include "video_core/pica.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/texture/formats.h"
|
||||
|
||||
namespace MathUtil {
|
||||
template <class T>
|
||||
@ -27,33 +28,6 @@ struct CachedSurface;
|
||||
using SurfaceCache = boost::icl::interval_map<PAddr, std::set<std::shared_ptr<CachedSurface>>>;
|
||||
|
||||
struct CachedSurface {
|
||||
enum class PixelFormat {
|
||||
// First 5 formats are shared between textures and color buffers
|
||||
RGBA8 = 0,
|
||||
RGB8 = 1,
|
||||
RGB5A1 = 2,
|
||||
RGB565 = 3,
|
||||
RGBA4 = 4,
|
||||
|
||||
// Texture-only formats
|
||||
IA8 = 5,
|
||||
RG8 = 6,
|
||||
I8 = 7,
|
||||
A8 = 8,
|
||||
IA4 = 9,
|
||||
I4 = 10,
|
||||
A4 = 11,
|
||||
ETC1 = 12,
|
||||
ETC1A4 = 13,
|
||||
|
||||
// Depth buffer-only formats
|
||||
D16 = 14,
|
||||
// gap
|
||||
D24 = 16,
|
||||
D24S8 = 17,
|
||||
|
||||
Invalid = 255,
|
||||
};
|
||||
|
||||
enum class SurfaceType {
|
||||
Color = 0,
|
||||
@ -63,58 +37,8 @@ struct CachedSurface {
|
||||
Invalid = 4,
|
||||
};
|
||||
|
||||
static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) {
|
||||
static const std::array<unsigned int, 18> bpp_table = {
|
||||
32, // RGBA8
|
||||
24, // RGB8
|
||||
16, // RGB5A1
|
||||
16, // RGB565
|
||||
16, // RGBA4
|
||||
16, // IA8
|
||||
16, // RG8
|
||||
8, // I8
|
||||
8, // A8
|
||||
8, // IA4
|
||||
4, // I4
|
||||
4, // A4
|
||||
4, // ETC1
|
||||
8, // ETC1A4
|
||||
16, // D16
|
||||
0,
|
||||
24, // D24
|
||||
32, // D24S8
|
||||
};
|
||||
|
||||
ASSERT((unsigned int)format < ARRAY_SIZE(bpp_table));
|
||||
return bpp_table[(unsigned int)format];
|
||||
}
|
||||
|
||||
static PixelFormat PixelFormatFromTextureFormat(Pica::Regs::TextureFormat format) {
|
||||
return ((unsigned int)format < 14) ? (PixelFormat)format : PixelFormat::Invalid;
|
||||
}
|
||||
|
||||
static PixelFormat PixelFormatFromColorFormat(Pica::Regs::ColorFormat format) {
|
||||
return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
|
||||
}
|
||||
|
||||
static PixelFormat PixelFormatFromDepthFormat(Pica::Regs::DepthFormat format) {
|
||||
return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14)
|
||||
: PixelFormat::Invalid;
|
||||
}
|
||||
|
||||
static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) {
|
||||
switch (format) {
|
||||
// RGB565 and RGB5A1 are switched in PixelFormat compared to ColorFormat
|
||||
case GPU::Regs::PixelFormat::RGB565:
|
||||
return PixelFormat::RGB565;
|
||||
case GPU::Regs::PixelFormat::RGB5A1:
|
||||
return PixelFormat::RGB5A1;
|
||||
default:
|
||||
return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
|
||||
}
|
||||
}
|
||||
|
||||
static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) {
|
||||
static bool CheckFormatsBlittable(Pica::Texture::Format::Type pixel_format_a,
|
||||
Pica::Texture::Format::Type pixel_format_b) {
|
||||
SurfaceType a_type = GetFormatType(pixel_format_a);
|
||||
SurfaceType b_type = GetFormatType(pixel_format_b);
|
||||
|
||||
@ -134,7 +58,7 @@ struct CachedSurface {
|
||||
return false;
|
||||
}
|
||||
|
||||
static SurfaceType GetFormatType(PixelFormat pixel_format) {
|
||||
static SurfaceType GetFormatType(Pica::Texture::Format::Type pixel_format) {
|
||||
if ((unsigned int)pixel_format < 5) {
|
||||
return SurfaceType::Color;
|
||||
}
|
||||
@ -143,11 +67,12 @@ struct CachedSurface {
|
||||
return SurfaceType::Texture;
|
||||
}
|
||||
|
||||
if (pixel_format == PixelFormat::D16 || pixel_format == PixelFormat::D24) {
|
||||
if (pixel_format == Pica::Texture::Format::Type::D16 ||
|
||||
pixel_format == Pica::Texture::Format::Type::D24) {
|
||||
return SurfaceType::Depth;
|
||||
}
|
||||
|
||||
if (pixel_format == PixelFormat::D24S8) {
|
||||
if (pixel_format == Pica::Texture::Format::Type::D24S8) {
|
||||
return SurfaceType::DepthStencil;
|
||||
}
|
||||
|
||||
@ -177,7 +102,7 @@ struct CachedSurface {
|
||||
float res_scale_height = 1.f;
|
||||
|
||||
bool is_tiled;
|
||||
PixelFormat pixel_format;
|
||||
Pica::Texture::Format::Type pixel_format;
|
||||
bool dirty;
|
||||
};
|
||||
|
||||
|
@ -1,6 +1,10 @@
|
||||
#include "codec.h"
|
||||
#include "internal\codecs.h"
|
||||
#include "internal\morton.h"
|
||||
#include "common/color.h"
|
||||
#include "common/math_util.h"
|
||||
#include "common/swap.h"
|
||||
#include "common/vector_math.h"
|
||||
#include "video_core/texture/codec.h"
|
||||
#include "video_core/texture/internal/codecs.h"
|
||||
#include "video_core/texture/internal/morton.h"
|
||||
|
||||
namespace Pica {
|
||||
namespace Texture {
|
||||
@ -17,18 +21,6 @@ void Codec::encode() {
|
||||
this->encode_morton_pass();
|
||||
};
|
||||
|
||||
void Codec::setSize() {
|
||||
this->start_nibbles_size = format_size;
|
||||
};
|
||||
|
||||
inline void Codec::setWidth(u32 width) {
|
||||
this->width = width;
|
||||
}
|
||||
|
||||
inline void Codec::setHeight(u32 height) {
|
||||
this->height = height;
|
||||
}
|
||||
|
||||
void Codec::configTiling(bool active, u32 tiling) {
|
||||
this->morton = true;
|
||||
this->morton_pass_tiling = tiling;
|
||||
@ -63,15 +55,16 @@ bool Codec::invalid() {
|
||||
}
|
||||
|
||||
void Codec::init(bool decode) {
|
||||
this->setSize();
|
||||
this->expected_nibbles_size = this->start_nibbles_size;
|
||||
if (decode) {
|
||||
if (this->raw_RGBA)
|
||||
this->expected_nibbles_size = 8;
|
||||
} else {
|
||||
this->start_nibbles_size = this->format_size;
|
||||
if (this->raw_RGBA)
|
||||
this->expected_nibbles_size = this->format_size;
|
||||
if (this->preconverted)
|
||||
this->start_nibbles_size = 8;
|
||||
if (!this->raw_RGBA)
|
||||
this->expected_nibbles_size = this->start_nibbles_size;
|
||||
}
|
||||
if (!this->external_result_buffer) {
|
||||
size_t buff_size = this->width * this->height * this->expected_nibbles_size / 2;
|
||||
@ -80,7 +73,7 @@ void Codec::init(bool decode) {
|
||||
}
|
||||
}
|
||||
|
||||
inline void Codec::decode_morton_pass() {
|
||||
void Codec::decode_morton_pass() {
|
||||
if (this->morton_pass_tiling == 8)
|
||||
Decoders::Morton_8x8(this->target_buffer, this->passing_buffer, this->width, this->height,
|
||||
this->start_nibbles_size * 4);
|
||||
@ -89,7 +82,7 @@ inline void Codec::decode_morton_pass() {
|
||||
this->start_nibbles_size * 4);
|
||||
}
|
||||
|
||||
inline void Codec::encode_morton_pass() {
|
||||
void Codec::encode_morton_pass() {
|
||||
if (this->morton_pass_tiling == 8)
|
||||
Encoders::Morton_8x8(this->target_buffer, this->passing_buffer, this->width, this->height,
|
||||
this->start_nibbles_size * 4);
|
||||
@ -98,41 +91,41 @@ inline void Codec::encode_morton_pass() {
|
||||
this->start_nibbles_size * 4);
|
||||
}
|
||||
|
||||
std::unique_ptr<Codec> CodecFactory::build(Format format, u8* target, u32 width, u32 height) {
|
||||
std::unique_ptr<Codec> CodecFactory::build(Format::Type format, u8* target, u32 width, u32 height) {
|
||||
switch (format) {
|
||||
case Format::RGBA8:
|
||||
case Format::Type::RGBA8:
|
||||
return std::make_unique<RGBACodec>(target, width, height);
|
||||
case Format::RGB8:
|
||||
case Format::Type::RGB8:
|
||||
return std::make_unique<RGBCodec>(target, width, height);
|
||||
case Format::RGB5A1:
|
||||
case Format::Type::RGB5A1:
|
||||
return std::make_unique<RGB5A1Codec>(target, width, height);
|
||||
case Format::RGB565:
|
||||
case Format::Type::RGB565:
|
||||
return std::make_unique<RGB565Codec>(target, width, height);
|
||||
case Format::RGBA4:
|
||||
case Format::Type::RGBA4:
|
||||
return std::make_unique<RGBA4Codec>(target, width, height);
|
||||
case Format::RG8:
|
||||
case Format::Type::RG8:
|
||||
return std::make_unique<RG8Codec>(target, width, height);
|
||||
case Format::IA8:
|
||||
case Format::Type::IA8:
|
||||
return std::make_unique<IA8Codec>(target, width, height);
|
||||
case Format::I8:
|
||||
case Format::Type::I8:
|
||||
return std::make_unique<I8Codec>(target, width, height);
|
||||
case Format::A8:
|
||||
case Format::Type::A8:
|
||||
return std::make_unique<A8Codec>(target, width, height);
|
||||
case Format::IA4:
|
||||
case Format::Type::IA4:
|
||||
return std::make_unique<IA4Codec>(target, width, height);
|
||||
case Format::I4:
|
||||
case Format::Type::I4:
|
||||
return std::make_unique<I4Codec>(target, width, height);
|
||||
case Format::A4:
|
||||
case Format::Type::A4:
|
||||
return std::make_unique<A4Codec>(target, width, height);
|
||||
case Format::ETC1:
|
||||
case Format::Type::ETC1:
|
||||
return std::make_unique<ETC1Codec>(target, width, height);
|
||||
case Format::ETC1A4:
|
||||
case Format::Type::ETC1A4:
|
||||
return std::make_unique<ETC1A4Codec>(target, width, height);
|
||||
case Format::D16:
|
||||
case Format::Type::D16:
|
||||
return std::make_unique<D16Codec>(target, width, height);
|
||||
case Format::D24:
|
||||
case Format::Type::D24:
|
||||
return std::make_unique<D24Codec>(target, width, height);
|
||||
case Format::D24S8:
|
||||
case Format::Type::D24S8:
|
||||
return std::make_unique<D24S8Codec>(target, width, height);
|
||||
default:
|
||||
return nullptr;
|
||||
|
@ -1,9 +1,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include "common/common_types.h"
|
||||
#include "formats.h"
|
||||
|
||||
#pragma once
|
||||
#include "video_core/texture/formats.h"
|
||||
|
||||
namespace Pica {
|
||||
|
||||
@ -16,18 +17,23 @@ public:
|
||||
this->target_buffer = target;
|
||||
this->setWidth(width);
|
||||
this->setHeight(height);
|
||||
this->setSize();
|
||||
this->expected_nibbles_size = this->start_nibbles_size;
|
||||
}
|
||||
virtual ~Codec() {}
|
||||
|
||||
virtual void decode();
|
||||
virtual void encode();
|
||||
|
||||
void setSize();
|
||||
inline void setWidth(u32 width) {
|
||||
this->width = width;
|
||||
}
|
||||
|
||||
void setWidth(u32 width);
|
||||
void setHeight(u32 height);
|
||||
inline void setHeight(u32 height) {
|
||||
this->height = height;
|
||||
}
|
||||
|
||||
inline u32 getInternalBytesPerPixel() {
|
||||
return this->expected_nibbles_size / 2;
|
||||
}
|
||||
|
||||
// Common Passes
|
||||
void configTiling(bool active, u32 tiling);
|
||||
@ -54,7 +60,10 @@ protected:
|
||||
|
||||
u32 start_nibbles_size;
|
||||
u32 expected_nibbles_size;
|
||||
const u32 format_size = 8;
|
||||
|
||||
virtual void setSize() {
|
||||
this->start_nibbles_size = 8;
|
||||
};
|
||||
|
||||
u8* target_buffer; // Initial read buffer
|
||||
u8* passing_buffer; // pointer aliasing: Used and modified by passes
|
||||
@ -65,12 +74,12 @@ protected:
|
||||
|
||||
typedef Codec super;
|
||||
|
||||
inline void decode_morton_pass();
|
||||
inline void encode_morton_pass();
|
||||
void decode_morton_pass();
|
||||
void encode_morton_pass();
|
||||
};
|
||||
|
||||
namespace CodecFactory {
|
||||
std::unique_ptr<Codec> build(Pica::Texture::Format format, u8* target, u32 width, u32 height);
|
||||
std::unique_ptr<Codec> build(Pica::Texture::Format::Type format, u8* target, u32 width, u32 height);
|
||||
};
|
||||
|
||||
} // Texture
|
||||
|
@ -1,36 +1,96 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include "common/assert.h"
|
||||
#include "core/hw/gpu.h"
|
||||
#include "video_core/pica.h"
|
||||
|
||||
namespace Pica {
|
||||
|
||||
namespace Texture {
|
||||
|
||||
enum class Format {
|
||||
// First 5 formats are shared between textures and color buffers
|
||||
RGBA8 = 0,
|
||||
RGB8 = 1,
|
||||
RGB5A1 = 2,
|
||||
RGB565 = 3,
|
||||
RGBA4 = 4,
|
||||
struct Format {
|
||||
|
||||
// Texture-only formats
|
||||
IA8 = 5,
|
||||
RG8 = 6,
|
||||
I8 = 7,
|
||||
A8 = 8,
|
||||
IA4 = 9,
|
||||
I4 = 10,
|
||||
A4 = 11,
|
||||
ETC1 = 12,
|
||||
ETC1A4 = 13,
|
||||
enum class Type {
|
||||
// First 5 formats are shared between textures and color buffers
|
||||
RGBA8 = 0,
|
||||
RGB8 = 1,
|
||||
RGB5A1 = 2,
|
||||
RGB565 = 3,
|
||||
RGBA4 = 4,
|
||||
|
||||
// Depth buffer-only formats
|
||||
D16 = 14,
|
||||
// gap
|
||||
D24 = 16,
|
||||
D24S8 = 17,
|
||||
// Texture-only formats
|
||||
IA8 = 5,
|
||||
RG8 = 6,
|
||||
I8 = 7,
|
||||
A8 = 8,
|
||||
IA4 = 9,
|
||||
I4 = 10,
|
||||
A4 = 11,
|
||||
ETC1 = 12,
|
||||
ETC1A4 = 13,
|
||||
|
||||
Invalid = 255,
|
||||
};
|
||||
// Depth buffer-only formats
|
||||
D16 = 14,
|
||||
// gap
|
||||
D24 = 16,
|
||||
D24S8 = 17,
|
||||
|
||||
Invalid = 255,
|
||||
};
|
||||
|
||||
/// Returns the number of bits per pixel for `format`.
/// The table is indexed by the numeric value of Type. Slot 15 is the unused gap
/// between D16 (14) and D24 (16) and holds 0. Values outside the table (such as
/// Type::Invalid) trip the assertion.
static u32 GetBpp(Type format) {
    static const std::array<unsigned int, 18> bits_per_pixel = {
        32, // RGBA8
        24, // RGB8
        16, // RGB5A1
        16, // RGB565
        16, // RGBA4
        16, // IA8
        16, // RG8
        8,  // I8
        8,  // A8
        8,  // IA4
        4,  // I4
        4,  // A4
        4,  // ETC1
        8,  // ETC1A4
        16, // D16
        0,  // unused slot (gap in the enum)
        24, // D24
        32, // D24S8
    };

    const u32 index = static_cast<u32>(format);
    ASSERT(index < bits_per_pixel.size());
    return bits_per_pixel[index];
}
|
||||
|
||||
/// Converts a PICA texture format register value to a Type.
/// Raw values below 14 are cast directly; anything else yields Type::Invalid.
static Type FromTextureFormat(Regs::TextureFormat format) {
    const unsigned int raw = static_cast<unsigned int>(format);
    if (raw >= 14) {
        return Type::Invalid;
    }
    return static_cast<Type>(raw);
}
|
||||
|
||||
/// Converts a PICA color buffer format register value to a Type.
/// Raw values below 5 are cast directly; anything else yields Type::Invalid.
static Type FromColorFormat(Regs::ColorFormat format) {
    const unsigned int raw = static_cast<unsigned int>(format);
    if (raw >= 5) {
        return Type::Invalid;
    }
    return static_cast<Type>(raw);
}
|
||||
|
||||
/// Converts a PICA depth buffer format register value to a Type.
/// Raw values below 4 are shifted by 14 (the numeric value of Type::D16);
/// anything else yields Type::Invalid.
/// NOTE(review): raw value 1 maps to 15, which has no named enumerator in Type -
/// confirm callers never pass it.
static Type FromDepthFormat(Regs::DepthFormat format) {
    const unsigned int depth_base = 14;
    const unsigned int raw = static_cast<unsigned int>(format);
    if (raw >= 4) {
        return Type::Invalid;
    }
    return static_cast<Type>(raw + depth_base);
}
|
||||
|
||||
/// Converts a GPU framebuffer pixel format to a Type.
static Type FromGPUPixelFormat(GPU::Regs::PixelFormat format) {
    // RGB565 and RGB5A1 are switched in PixelFormat compared to ColorFormat,
    // so these two are translated by name instead of by numeric value.
    if (format == GPU::Regs::PixelFormat::RGB565) {
        return Type::RGB565;
    }
    if (format == GPU::Regs::PixelFormat::RGB5A1) {
        return Type::RGB5A1;
    }

    // Remaining raw values below 5 are cast directly.
    const unsigned int raw = static_cast<unsigned int>(format);
    return (raw < 5) ? static_cast<Type>(raw) : Type::Invalid;
}
|
||||
|
||||
}; // Format
|
||||
|
||||
} // Texture
|
||||
|
||||
|
@ -1,7 +1,19 @@
|
||||
#include "codecs.h"
|
||||
#include "etc1.h"
|
||||
#include "morton.h"
|
||||
#include "texture_utils.h"
|
||||
#include "video_core/texture/internal/codecs.h"
|
||||
#include "video_core/texture/internal/etc1.h"
|
||||
#include "video_core/texture/internal/morton.h"
|
||||
#include "video_core/texture/internal/texture_utils.h"
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Optimizations
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef _MSC_VER
|
||||
#pragma inline_recursion(on)
|
||||
#elif defined(CLANG_OR_GCC)
|
||||
#pragma GCC optimize("-fpeel-loops")
|
||||
#pragma GCC optimize("-fpredictive-commoning")
|
||||
#pragma GCC optimize("-ftree-loop-distribute-patterns")
|
||||
#pragma GCC optimize("-ftree-vectorize")
|
||||
#endif
|
||||
|
||||
// Decoders
|
||||
#include "decoders.cpp"
|
||||
|
@ -1,10 +1,11 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/texture/codec.h"
|
||||
|
||||
#pragma once
|
||||
|
||||
// each texture format codec
|
||||
class RGBACodec : public Pica::Texture::Codec {
|
||||
public:
|
||||
@ -13,7 +14,9 @@ public:
|
||||
void encode();
|
||||
|
||||
protected:
|
||||
const u32 format_size = 8;
|
||||
virtual void setSize() {
|
||||
this->start_nibbles_size = 8;
|
||||
};
|
||||
};
|
||||
|
||||
class RGBCodec : public Pica::Texture::Codec {
|
||||
@ -23,7 +26,9 @@ public:
|
||||
void encode();
|
||||
|
||||
protected:
|
||||
const u32 format_size = 6;
|
||||
virtual void setSize() {
|
||||
this->start_nibbles_size = 6;
|
||||
};
|
||||
};
|
||||
|
||||
class RGB5A1Codec : public Pica::Texture::Codec {
|
||||
@ -33,7 +38,9 @@ public:
|
||||
void encode();
|
||||
|
||||
protected:
|
||||
const u32 format_size = 4;
|
||||
virtual void setSize() {
|
||||
this->start_nibbles_size = 4;
|
||||
};
|
||||
};
|
||||
|
||||
class RGBA4Codec : public Pica::Texture::Codec {
|
||||
@ -43,7 +50,9 @@ public:
|
||||
void encode();
|
||||
|
||||
protected:
|
||||
const u32 format_size = 4;
|
||||
virtual void setSize() {
|
||||
this->start_nibbles_size = 4;
|
||||
};
|
||||
};
|
||||
|
||||
class RGB565Codec : public Pica::Texture::Codec {
|
||||
@ -53,7 +62,9 @@ public:
|
||||
void encode();
|
||||
|
||||
protected:
|
||||
const u32 format_size = 4;
|
||||
virtual void setSize() {
|
||||
this->start_nibbles_size = 4;
|
||||
};
|
||||
};
|
||||
|
||||
class RG8Codec : public Pica::Texture::Codec {
|
||||
@ -63,7 +74,9 @@ public:
|
||||
void encode();
|
||||
|
||||
protected:
|
||||
const u32 format_size = 4;
|
||||
virtual void setSize() {
|
||||
this->start_nibbles_size = 4;
|
||||
};
|
||||
};
|
||||
|
||||
class IA8Codec : public Pica::Texture::Codec {
|
||||
@ -73,7 +86,9 @@ public:
|
||||
void encode();
|
||||
|
||||
protected:
|
||||
const u32 format_size = 4;
|
||||
virtual void setSize() {
|
||||
this->start_nibbles_size = 4;
|
||||
};
|
||||
};
|
||||
|
||||
class I8Codec : public Pica::Texture::Codec {
|
||||
@ -83,7 +98,9 @@ public:
|
||||
void encode();
|
||||
|
||||
protected:
|
||||
const u32 format_size = 2;
|
||||
virtual void setSize() {
|
||||
this->start_nibbles_size = 2;
|
||||
};
|
||||
};
|
||||
|
||||
class A8Codec : public Pica::Texture::Codec {
|
||||
@ -93,7 +110,9 @@ public:
|
||||
void encode();
|
||||
|
||||
protected:
|
||||
const u32 format_size = 2;
|
||||
virtual void setSize() {
|
||||
this->start_nibbles_size = 2;
|
||||
};
|
||||
};
|
||||
|
||||
class IA4Codec : public Pica::Texture::Codec {
|
||||
@ -103,7 +122,9 @@ public:
|
||||
void encode();
|
||||
|
||||
protected:
|
||||
const u32 format_size = 2;
|
||||
virtual void setSize() {
|
||||
this->start_nibbles_size = 2;
|
||||
};
|
||||
};
|
||||
|
||||
class I4Codec : public Pica::Texture::Codec {
|
||||
@ -113,7 +134,9 @@ public:
|
||||
void encode();
|
||||
|
||||
protected:
|
||||
const u32 format_size = 1;
|
||||
virtual void setSize() {
|
||||
this->start_nibbles_size = 1;
|
||||
};
|
||||
};
|
||||
|
||||
class A4Codec : public Pica::Texture::Codec {
|
||||
@ -123,7 +146,9 @@ public:
|
||||
void encode();
|
||||
|
||||
protected:
|
||||
const u32 format_size = 1;
|
||||
virtual void setSize() {
|
||||
this->start_nibbles_size = 1;
|
||||
};
|
||||
};
|
||||
|
||||
class ETC1Codec : public Pica::Texture::Codec {
|
||||
@ -133,7 +158,9 @@ public:
|
||||
void encode();
|
||||
|
||||
protected:
|
||||
const u32 format_size = 1;
|
||||
virtual void setSize() {
|
||||
this->start_nibbles_size = 1;
|
||||
};
|
||||
};
|
||||
|
||||
class ETC1A4Codec : public Pica::Texture::Codec {
|
||||
@ -143,7 +170,9 @@ public:
|
||||
void encode();
|
||||
|
||||
protected:
|
||||
const u32 format_size = 2;
|
||||
virtual void setSize() {
|
||||
this->start_nibbles_size = 2;
|
||||
};
|
||||
};
|
||||
|
||||
class D16Codec : public Pica::Texture::Codec {
|
||||
@ -153,7 +182,9 @@ public:
|
||||
void encode();
|
||||
|
||||
protected:
|
||||
const u32 format_size = 4;
|
||||
virtual void setSize() {
|
||||
this->start_nibbles_size = 4;
|
||||
};
|
||||
};
|
||||
|
||||
class D24Codec : public Pica::Texture::Codec {
|
||||
@ -163,7 +194,9 @@ public:
|
||||
void encode();
|
||||
|
||||
protected:
|
||||
const u32 format_size = 6;
|
||||
virtual void setSize() {
|
||||
this->start_nibbles_size = 6;
|
||||
};
|
||||
};
|
||||
|
||||
class D24S8Codec : public Pica::Texture::Codec {
|
||||
@ -173,5 +206,7 @@ public:
|
||||
void encode();
|
||||
|
||||
protected:
|
||||
const u32 format_size = 8;
|
||||
virtual void setSize() {
|
||||
this->start_nibbles_size = 8;
|
||||
};
|
||||
};
|
||||
|
@ -1,9 +1,10 @@
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
template <const Math::Vec4<u8> decode_func(const u8*)>
|
||||
inline void rgba_pass(u8* read, u8* write) {
|
||||
u32 pixel = decode_func(read).ToRGBA();
|
||||
auto pixel = decode_func(read).ToRGBA();
|
||||
std::memcpy(write, &pixel, 4);
|
||||
}
|
||||
|
||||
@ -72,34 +73,36 @@ void RG8Codec::decode() {
|
||||
namespace {
|
||||
|
||||
inline u16 convert_nibbles(u8 nibbles) {
|
||||
return ((u16)Color::Convert4To8((nibbles & 0xF0) >> 4) << 8) |
|
||||
(u16)Color::Convert4To8((nibbles & 0x0F));
|
||||
u16 split = (nibbles & 0xF0) << 4 | (nibbles & 0x0F);
|
||||
split |= (split << 4);
|
||||
return split;
|
||||
}
|
||||
|
||||
inline u32 build_luminance(u8 intensity, u8 alpha) {
|
||||
inline u32 build_luminance(u32 intensity, u32 alpha) {
|
||||
return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity;
|
||||
}
|
||||
|
||||
inline void intensity_alpha_pass(u8* read, u8* write) {
|
||||
alignas(4) u8 pixel[2];
|
||||
std::memcpy(pixel, read, 2);
|
||||
u32 result = build_luminance(pixel[0], pixel[1]);
|
||||
u32 result = build_luminance(pixel[1], pixel[0]);
|
||||
std::memcpy(write, &result, 4);
|
||||
}
|
||||
|
||||
inline void intensity_alpha_nibbles_pass(u8* read, u8* write) {
|
||||
alignas(4) u8 pixel[2];
|
||||
std::memcpy(pixel, read, 1);
|
||||
u16 tmp = convert_nibbles(pixel[0]);
|
||||
std::memcpy(pixel, &tmp, 2);
|
||||
u32 result = build_luminance(pixel[0], pixel[1]);
|
||||
alignas(4) u8 pixel;
|
||||
std::memcpy(&pixel, read, 1);
|
||||
u16 tmp = convert_nibbles(pixel);
|
||||
u8 tmp2[2];
|
||||
std::memcpy(tmp2, &tmp, 2);
|
||||
u32 result = build_luminance(tmp2[1], tmp2[0]);
|
||||
std::memcpy(write, &result, 4);
|
||||
}
|
||||
|
||||
inline void intensity_pass(u8* read, u8* write) {
|
||||
alignas(4) u8 pixel[1];
|
||||
std::memcpy(pixel, read, 1);
|
||||
u32 result = build_luminance(pixel[0], 255);
|
||||
u8 pixel;
|
||||
std::memcpy(&pixel, read, 1);
|
||||
u32 result = build_luminance(pixel, 255);
|
||||
std::memcpy(write, &result, 4);
|
||||
}
|
||||
|
||||
@ -108,9 +111,9 @@ inline void intensity_nibbles_pass(u8* read, u8* write) {
|
||||
std::memcpy(pixel, read, 1);
|
||||
u16 tmp = convert_nibbles(pixel[0]);
|
||||
std::memcpy(pixel, &tmp, 2);
|
||||
u32 result = build_luminance(pixel[0], 255);
|
||||
u32 result = build_luminance(pixel[1], 255);
|
||||
std::memcpy(write, &result, 4);
|
||||
result = build_luminance(pixel[1], 255);
|
||||
result = build_luminance(pixel[0], 255);
|
||||
std::memcpy(write + 4, &result, 4);
|
||||
}
|
||||
|
||||
|
@ -9,14 +9,24 @@
|
||||
#include "common/math_util.h"
|
||||
#include "common/swap.h"
|
||||
#include "common/vector_math.h"
|
||||
#include "etc1.h"
|
||||
#include "texture_utils.h"
|
||||
#include "video_core/texture/internal/etc1.h"
|
||||
#include "video_core/texture/internal/texture_utils.h"
|
||||
|
||||
constexpr std::array<u8[2], 8> etc1_modifier_table = {{
|
||||
namespace {
|
||||
|
||||
#ifdef _DEBUG
|
||||
#define CONST_FIX static
|
||||
#else
|
||||
#define CONST_FIX constexpr
|
||||
#endif
|
||||
|
||||
CONST_FIX std::array<u8[2], 8> etc1_modifier_table = {{
|
||||
{2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183},
|
||||
}};
|
||||
|
||||
namespace {
|
||||
// Packs four channel values into one 32-bit word: r in bits 0-7, g in bits 8-15,
// b in bits 16-23 and a in bits 24-31. No masking is applied, so a channel wider
// than 8 bits spills into the next one.
constexpr u32 buildRGBA(u32 r, u32 g, u32 b, u32 a) {
    return r | (g << 8) | (b << 16) | (a << 24);
}
|
||||
|
||||
union ETC1Tile {
|
||||
u64 raw;
|
||||
@ -62,7 +72,7 @@ union ETC1Tile {
|
||||
BitField<60, 4, u64> r1;
|
||||
} separate;
|
||||
|
||||
const Math::Vec3<u8> GetRGB(u32 x, u32 y) const {
|
||||
const u32 GetRGB(u32 x, u32 y) const {
|
||||
int texel = 4 * x + y;
|
||||
|
||||
if (flip)
|
||||
@ -106,7 +116,7 @@ union ETC1Tile {
|
||||
ret.g() = MathUtil::Clamp(ret.g() + modifier, 0, 255);
|
||||
ret.b() = MathUtil::Clamp(ret.b() + modifier, 0, 255);
|
||||
|
||||
return ret.Cast<u8>();
|
||||
return buildRGBA(ret.r(), ret.g(), ret.b(), 0);
|
||||
}
|
||||
};
|
||||
|
||||
@ -121,7 +131,8 @@ inline void etc1_pass(u8* etc1_buffer, u8* linear_buffer, u32 x_blocks) {
|
||||
std::memcpy(&tile.raw, &etc1_buffer[i * 8], 8);
|
||||
for (u32 k = 0; k < 4; k++) {
|
||||
for (u32 j = 0; j < 4; j++) {
|
||||
u32 rgba = (tile.GetRGB(j, k).ToRGB()) | 0xFF000000;
|
||||
auto rgb = tile.GetRGB(j, k);
|
||||
u32 rgba = rgb | 0xFF000000;
|
||||
std::memcpy(&tmp[k * line + j * 4 + index], &rgba, 4);
|
||||
}
|
||||
}
|
||||
@ -142,7 +153,8 @@ inline void etc1a4_pass(u8* etc1_buffer, u8* linear_buffer, u32 x_blocks) {
|
||||
for (u32 j = 0; j < 4; j++) {
|
||||
u32 alpha = (alpha_tile >> (4 * (j * 4 + k))) & 0x0F;
|
||||
alpha |= (alpha << 4);
|
||||
u32 rgba = tile.GetRGB(j, k).ToRGB() | (alpha << 24);
|
||||
auto rgb = tile.GetRGB(j, k);
|
||||
u32 rgba = rgb | (alpha << 24);
|
||||
std::memcpy(&tmp[k * line + j * 4 + index], &rgba, 4);
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
#pragma once
|
||||
|
||||
void ETC1(u8* etc1_buffer, u8* matrix_buffer, u32 width, u32 height);
|
||||
void ETC1A4(u8* etc1_buffer, u8* matrix_buffer, u32 width, u32 height);
|
||||
|
@ -2,8 +2,8 @@
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include "common/common_types.h"
|
||||
#include "morton.h"
|
||||
#include "texture_utils.h"
|
||||
#include "video_core/texture/internal/morton.h"
|
||||
#include "video_core/texture/internal/texture_utils.h"
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Optimizations
|
||||
@ -15,6 +15,8 @@
|
||||
// favor fast code over small code.
|
||||
#pragma optimize("t", on)
|
||||
#pragma intrinsic(memcpy)
|
||||
#define __hot
|
||||
#define __no_inline __declspec(noinline)
|
||||
#elif defined(CLANG_OR_GCC)
|
||||
// The next 3 will swizzle memory copying to help find the best sse/avx shuffling
|
||||
// in case it's possible. Compilation tests have proven effective use of these
|
||||
@ -22,12 +24,20 @@
|
||||
#pragma GCC optimize("-fpredictive-commoning")
|
||||
#pragma GCC optimize("-ftree-loop-distribute-patterns")
|
||||
#pragma GCC optimize("-ftree-vectorize")
|
||||
// limit inlining
|
||||
#pragma GCC option("--param max-inline-insns-single=128")
|
||||
|
||||
#pragma GCC option("--param inline-unit-growth=400")
|
||||
#pragma GCC option("--param large-function-growth=800")
|
||||
// The beauty of these compiler options is that they generate better code than
|
||||
// hand written intrinsics, since inline expanding memory transfers can be pattern
|
||||
// matched with vector instructions available in the target.
|
||||
#define __no_inline __attribute__((noinline))
|
||||
#define __hot __attribute__((hot))
|
||||
// `__forceinline` is the MSVC spelling; on GCC/Clang map it to the always_inline
// attribute unless the toolchain already provides a definition. The attribute
// keyword needs both leading and trailing double underscores.
#if !defined(__forceinline)
#define __forceinline __attribute__((always_inline))
#endif
|
||||
#else
|
||||
#define __hot
|
||||
#define __no_inline
|
||||
#define __forceinline
|
||||
#endif
|
||||
|
||||
#pragma region Z_Order
|
||||
@ -54,11 +64,11 @@ constexpr u32 isBottom(u32 block_index) {
|
||||
}
|
||||
|
||||
template <void codec(u8*, u8*, size_t), size_t nibbles, u32 blocks, size_t block_size>
|
||||
inline void swizzle_block(u8*& morton_block, u8* linear_block);
|
||||
__forceinline static void swizzle_block(u8*& morton_block, u8* linear_block);
|
||||
|
||||
template <void codec(u8*, u8*, size_t), size_t nibbles, u32 block_index, u32 blocks,
|
||||
size_t block_size>
|
||||
inline void swizzle_block_aux(u8*& morton_block, u8* linear_block) {
|
||||
__forceinline static void swizzle_block_aux(u8*& morton_block, u8* linear_block) {
|
||||
// move the linear_block pointer to the appropriate block
|
||||
const size_t right = isRight(block_index) * (blocks * nibbles) / 2;
|
||||
const size_t down = isBottom(block_index) * block_size;
|
||||
@ -67,7 +77,7 @@ inline void swizzle_block_aux(u8*& morton_block, u8* linear_block) {
|
||||
}
|
||||
|
||||
template <void codec(u8*, u8*, size_t), size_t nibbles, u32 blocks, size_t block_size>
|
||||
inline void swizzle_block(u8*& morton_block, u8* linear_block) {
|
||||
__forceinline static void swizzle_block(u8*& morton_block, u8* linear_block) {
|
||||
const size_t new_block_size = block_size / 2;
|
||||
if (blocks <= 2) {
|
||||
// We handle 2*2 blocks on z-order
|
||||
@ -94,14 +104,14 @@ inline void swizzle_block(u8*& morton_block, u8* linear_block) {
|
||||
}
|
||||
|
||||
template <void codec(u8*, u8*, size_t), size_t nibbles, size_t lines_per_block>
|
||||
void swizzle_pass(u8* morton_block, u8* linear_block) {
|
||||
__forceinline static void swizzle_pass(u8* morton_block, u8* linear_block) {
|
||||
const size_t block_size = (lines_per_block * lines_per_block * nibbles) / 2;
|
||||
swizzle_block<codec, nibbles, lines_per_block, block_size>(morton_block, linear_block);
|
||||
}
|
||||
#pragma endregion Z_Order
|
||||
|
||||
template <size_t nibbles, size_t lines_per_block>
|
||||
void encode_pass(u8* morton_buffer, u8* linear_buffer, u32 x_blocks) {
|
||||
__hot inline static void encode_pass(u8* morton_buffer, u8* linear_buffer, u32 x_blocks) {
|
||||
const u32 tile_size = (lines_per_block * lines_per_block * nibbles) / 2;
|
||||
alignas(64) u8 tmp[tile_size];
|
||||
tiling_pass<&encode, nibbles, lines_per_block>(linear_buffer, tmp, x_blocks);
|
||||
@ -109,7 +119,7 @@ void encode_pass(u8* morton_buffer, u8* linear_buffer, u32 x_blocks) {
|
||||
}
|
||||
|
||||
template <size_t nibbles, size_t lines_per_block>
|
||||
void decode_pass(u8* morton_buffer, u8* linear_buffer, u32 x_blocks) {
|
||||
__hot inline static void decode_pass(u8* morton_buffer, u8* linear_buffer, u32 x_blocks) {
|
||||
const u32 tile_size = (lines_per_block * lines_per_block * nibbles) / 2;
|
||||
alignas(64) u8 tmp[tile_size];
|
||||
swizzle_pass<&decode, nibbles, lines_per_block>(morton_buffer, tmp);
|
||||
@ -117,7 +127,7 @@ void decode_pass(u8* morton_buffer, u8* linear_buffer, u32 x_blocks) {
|
||||
}
|
||||
|
||||
template <void codec(u8*, u8*, u32), size_t nibbles, size_t lines_per_block>
|
||||
void morton_pass(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height) {
|
||||
__hot static void morton_pass(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height) {
|
||||
const u32 x_blocks = (width / lines_per_block);
|
||||
const u32 y_blocks = (height / lines_per_block);
|
||||
const size_t line_size = (lines_per_block * nibbles) / 2;
|
||||
@ -135,9 +145,22 @@ void morton_pass(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height) {
|
||||
}
|
||||
}
|
||||
|
||||
// keep hot code together
|
||||
__no_inline __hot static void morton_8x8_32(u8* morton_buffer, u8* matrix_buffer, u32 width,
|
||||
u32 height, bool decode) {
|
||||
if (decode)
|
||||
morton_pass<&decode_pass<8, 8>, 8, 8>(morton_buffer, matrix_buffer, width, height);
|
||||
else
|
||||
morton_pass<&encode_pass<8, 8>, 8, 8>(morton_buffer, matrix_buffer, width, height);
|
||||
}
|
||||
|
||||
namespace Decoders {
|
||||
|
||||
bool Morton_8x8(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height, u32 bpp) {
|
||||
if (bpp == 32) {
|
||||
morton_8x8_32(morton_buffer, matrix_buffer, width, height, true);
|
||||
return true;
|
||||
}
|
||||
switch (bpp) {
|
||||
case 4: {
|
||||
morton_pass<&decode_pass<1, 8>, 1, 8>(morton_buffer, matrix_buffer, width, height);
|
||||
@ -159,11 +182,6 @@ bool Morton_8x8(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height, u32
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
case 32: {
|
||||
morton_pass<&decode_pass<8, 8>, 8, 8>(morton_buffer, matrix_buffer, width, height);
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
return false;
|
||||
break;
|
||||
@ -209,6 +227,10 @@ bool Morton_32x32(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height, u
|
||||
namespace Encoders {
|
||||
|
||||
bool Morton_8x8(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height, u32 bpp) {
|
||||
if (bpp == 32) {
|
||||
morton_8x8_32(morton_buffer, matrix_buffer, width, height, false);
|
||||
return true;
|
||||
}
|
||||
switch (bpp) {
|
||||
case 4: {
|
||||
morton_pass<&encode_pass<1, 8>, 1, 8>(morton_buffer, matrix_buffer, width, height);
|
||||
@ -230,11 +252,6 @@ bool Morton_8x8(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height, u32
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
case 32: {
|
||||
morton_pass<&encode_pass<8, 8>, 8, 8>(morton_buffer, matrix_buffer, width, height);
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
return false;
|
||||
break;
|
||||
|
@ -1,7 +1,7 @@
|
||||
#include "common/common_types.h"
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
enum class MortonPass { Tile8x8, Tile32x32 };
|
||||
|
||||
namespace Decoders {
|
||||
|
@ -1,3 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
@ -5,12 +7,9 @@
|
||||
#include "common/color.h"
|
||||
#include "common/swap.h"
|
||||
|
||||
#pragma once
|
||||
|
||||
#if ((defined(__clang__) || defined(__GNUC__)) && !defined(__INTEL_COMPILER))
|
||||
#define CLANG_OR_GCC
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Optimizations
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
@ -23,16 +22,6 @@
|
||||
#pragma GCC optimize("-ftree-vectorize")
|
||||
#endif
|
||||
|
||||
// @param read_size is the amount of bytes each pixel takes
|
||||
inline void decode(u8* morton_pointer, u8* matrix_pointer, size_t read_size) {
|
||||
std::memcpy(matrix_pointer, morton_pointer, read_size);
|
||||
}
|
||||
|
||||
// @param read_size is the amount of bytes each pixel takes
|
||||
inline void encode(u8* morton_pointer, u8* matrix_pointer, size_t read_size) {
|
||||
std::memcpy(morton_pointer, matrix_pointer, read_size);
|
||||
}
|
||||
|
||||
// Pre: width % 8 == 0 && height % 8 == 0
|
||||
template <void pass(u8*, u8*), u32 read_size, u32 write_size, u32 tuning = 2>
|
||||
inline void image_pass_aux_rev(u8* target, u32 width, u32 height) {
|
||||
@ -80,9 +69,9 @@ inline void image_pass_aux(u8* target, u32 width, u32 height) {
|
||||
template <void pass(u8*, u8*), u32 read_size, u32 write_size, u32 tuning = 2>
|
||||
inline void image_pass(u8* target, u32 width, u32 height) {
|
||||
if (read_size > write_size)
|
||||
image_pass_aux<pass, read_size, write_size, tuning>;
|
||||
image_pass_aux<pass, read_size, write_size, tuning>(target, width, height);
|
||||
else
|
||||
image_pass_aux_rev<pass, read_size, write_size, tuning>;
|
||||
image_pass_aux_rev<pass, read_size, write_size, tuning>(target, width, height);
|
||||
}
|
||||
|
||||
template <void codec(u8*, u8*, size_t), size_t nibbles, size_t lines_per_block>
|
||||
@ -96,3 +85,13 @@ void tiling_pass(u8* linear, u8* tiled, u32 x_blocks) {
|
||||
codec(tiled + tiled_index, linear + linear_index, tiled_line_size);
|
||||
}
|
||||
}
|
||||
|
||||
// @param read_size is the amount of bytes each pixel takes
|
||||
inline void decode(u8* morton_pointer, u8* matrix_pointer, size_t read_size) {
|
||||
std::memcpy(matrix_pointer, morton_pointer, read_size);
|
||||
}
|
||||
|
||||
// @param read_size is the amount of bytes each pixel takes
|
||||
inline void encode(u8* morton_pointer, u8* matrix_pointer, size_t read_size) {
|
||||
std::memcpy(morton_pointer, matrix_pointer, read_size);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user