diff --git a/src/common/vector_math.h b/src/common/vector_math.h
index 7ca8e15f5..e029718bd 100644
--- a/src/common/vector_math.h
+++ b/src/common/vector_math.h
@@ -407,6 +407,11 @@ inline float Vec3<float>::Normalize() {
     return length;
 }
 
+template <>
+inline unsigned int Vec3<unsigned char>::ToRGB() const { // packs bytes as 0x00ZZYYXX
+    return static_cast<unsigned int>(x | (y << 8) | (z << 16));
+}
+
 typedef Vec3<float> Vec3f;
 
 template <typename T>
@@ -611,6 +616,11 @@ public:
 #undef _DEFINE_SWIZZLER3
 };
 
+template <>
+inline unsigned int Vec4<unsigned char>::ToRGBA() const { // packs bytes as 0xWWZZYYXX
+    return (static_cast<unsigned int>(w) << 24) | (z << 16) | (y << 8) | x; // no signed shift
+}
+
 template <typename T, typename V>
 Vec4<decltype(V{} * T{})> operator*(const V& f, const Vec4<T>& vec) {
     return MakeVec(f * vec.x, f * vec.y, f * vec.z, f * vec.w);
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 2522064e7..b33869c22 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,14 +1,14 @@
 set(SRCS
-            texture/internal/morton.cpp
-            texture/internal/etc1.cpp
-            texture/codec.cpp
-            texture/internal/codecs.cpp
             renderer_opengl/gl_rasterizer.cpp
             renderer_opengl/gl_rasterizer_cache.cpp
             renderer_opengl/gl_shader_gen.cpp
             renderer_opengl/gl_shader_util.cpp
             renderer_opengl/gl_state.cpp
             renderer_opengl/renderer_opengl.cpp
+            texture/internal/morton.cpp
+            texture/internal/etc1.cpp
+            texture/internal/codecs.cpp
+            texture/codec.cpp
             debug_utils/debug_utils.cpp
             clipper.cpp
             command_processor.cpp
@@ -25,12 +25,6 @@ set(SRCS
 
 set(HEADERS
             debug_utils/debug_utils.h
-            texture/internal/texture_utils.h
-            texture/internal/morton.h
-            texture/internal/etc1.h
-            texture/codec.h
-            texture/formats.h
-            texture/internal/codecs.h
             renderer_opengl/gl_rasterizer.h
             renderer_opengl/gl_rasterizer_cache.h
             renderer_opengl/gl_resource_manager.h
@@ -39,6 +33,12 @@ set(HEADERS
             renderer_opengl/gl_state.h
             renderer_opengl/pica_to_gl.h
             renderer_opengl/renderer_opengl.h
+            texture/internal/texture_utils.h
+            texture/internal/morton.h
+            texture/internal/etc1.h
+            texture/internal/codecs.h
+            texture/codec.h
+            texture/formats.h
             clipper.h
             command_processor.h
             gpu_debugger.h
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 5a306a5c8..089d9328c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -21,6 +21,7 @@
 #include "video_core/renderer_opengl/gl_shader_util.h"
 #include "video_core/renderer_opengl/pica_to_gl.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
+#include "video_core/texture/formats.h"
 
 MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
 MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
@@ -716,7 +717,6 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) {
 
 bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
     MICROPROFILE_SCOPE(OpenGL_Blits);
-    using PixelFormat = CachedSurface::PixelFormat;
     using SurfaceType = CachedSurface::SurfaceType;
 
     CachedSurface src_params;
@@ -728,7 +728,7 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe
     // the image, and it allows for smaller texture cache lookup rectangles.
     src_params.height = config.output_height;
     src_params.is_tiled = !config.input_linear;
-    src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.input_format);
+    src_params.pixel_format = Pica::Texture::Format::FromGPUPixelFormat(config.input_format);
 
     CachedSurface dst_params;
     dst_params.addr = config.GetPhysicalOutputAddress();
@@ -737,7 +737,7 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe
     dst_params.height =
         config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value();
     dst_params.is_tiled = config.input_linear != config.dont_swizzle;
-    dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format);
+    dst_params.pixel_format = Pica::Texture::Format::FromGPUPixelFormat(config.output_format);
 
     MathUtil::Rectangle<int> src_rect;
     CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect);
@@ -776,7 +776,7 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe
     }
 
     u32 dst_size = dst_params.width * dst_params.height *
-                   CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8;
+                   Pica::Texture::Format::GetBpp(dst_params.pixel_format) / 8;
     dst_surface->dirty = true;
     res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true);
     return true;
@@ -789,7 +789,6 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon
 
 bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) {
     MICROPROFILE_SCOPE(OpenGL_Blits);
-    using PixelFormat = CachedSurface::PixelFormat;
     using SurfaceType = CachedSurface::SurfaceType;
 
     CachedSurface* dst_surface = res_cache.TryGetFillSurface(config);
@@ -824,7 +823,7 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config)
 
         if (config.fill_24bit) {
             switch (dst_surface->pixel_format) {
-            case PixelFormat::RGB8:
+            case Pica::Texture::Format::Type::RGB8:
                 color_values[0] = config.value_24bit_r / 255.0f;
                 color_values[1] = config.value_24bit_g / 255.0f;
                 color_values[2] = config.value_24bit_b / 255.0f;
@@ -836,7 +835,7 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config)
             u32 value = config.value_32bit;
 
             switch (dst_surface->pixel_format) {
-            case PixelFormat::RGBA8:
+            case Pica::Texture::Format::Type::RGBA8:
                 color_values[0] = (value >> 24) / 255.0f;
                 color_values[1] = ((value >> 16) & 0xFF) / 255.0f;
                 color_values[2] = ((value >> 8) & 0xFF) / 255.0f;
@@ -850,34 +849,34 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config)
             Math::Vec4<u8> color;
 
             switch (dst_surface->pixel_format) {
-            case PixelFormat::RGBA8:
+            case Pica::Texture::Format::Type::RGBA8:
                 color_values[0] = (value_16bit >> 8) / 255.0f;
                 color_values[1] = (value_16bit & 0xFF) / 255.0f;
                 color_values[2] = color_values[0];
                 color_values[3] = color_values[1];
                 break;
-            case PixelFormat::RGB5A1:
+            case Pica::Texture::Format::Type::RGB5A1:
                 color = Color::DecodeRGB5A1((const u8*)&value_16bit);
                 color_values[0] = color[0] / 31.0f;
                 color_values[1] = color[1] / 31.0f;
                 color_values[2] = color[2] / 31.0f;
                 color_values[3] = color[3];
                 break;
-            case PixelFormat::RGB565:
+            case Pica::Texture::Format::Type::RGB565:
                 color = Color::DecodeRGB565((const u8*)&value_16bit);
                 color_values[0] = color[0] / 31.0f;
                 color_values[1] = color[1] / 63.0f;
                 color_values[2] = color[2] / 31.0f;
                 break;
-            case PixelFormat::RGBA4:
+            case Pica::Texture::Format::Type::RGBA4:
                 color = Color::DecodeRGBA4((const u8*)&value_16bit);
                 color_values[0] = color[0] / 15.0f;
                 color_values[1] = color[1] / 15.0f;
                 color_values[2] = color[2] / 15.0f;
                 color_values[3] = color[3] / 15.0f;
                 break;
-            case PixelFormat::IA8:
-            case PixelFormat::RG8:
+            case Pica::Texture::Format::Type::IA8:
+            case Pica::Texture::Format::Type::RG8:
                 color_values[0] = (value_16bit >> 8) / 255.0f;
                 color_values[1] = (value_16bit & 0xFF) / 255.0f;
                 break;
@@ -899,9 +898,9 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config)
         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
 
         GLfloat value_float;
-        if (dst_surface->pixel_format == CachedSurface::PixelFormat::D16) {
+        if (dst_surface->pixel_format == Pica::Texture::Format::Type::D16) {
             value_float = config.value_32bit / 65535.0f; // 2^16 - 1
-        } else if (dst_surface->pixel_format == CachedSurface::PixelFormat::D24) {
+        } else if (dst_surface->pixel_format == Pica::Texture::Format::Type::D24) {
             value_float = config.value_32bit / 16777215.0f; // 2^24 - 1
         }
 
@@ -945,7 +944,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con
     src_params.height = config.height;
     src_params.pixel_stride = pixel_stride;
     src_params.is_tiled = false;
-    src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.color_format);
+    src_params.pixel_format = Pica::Texture::Format::FromGPUPixelFormat(config.color_format);
 
     MathUtil::Rectangle<int> src_rect;
     CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index ef3b06a7b..618a4e1f7 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -21,6 +21,8 @@
 #include "video_core/pica_state.h"
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/texture/codec.h"
+#include "video_core/texture/formats.h"
 #include "video_core/utils.h"
 #include "video_core/video_core.h"
 
@@ -30,21 +32,48 @@ struct FormatTuple {
     GLenum type;
 };
 
-static const std::array<FormatTuple, 5> fb_format_tuples = {{
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8},     // RGBA8
-    {GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE},              // RGB8
-    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1
-    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},     // RGB565
-    {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4},   // RGBA4
-}};
-
-static const std::array<FormatTuple, 4> depth_format_tuples = {{
+static const std::array<FormatTuple, 18> format_tuples = {{ // indexed by pixel format value
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8},                  // RGBA8
+    {GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE},                           // RGB8
+    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1},              // RGB5A1
+    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},                  // RGB565
+    {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4},                // RGBA4
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE},                         // IA8
+    {GL_RG8, GL_RG, GL_UNSIGNED_BYTE},                             // RG8 (format must be unsized)
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE},                         // I8
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE},                         // A8
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE},                         // IA4
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE},                         // I4
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE},                         // A4
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE},                         // ETC1
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE},                         // ETC1A4
     {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16
     {},
     {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT},   // D24
     {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8
 }};
 
+static const std::array<bool, 18> native_format = { // false => codec RGBA transform is enabled
+    true,  // RGBA8
+    true,  // RGB8
+    true,  // RGB5A1
+    true,  // RGB565
+    true,  // RGBA4
+    false, // IA8
+    true,  // RG8
+    false, // I8
+    false, // A8
+    false, // IA4
+    false, // I4
+    false, // A4
+    false, // ETC1
+    false, // ETC1A4
+    true,  // D16
+    false, // (gap: matches the empty entry in format_tuples)
+    false, // D24
+    false, // D24S8
+};
+
 RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
     transfer_framebuffers[0].Create();
     transfer_framebuffers[1].Create();
@@ -54,55 +83,6 @@ RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
     FlushAll();
 }
 
-static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height,
-                             u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data,
-                             u8* gl_data, bool morton_to_gl) {
-    using PixelFormat = CachedSurface::PixelFormat;
-
-    u8* data_ptrs[2];
-    u32 depth_stencil_shifts[2] = {24, 8};
-
-    if (morton_to_gl) {
-        std::swap(depth_stencil_shifts[0], depth_stencil_shifts[1]);
-    }
-
-    if (pixel_format == PixelFormat::D24S8) {
-        for (unsigned y = 0; y < height; ++y) {
-            for (unsigned x = 0; x < width; ++x) {
-                const u32 coarse_y = y & ~7;
-                u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
-                                    coarse_y * width * bytes_per_pixel;
-                u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
-
-                data_ptrs[morton_to_gl] = morton_data + morton_offset;
-                data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
-
-                // Swap depth and stencil value ordering since 3DS does not match OpenGL
-                u32 depth_stencil;
-                memcpy(&depth_stencil, data_ptrs[1], sizeof(u32));
-                depth_stencil = (depth_stencil << depth_stencil_shifts[0]) |
-                                (depth_stencil >> depth_stencil_shifts[1]);
-
-                memcpy(data_ptrs[0], &depth_stencil, sizeof(u32));
-            }
-        }
-    } else {
-        for (unsigned y = 0; y < height; ++y) {
-            for (unsigned x = 0; x < width; ++x) {
-                const u32 coarse_y = y & ~7;
-                u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
-                                    coarse_y * width * bytes_per_pixel;
-                u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
-
-                data_ptrs[morton_to_gl] = morton_data + morton_offset;
-                data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
-
-                memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
-            }
-        }
-    }
-}
-
 void RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex,
                                          CachedSurface::SurfaceType type,
                                          const MathUtil::Rectangle<int>& src_rect,
@@ -184,7 +164,7 @@ bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface,
     return true;
 }
 
-static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format,
+static void AllocateSurfaceTexture(GLuint texture, Pica::Texture::Format::Type pixel_format,
                                    u32 width, u32 height) {
     // Allocate an uninitialized texture of appropriate size and format for the surface
     using SurfaceType = CachedSurface::SurfaceType;
@@ -199,17 +179,8 @@ static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pi
 
     SurfaceType type = CachedSurface::GetFormatType(pixel_format);
 
-    FormatTuple tuple;
-    if (type == SurfaceType::Color) {
-        ASSERT((size_t)pixel_format < fb_format_tuples.size());
-        tuple = fb_format_tuples[(unsigned int)pixel_format];
-    } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
-        size_t tuple_idx = (size_t)pixel_format - 14;
-        ASSERT(tuple_idx < depth_format_tuples.size());
-        tuple = depth_format_tuples[tuple_idx];
-    } else {
-        tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE};
-    }
+    ASSERT((size_t)pixel_format < format_tuples.size());
+    FormatTuple tuple = format_tuples[(unsigned int)pixel_format];
 
     glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0, tuple.format,
                  tuple.type, nullptr);
@@ -227,7 +198,7 @@ static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pi
 MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192));
 CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale,
                                                  bool load_if_create) {
-    using PixelFormat = CachedSurface::PixelFormat;
+    using PixelFormat = Pica::Texture::Format::Type;
     using SurfaceType = CachedSurface::SurfaceType;
 
     if (params.addr == 0) {
@@ -235,7 +206,7 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo
     }
 
     u32 params_size =
-        params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
+        params.width * params.height * Pica::Texture::Format::GetBpp(params.pixel_format) / 8;
 
     // Check for an exact match in existing surfaces
     CachedSurface* best_exact_surface = nullptr;
@@ -320,72 +291,36 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo
 
         if (!new_surface->is_tiled) {
             // TODO: Ensure this will always be a color format, not a depth or other format
-            ASSERT((size_t)new_surface->pixel_format < fb_format_tuples.size());
-            const FormatTuple& tuple = fb_format_tuples[(unsigned int)params.pixel_format];
+            ASSERT((size_t)new_surface->pixel_format < format_tuples.size());
+            const FormatTuple& tuple = format_tuples[(unsigned int)params.pixel_format];
 
             glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)new_surface->pixel_stride);
             glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0,
                          tuple.format, tuple.type, texture_src_data);
             glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
         } else {
-            SurfaceType type = CachedSurface::GetFormatType(new_surface->pixel_format);
-            if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) {
-                FormatTuple tuple;
-                if ((size_t)params.pixel_format < fb_format_tuples.size()) {
-                    tuple = fb_format_tuples[(unsigned int)params.pixel_format];
-                } else {
-                    // Texture
-                    tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE};
-                }
-
-                std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height);
-
-                Pica::DebugUtils::TextureInfo tex_info;
-                tex_info.width = params.width;
-                tex_info.height = params.height;
-                tex_info.stride =
-                    params.width * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
-                tex_info.format = (Pica::Regs::TextureFormat)params.pixel_format;
-                tex_info.physical_address = params.addr;
-
-                for (unsigned y = 0; y < params.height; ++y) {
-                    for (unsigned x = 0; x < params.width; ++x) {
-                        tex_buffer[x + params.width * y] = Pica::DebugUtils::LookupTexture(
-                            texture_src_data, x, params.height - 1 - y, tex_info);
-                    }
-                }
-
-                glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height,
-                             0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data());
-            } else {
-                // Depth/Stencil formats need special treatment since they aren't sampleable using
-                // LookupTexture and can't use RGBA format
-                size_t tuple_idx = (size_t)params.pixel_format - 14;
-                ASSERT(tuple_idx < depth_format_tuples.size());
-                const FormatTuple& tuple = depth_format_tuples[tuple_idx];
-
-                u32 bytes_per_pixel = CachedSurface::GetFormatBpp(params.pixel_format) / 8;
-
-                // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
-                bool use_4bpp = (params.pixel_format == PixelFormat::D24);
-
-                u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
-
-                std::vector<u8> temp_fb_depth_buffer(params.width * params.height *
-                                                     gl_bytes_per_pixel);
-
-                u8* temp_fb_depth_buffer_ptr =
-                    use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data();
-
-                MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel,
-                                 gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr,
-                                 true);
-
-                glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height,
-                             0, tuple.format, tuple.type, temp_fb_depth_buffer.data());
-            }
+            // Tiled data goes through the texture codec before upload. Check the format
+            // first: it indexes both format_tuples and native_format below.
+            ASSERT((size_t)params.pixel_format < format_tuples.size());
+            const FormatTuple& tuple = format_tuples[(unsigned int)params.pixel_format];
+            std::unique_ptr<Pica::Texture::Codec> codec = Pica::Texture::CodecFactory::build(
+                params.pixel_format, texture_src_data, params.width, params.height);
+            codec->configTiling(true, 8); // change 8 for 32 in case the image is tiled
+                                          // on blocks of 32x32
+            codec->configRGBATransform(!native_format[(unsigned int)params.pixel_format]);
+            codec->decode();
+            std::unique_ptr<u8[]> decoded_texture = codec->transferInternalBuffer();
+            // Map the codec's bytes per pixel onto an unpack alignment: 3 -> 1, 2 -> 2, else 4
+            u32 bytes = codec->getInternalBytesPerPixel();
+            if (bytes == 3)
+                bytes = 1;
+            else if (bytes != 2)
+                bytes = 4;
+            glPixelStorei(GL_UNPACK_ALIGNMENT, bytes);
+            glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0,
+                         tuple.format, tuple.type, decoded_texture.get());
+            glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
         }
-
         // If not 1x scale, blit 1x texture to a new scaled texture and replace texture in surface
         if (new_surface->res_scale_width != 1.f || new_surface->res_scale_height != 1.f) {
             OGLTexture scaled_texture;
@@ -430,7 +365,7 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params
     }
 
     u32 total_pixels = params.width * params.height;
-    u32 params_size = total_pixels * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
+    u32 params_size = total_pixels * Pica::Texture::Format::GetBpp(params.pixel_format) / 8;
 
     // Attempt to find encompassing surfaces
     CachedSurface* best_subrect_surface = nullptr;
@@ -467,7 +402,7 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params
     // Return the best subrect surface if found
     if (best_subrect_surface != nullptr) {
         unsigned int bytes_per_pixel =
-            (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8);
+            (Pica::Texture::Format::GetBpp(best_subrect_surface->pixel_format) / 8);
 
         int x0, y0;
 
@@ -521,7 +456,7 @@ CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(
     params.width = info.width;
     params.height = info.height;
     params.is_tiled = true;
-    params.pixel_format = CachedSurface::PixelFormatFromTextureFormat(info.format);
+    params.pixel_format = Pica::Texture::Format::FromTextureFormat(info.format);
     return GetSurface(params, false, true);
 }
 
@@ -574,10 +509,10 @@ RasterizerCacheOpenGL::GetFramebufferSurfaces(const Pica::Regs::FramebufferConfi
     }
 
     color_params.addr = config.GetColorBufferPhysicalAddress();
-    color_params.pixel_format = CachedSurface::PixelFormatFromColorFormat(config.color_format);
+    color_params.pixel_format = Pica::Texture::Format::FromColorFormat(config.color_format);
 
     depth_params.addr = config.GetDepthBufferPhysicalAddress();
-    depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format);
+    depth_params.pixel_format = Pica::Texture::Format::FromDepthFormat(config.depth_format);
 
     MathUtil::Rectangle<int> color_rect;
     CachedSurface* color_surface =
@@ -648,9 +583,9 @@ CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryF
             CachedSurface* surface = it2->get();
 
             if (surface->addr == config.GetStartAddress() &&
-                CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value &&
+                Pica::Texture::Format::GetBpp(surface->pixel_format) == bits_per_value &&
                 (surface->width * surface->height *
-                 CachedSurface::GetFormatBpp(surface->pixel_format) / 8) ==
+                 Pica::Texture::Format::GetBpp(surface->pixel_format) / 8) ==
                     (config.GetEndAddress() - config.GetStartAddress())) {
                 return surface;
             }
@@ -662,7 +597,6 @@ CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryF
 
 MICROPROFILE_DEFINE(OpenGL_SurfaceDownload, "OpenGL", "Surface Download", MP_RGB(128, 192, 64));
 void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) {
-    using PixelFormat = CachedSurface::PixelFormat;
     using SurfaceType = CachedSurface::SurfaceType;
 
     if (!surface->dirty) {
@@ -703,53 +637,32 @@ void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) {
 
     if (!surface->is_tiled) {
         // TODO: Ensure this will always be a color format, not a depth or other format
-        ASSERT((size_t)surface->pixel_format < fb_format_tuples.size());
-        const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format];
+        ASSERT((size_t)surface->pixel_format < format_tuples.size());
+        const FormatTuple& tuple = format_tuples[(unsigned int)surface->pixel_format];
 
         glPixelStorei(GL_PACK_ROW_LENGTH, (GLint)surface->pixel_stride);
         glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, dst_buffer);
         glPixelStorei(GL_PACK_ROW_LENGTH, 0);
     } else {
-        SurfaceType type = CachedSurface::GetFormatType(surface->pixel_format);
-        if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) {
-            ASSERT((size_t)surface->pixel_format < fb_format_tuples.size());
-            const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format];
+        // Check the format first: it indexes both format_tuples and native_format below.
+        ASSERT((size_t)surface->pixel_format < format_tuples.size());
+        const FormatTuple& tuple = format_tuples[(u32)surface->pixel_format];
+        u32 bytes_per_pixel = Pica::Texture::Format::GetBpp(surface->pixel_format) / 8;
+        if (!native_format[(u32)surface->pixel_format])
+            bytes_per_pixel = 4;
+        std::vector<u8> temp_gl_buffer(surface->width * surface->height * bytes_per_pixel);
+        glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
 
-            u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8;
-
-            std::vector<u8> temp_gl_buffer(surface->width * surface->height * bytes_per_pixel);
-
-            glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
-
-            // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion
-            // is necessary.
-            MortonCopyPixels(surface->pixel_format, surface->width, surface->height,
-                             bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(),
-                             false);
-        } else {
-            // Depth/Stencil formats need special treatment since they aren't sampleable using
-            // LookupTexture and can't use RGBA format
-            size_t tuple_idx = (size_t)surface->pixel_format - 14;
-            ASSERT(tuple_idx < depth_format_tuples.size());
-            const FormatTuple& tuple = depth_format_tuples[tuple_idx];
-
-            u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8;
-
-            // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
-            bool use_4bpp = (surface->pixel_format == PixelFormat::D24);
-
-            u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
-
-            std::vector<u8> temp_gl_buffer(surface->width * surface->height * gl_bytes_per_pixel);
-
-            glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
-
-            u8* temp_gl_buffer_ptr = use_4bpp ? temp_gl_buffer.data() + 1 : temp_gl_buffer.data();
-
-            MortonCopyPixels(surface->pixel_format, surface->width, surface->height,
-                             bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr,
-                             false);
-        }
+        // Encode the pixels read back from GL into dst_buffer.
+        std::unique_ptr<Pica::Texture::Codec> codec = Pica::Texture::CodecFactory::build(
+            surface->pixel_format, temp_gl_buffer.data(), surface->width, surface->height);
+        codec->configTiling(true, 8); // change 8 for 32 in case the image is tiled
+                                      // on blocks of 32x32
+        // Non-native formats were read back at 4 bytes per pixel; flag them for the codec.
+        codec->configRGBATransform(!native_format[(u32)surface->pixel_format]);
+        codec->configPreConvertedRGBA(!native_format[(u32)surface->pixel_format]);
+        codec->setExternalBuffer(dst_buffer);
+        codec->encode();
     }
 
     surface->dirty = false;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index b50e8292b..dc17cf6f4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -16,6 +16,7 @@
 #include "core/hw/gpu.h"
 #include "video_core/pica.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/texture/formats.h"
 
 namespace MathUtil {
 template <class T>
@@ -27,33 +28,6 @@ struct CachedSurface;
 using SurfaceCache = boost::icl::interval_map<PAddr, std::set<std::shared_ptr<CachedSurface>>>;
 
 struct CachedSurface {
-    enum class PixelFormat {
-        // First 5 formats are shared between textures and color buffers
-        RGBA8 = 0,
-        RGB8 = 1,
-        RGB5A1 = 2,
-        RGB565 = 3,
-        RGBA4 = 4,
-
-        // Texture-only formats
-        IA8 = 5,
-        RG8 = 6,
-        I8 = 7,
-        A8 = 8,
-        IA4 = 9,
-        I4 = 10,
-        A4 = 11,
-        ETC1 = 12,
-        ETC1A4 = 13,
-
-        // Depth buffer-only formats
-        D16 = 14,
-        // gap
-        D24 = 16,
-        D24S8 = 17,
-
-        Invalid = 255,
-    };
 
     enum class SurfaceType {
         Color = 0,
@@ -63,58 +37,8 @@ struct CachedSurface {
         Invalid = 4,
     };
 
-    static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) {
-        static const std::array<unsigned int, 18> bpp_table = {
-            32, // RGBA8
-            24, // RGB8
-            16, // RGB5A1
-            16, // RGB565
-            16, // RGBA4
-            16, // IA8
-            16, // RG8
-            8,  // I8
-            8,  // A8
-            8,  // IA4
-            4,  // I4
-            4,  // A4
-            4,  // ETC1
-            8,  // ETC1A4
-            16, // D16
-            0,
-            24, // D24
-            32, // D24S8
-        };
-
-        ASSERT((unsigned int)format < ARRAY_SIZE(bpp_table));
-        return bpp_table[(unsigned int)format];
-    }
-
-    static PixelFormat PixelFormatFromTextureFormat(Pica::Regs::TextureFormat format) {
-        return ((unsigned int)format < 14) ? (PixelFormat)format : PixelFormat::Invalid;
-    }
-
-    static PixelFormat PixelFormatFromColorFormat(Pica::Regs::ColorFormat format) {
-        return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
-    }
-
-    static PixelFormat PixelFormatFromDepthFormat(Pica::Regs::DepthFormat format) {
-        return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14)
-                                          : PixelFormat::Invalid;
-    }
-
-    static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) {
-        switch (format) {
-        // RGB565 and RGB5A1 are switched in PixelFormat compared to ColorFormat
-        case GPU::Regs::PixelFormat::RGB565:
-            return PixelFormat::RGB565;
-        case GPU::Regs::PixelFormat::RGB5A1:
-            return PixelFormat::RGB5A1;
-        default:
-            return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
-        }
-    }
-
-    static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) {
+    static bool CheckFormatsBlittable(Pica::Texture::Format::Type pixel_format_a,
+                                      Pica::Texture::Format::Type pixel_format_b) {
         SurfaceType a_type = GetFormatType(pixel_format_a);
         SurfaceType b_type = GetFormatType(pixel_format_b);
 
@@ -134,7 +58,7 @@ struct CachedSurface {
         return false;
     }
 
-    static SurfaceType GetFormatType(PixelFormat pixel_format) {
+    static SurfaceType GetFormatType(Pica::Texture::Format::Type pixel_format) {
         if ((unsigned int)pixel_format < 5) {
             return SurfaceType::Color;
         }
@@ -143,11 +67,12 @@ struct CachedSurface {
             return SurfaceType::Texture;
         }
 
-        if (pixel_format == PixelFormat::D16 || pixel_format == PixelFormat::D24) {
+        if (pixel_format == Pica::Texture::Format::Type::D16 ||
+            pixel_format == Pica::Texture::Format::Type::D24) {
             return SurfaceType::Depth;
         }
 
-        if (pixel_format == PixelFormat::D24S8) {
+        if (pixel_format == Pica::Texture::Format::Type::D24S8) {
             return SurfaceType::DepthStencil;
         }
 
@@ -177,7 +102,7 @@ struct CachedSurface {
     float res_scale_height = 1.f;
 
     bool is_tiled;
-    PixelFormat pixel_format;
+    Pica::Texture::Format::Type pixel_format;
     bool dirty;
 };
 
diff --git a/src/video_core/texture/codec.cpp b/src/video_core/texture/codec.cpp
index 5992dcdac..f63aa29ff 100644
--- a/src/video_core/texture/codec.cpp
+++ b/src/video_core/texture/codec.cpp
@@ -1,6 +1,10 @@
-#include "codec.h"
-#include "internal\codecs.h"
-#include "internal\morton.h"
+#include "common/color.h"
+#include "common/math_util.h"
+#include "common/swap.h"
+#include "common/vector_math.h"
+#include "video_core/texture/codec.h"
+#include "video_core/texture/internal/codecs.h"
+#include "video_core/texture/internal/morton.h"
 
 namespace Pica {
 namespace Texture {
@@ -17,18 +21,6 @@ void Codec::encode() {
         this->encode_morton_pass();
 };
 
-void Codec::setSize() {
-    this->start_nibbles_size = format_size;
-};
-
-inline void Codec::setWidth(u32 width) {
-    this->width = width;
-}
-
-inline void Codec::setHeight(u32 height) {
-    this->height = height;
-}
-
 void Codec::configTiling(bool active, u32 tiling) {
     this->morton = true;
     this->morton_pass_tiling = tiling;
@@ -63,15 +55,16 @@ bool Codec::invalid() {
 }
 
 void Codec::init(bool decode) {
+    this->setSize();
+    this->expected_nibbles_size = this->start_nibbles_size;
     if (decode) {
         if (this->raw_RGBA)
             this->expected_nibbles_size = 8;
     } else {
-        this->start_nibbles_size = this->format_size;
-        if (this->raw_RGBA)
-            this->expected_nibbles_size = this->format_size;
         if (this->preconverted)
             this->start_nibbles_size = 8;
+        if (!this->raw_RGBA)
+            this->expected_nibbles_size = this->start_nibbles_size;
     }
     if (!this->external_result_buffer) {
         size_t buff_size = this->width * this->height * this->expected_nibbles_size / 2;
@@ -80,7 +73,7 @@ void Codec::init(bool decode) {
     }
 }
 
-inline void Codec::decode_morton_pass() {
+void Codec::decode_morton_pass() {
     if (this->morton_pass_tiling == 8)
         Decoders::Morton_8x8(this->target_buffer, this->passing_buffer, this->width, this->height,
                              this->start_nibbles_size * 4);
@@ -89,7 +82,7 @@ inline void Codec::decode_morton_pass() {
                                this->start_nibbles_size * 4);
 }
 
-inline void Codec::encode_morton_pass() {
+void Codec::encode_morton_pass() {
     if (this->morton_pass_tiling == 8)
         Encoders::Morton_8x8(this->target_buffer, this->passing_buffer, this->width, this->height,
                              this->start_nibbles_size * 4);
@@ -98,41 +91,41 @@ inline void Codec::encode_morton_pass() {
                                this->start_nibbles_size * 4);
 }
 
-std::unique_ptr<Codec> CodecFactory::build(Format format, u8* target, u32 width, u32 height) {
+std::unique_ptr<Codec> CodecFactory::build(Format::Type format, u8* target, u32 width, u32 height) {
     switch (format) {
-    case Format::RGBA8:
+    case Format::Type::RGBA8:
         return std::make_unique<RGBACodec>(target, width, height);
-    case Format::RGB8:
+    case Format::Type::RGB8:
         return std::make_unique<RGBCodec>(target, width, height);
-    case Format::RGB5A1:
+    case Format::Type::RGB5A1:
         return std::make_unique<RGB5A1Codec>(target, width, height);
-    case Format::RGB565:
+    case Format::Type::RGB565:
         return std::make_unique<RGB565Codec>(target, width, height);
-    case Format::RGBA4:
+    case Format::Type::RGBA4:
         return std::make_unique<RGBA4Codec>(target, width, height);
-    case Format::RG8:
+    case Format::Type::RG8:
         return std::make_unique<RG8Codec>(target, width, height);
-    case Format::IA8:
+    case Format::Type::IA8:
         return std::make_unique<IA8Codec>(target, width, height);
-    case Format::I8:
+    case Format::Type::I8:
         return std::make_unique<I8Codec>(target, width, height);
-    case Format::A8:
+    case Format::Type::A8:
         return std::make_unique<A8Codec>(target, width, height);
-    case Format::IA4:
+    case Format::Type::IA4:
         return std::make_unique<IA4Codec>(target, width, height);
-    case Format::I4:
+    case Format::Type::I4:
         return std::make_unique<I4Codec>(target, width, height);
-    case Format::A4:
+    case Format::Type::A4:
         return std::make_unique<A4Codec>(target, width, height);
-    case Format::ETC1:
+    case Format::Type::ETC1:
         return std::make_unique<ETC1Codec>(target, width, height);
-    case Format::ETC1A4:
+    case Format::Type::ETC1A4:
         return std::make_unique<ETC1A4Codec>(target, width, height);
-    case Format::D16:
+    case Format::Type::D16:
         return std::make_unique<D16Codec>(target, width, height);
-    case Format::D24:
+    case Format::Type::D24:
         return std::make_unique<D24Codec>(target, width, height);
-    case Format::D24S8:
+    case Format::Type::D24S8:
         return std::make_unique<D24S8Codec>(target, width, height);
     default:
         return nullptr;
diff --git a/src/video_core/texture/codec.h b/src/video_core/texture/codec.h
index fe873556d..1c0a6b1d9 100644
--- a/src/video_core/texture/codec.h
+++ b/src/video_core/texture/codec.h
@@ -1,9 +1,10 @@
+
+#pragma once
+
 #include <iostream>
 #include <memory>
 #include "common/common_types.h"
-#include "formats.h"
-
-#pragma once
+#include "video_core/texture/formats.h"
 
 namespace Pica {
 
@@ -16,18 +17,23 @@ public:
         this->target_buffer = target;
         this->setWidth(width);
         this->setHeight(height);
-        this->setSize();
-        this->expected_nibbles_size = this->start_nibbles_size;
     }
     virtual ~Codec() {}
 
     virtual void decode();
     virtual void encode();
 
-    void setSize();
+    inline void setWidth(u32 width) { // stored width; init() multiplies it into buff_size
+        this->width = width;
+    }
 
-    void setWidth(u32 width);
-    void setHeight(u32 height);
+    inline void setHeight(u32 height) { // stored height; init() multiplies it into buff_size
+        this->height = height;
+    }
+
+    inline u32 getInternalBytesPerPixel() { // half of expected_nibbles_size, rounded down
+        return this->expected_nibbles_size / 2; // integer division: a size of 1 nibble yields 0
+    }
 
     // Common Passes
     void configTiling(bool active, u32 tiling);
@@ -54,7 +60,10 @@ protected:
 
     u32 start_nibbles_size;
     u32 expected_nibbles_size;
-    const u32 format_size = 8;
+
+    virtual void setSize() { // default start size; init() calls this, format codecs override it
+        this->start_nibbles_size = 8;
+    };
 
     u8* target_buffer;                     // Initial read buffer
     u8* passing_buffer;                    // pointer aliasing: Used and modified by passes
@@ -65,12 +74,12 @@ protected:
 
     typedef Codec super;
 
-    inline void decode_morton_pass();
-    inline void encode_morton_pass();
+    void decode_morton_pass();
+    void encode_morton_pass();
 };
 
 namespace CodecFactory {
-std::unique_ptr<Codec> build(Pica::Texture::Format format, u8* target, u32 width, u32 height);
+std::unique_ptr<Codec> build(Pica::Texture::Format::Type format, u8* target, u32 width, u32 height);
 };
 
 } // Texture
diff --git a/src/video_core/texture/formats.h b/src/video_core/texture/formats.h
index c15d40c1d..ffb24e615 100644
--- a/src/video_core/texture/formats.h
+++ b/src/video_core/texture/formats.h
@@ -1,36 +1,96 @@
+
 #pragma once
 
+#include <array>
+#include "common/assert.h"
+#include "core/hw/gpu.h"
+#include "video_core/pica.h"
+
 namespace Pica {
 
 namespace Texture {
 
-enum class Format {
-    // First 5 formats are shared between textures and color buffers
-    RGBA8 = 0,
-    RGB8 = 1,
-    RGB5A1 = 2,
-    RGB565 = 3,
-    RGBA4 = 4,
+struct Format {
 
-    // Texture-only formats
-    IA8 = 5,
-    RG8 = 6,
-    I8 = 7,
-    A8 = 8,
-    IA4 = 9,
-    I4 = 10,
-    A4 = 11,
-    ETC1 = 12,
-    ETC1A4 = 13,
+    enum class Type {
+        // First 5 formats are shared between textures and color buffers
+        RGBA8 = 0,
+        RGB8 = 1,
+        RGB5A1 = 2,
+        RGB565 = 3,
+        RGBA4 = 4,
 
-    // Depth buffer-only formats
-    D16 = 14,
-    // gap
-    D24 = 16,
-    D24S8 = 17,
+        // Texture-only formats
+        IA8 = 5,
+        RG8 = 6,
+        I8 = 7,
+        A8 = 8,
+        IA4 = 9,
+        I4 = 10,
+        A4 = 11,
+        ETC1 = 12,
+        ETC1A4 = 13,
 
-    Invalid = 255,
-};
+        // Depth buffer-only formats
+        D16 = 14,
+        // gap
+        D24 = 16,
+        D24S8 = 17,
+
+        Invalid = 255,
+    };
+
+    static u32 GetBpp(Type format) { // bits per pixel, looked up by the enum's numeric value
+        static const std::array<unsigned int, 18> bpp_table = {
+            32, // RGBA8
+            24, // RGB8
+            16, // RGB5A1
+            16, // RGB565
+            16, // RGBA4
+            16, // IA8
+            16, // RG8
+            8,  // I8
+            8,  // A8
+            8,  // IA4
+            4,  // I4
+            4,  // A4
+            4,  // ETC1
+            8,  // ETC1A4
+            16, // D16
+            0,  // unused value 15 (the gap between D16 and D24)
+            24, // D24
+            32, // D24S8
+        };
+
+        ASSERT((u32)format < ARRAY_SIZE(bpp_table)); // Type::Invalid (255) is past the 18 entries
+        return bpp_table[(u32)format];
+    }
+
+    static Type FromTextureFormat(Regs::TextureFormat format) { // values 0-13 are cast 1:1
+        return ((unsigned int)format < 14) ? (Type)format : Type::Invalid;
+    }
+
+    static Type FromColorFormat(Regs::ColorFormat format) { // values 0-4 are cast 1:1
+        return ((unsigned int)format < 5) ? (Type)format : Type::Invalid;
+    }
+
+    static Type FromDepthFormat(Regs::DepthFormat format) { // v < 4 maps to Type(v + 14)
+        return ((unsigned int)format < 4) ? (Type)((unsigned int)format + 14) : Type::Invalid;
+    } // NOTE(review): v == 1 yields 15, the unnamed gap in Type - confirm callers never pass it
+
+    static Type FromGPUPixelFormat(GPU::Regs::PixelFormat format) { // other values < 5 cast 1:1
+        switch (format) {
+        // RGB565 and RGB5A1 are switched in PixelFormat compared to ColorFormat
+        case GPU::Regs::PixelFormat::RGB565:
+            return Type::RGB565;
+        case GPU::Regs::PixelFormat::RGB5A1:
+            return Type::RGB5A1;
+        default:
+            return ((unsigned int)format < 5) ? (Type)format : Type::Invalid;
+        }
+    }
+
+}; // Format
 
 } // Texture
 
diff --git a/src/video_core/texture/internal/codecs.cpp b/src/video_core/texture/internal/codecs.cpp
index d647c9ec7..753f33bdd 100644
--- a/src/video_core/texture/internal/codecs.cpp
+++ b/src/video_core/texture/internal/codecs.cpp
@@ -1,7 +1,19 @@
-#include "codecs.h"
-#include "etc1.h"
-#include "morton.h"
-#include "texture_utils.h"
+#include "video_core/texture/internal/codecs.h"
+#include "video_core/texture/internal/etc1.h"
+#include "video_core/texture/internal/morton.h"
+#include "video_core/texture/internal/texture_utils.h"
+
+///////////////////////////////////////////////////////////////////////////////
+// Optimizations
+//////////////////////////////////////////////////////////////////////////////
+#ifdef _MSC_VER
+#pragma inline_recursion(on)
+#elif defined(CLANG_OR_GCC)
+#pragma GCC optimize("-fpeel-loops")
+#pragma GCC optimize("-fpredictive-commoning")
+#pragma GCC optimize("-ftree-loop-distribute-patterns")
+#pragma GCC optimize("-ftree-vectorize")
+#endif
 
 // Decoders
 #include "decoders.cpp"
diff --git a/src/video_core/texture/internal/codecs.h b/src/video_core/texture/internal/codecs.h
index 97a5e2869..9fa40908a 100644
--- a/src/video_core/texture/internal/codecs.h
+++ b/src/video_core/texture/internal/codecs.h
@@ -1,10 +1,11 @@
+
+#pragma once
+
 #include <iostream>
 #include <memory>
 #include "common/common_types.h"
 #include "video_core/texture/codec.h"
 
-#pragma once
-
 // each texture format codec
 class RGBACodec : public Pica::Texture::Codec {
 public:
@@ -13,7 +14,9 @@ public:
     void encode();
 
 protected:
-    const u32 format_size = 8;
+    void setSize() override { // 8 nibbles (32 bits) per pixel
+        this->start_nibbles_size = 8;
+    }
 };
 
 class RGBCodec : public Pica::Texture::Codec {
@@ -23,7 +26,9 @@ public:
     void encode();
 
 protected:
-    const u32 format_size = 6;
+    void setSize() override { // 6 nibbles (24 bits) per pixel
+        this->start_nibbles_size = 6;
+    }
 };
 
 class RGB5A1Codec : public Pica::Texture::Codec {
@@ -33,7 +38,9 @@ public:
     void encode();
 
 protected:
-    const u32 format_size = 4;
+    void setSize() override { // 4 nibbles (16 bits) per pixel
+        this->start_nibbles_size = 4;
+    }
 };
 
 class RGBA4Codec : public Pica::Texture::Codec {
@@ -43,7 +50,9 @@ public:
     void encode();
 
 protected:
-    const u32 format_size = 4;
+    void setSize() override { // 4 nibbles (16 bits) per pixel
+        this->start_nibbles_size = 4;
+    }
 };
 
 class RGB565Codec : public Pica::Texture::Codec {
@@ -53,7 +62,9 @@ public:
     void encode();
 
 protected:
-    const u32 format_size = 4;
+    void setSize() override { // 4 nibbles (16 bits) per pixel
+        this->start_nibbles_size = 4;
+    }
 };
 
 class RG8Codec : public Pica::Texture::Codec {
@@ -63,7 +74,9 @@ public:
     void encode();
 
 protected:
-    const u32 format_size = 4;
+    void setSize() override { // 4 nibbles (16 bits) per pixel
+        this->start_nibbles_size = 4;
+    }
 };
 
 class IA8Codec : public Pica::Texture::Codec {
@@ -73,7 +86,9 @@ public:
     void encode();
 
 protected:
-    const u32 format_size = 4;
+    void setSize() override { // 4 nibbles (16 bits) per pixel
+        this->start_nibbles_size = 4;
+    }
 };
 
 class I8Codec : public Pica::Texture::Codec {
@@ -83,7 +98,9 @@ public:
     void encode();
 
 protected:
-    const u32 format_size = 2;
+    void setSize() override { // 2 nibbles (8 bits) per pixel
+        this->start_nibbles_size = 2;
+    }
 };
 
 class A8Codec : public Pica::Texture::Codec {
@@ -93,7 +110,9 @@ public:
     void encode();
 
 protected:
-    const u32 format_size = 2;
+    void setSize() override { // 2 nibbles (8 bits) per pixel
+        this->start_nibbles_size = 2;
+    }
 };
 
 class IA4Codec : public Pica::Texture::Codec {
@@ -103,7 +122,9 @@ public:
     void encode();
 
 protected:
-    const u32 format_size = 2;
+    void setSize() override { // 2 nibbles (8 bits) per pixel
+        this->start_nibbles_size = 2;
+    }
 };
 
 class I4Codec : public Pica::Texture::Codec {
@@ -113,7 +134,9 @@ public:
     void encode();
 
 protected:
-    const u32 format_size = 1;
+    void setSize() override { // 1 nibble (4 bits) per pixel
+        this->start_nibbles_size = 1;
+    }
 };
 
 class A4Codec : public Pica::Texture::Codec {
@@ -123,7 +146,9 @@ public:
     void encode();
 
 protected:
-    const u32 format_size = 1;
+    void setSize() override { // 1 nibble (4 bits) per pixel
+        this->start_nibbles_size = 1;
+    }
 };
 
 class ETC1Codec : public Pica::Texture::Codec {
@@ -133,7 +158,9 @@ public:
     void encode();
 
 protected:
-    const u32 format_size = 1;
+    void setSize() override { // 1 nibble (4 bits) per pixel
+        this->start_nibbles_size = 1;
+    }
 };
 
 class ETC1A4Codec : public Pica::Texture::Codec {
@@ -143,7 +170,9 @@ public:
     void encode();
 
 protected:
-    const u32 format_size = 2;
+    void setSize() override { // 2 nibbles (8 bits) per pixel
+        this->start_nibbles_size = 2;
+    }
 };
 
 class D16Codec : public Pica::Texture::Codec {
@@ -153,7 +182,9 @@ public:
     void encode();
 
 protected:
-    const u32 format_size = 4;
+    void setSize() override { // 4 nibbles (16 bits) per pixel
+        this->start_nibbles_size = 4;
+    }
 };
 
 class D24Codec : public Pica::Texture::Codec {
@@ -163,7 +194,9 @@ public:
     void encode();
 
 protected:
-    const u32 format_size = 6;
+    void setSize() override { // 6 nibbles (24 bits) per pixel
+        this->start_nibbles_size = 6;
+    }
 };
 
 class D24S8Codec : public Pica::Texture::Codec {
@@ -173,5 +206,7 @@ public:
     void encode();
 
 protected:
-    const u32 format_size = 8;
+    void setSize() override { // 8 nibbles (32 bits) per pixel
+        this->start_nibbles_size = 8;
+    }
 };
diff --git a/src/video_core/texture/internal/decoders.cpp b/src/video_core/texture/internal/decoders.cpp
index d0b80d013..28672e8fb 100644
--- a/src/video_core/texture/internal/decoders.cpp
+++ b/src/video_core/texture/internal/decoders.cpp
@@ -1,9 +1,10 @@
 
+
 namespace {
 
 template <const Math::Vec4<u8> decode_func(const u8*)>
 inline void rgba_pass(u8* read, u8* write) {
-    u32 pixel = decode_func(read).ToRGBA();
+    auto pixel = decode_func(read).ToRGBA();
     std::memcpy(write, &pixel, 4);
 }
 
@@ -72,34 +73,36 @@ void RG8Codec::decode() {
 namespace {
 
 inline u16 convert_nibbles(u8 nibbles) {
-    return ((u16)Color::Convert4To8((nibbles & 0xF0) >> 4) << 8) |
-           (u16)Color::Convert4To8((nibbles & 0x0F));
+    u16 split = (nibbles & 0xF0) << 4 | (nibbles & 0x0F); // 0xAB -> 0x0A0B
+    split |= (split << 4);                                // 0x0A0B -> 0xAABB
+    return split;
 }
 
-inline u32 build_luminance(u8 intensity, u8 alpha) {
+inline u32 build_luminance(u32 intensity, u32 alpha) {
     return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity;
 }
 
 inline void intensity_alpha_pass(u8* read, u8* write) {
     alignas(4) u8 pixel[2];
     std::memcpy(pixel, read, 2);
-    u32 result = build_luminance(pixel[0], pixel[1]);
+    u32 result = build_luminance(pixel[1], pixel[0]);
     std::memcpy(write, &result, 4);
 }
 
 inline void intensity_alpha_nibbles_pass(u8* read, u8* write) {
-    alignas(4) u8 pixel[2];
-    std::memcpy(pixel, read, 1);
-    u16 tmp = convert_nibbles(pixel[0]);
-    std::memcpy(pixel, &tmp, 2);
-    u32 result = build_luminance(pixel[0], pixel[1]);
+    alignas(4) u8 pixel;
+    std::memcpy(&pixel, read, 1);
+    u16 tmp = convert_nibbles(pixel);
+    u8 tmp2[2];
+    std::memcpy(tmp2, &tmp, 2);
+    u32 result = build_luminance(tmp2[1], tmp2[0]);
     std::memcpy(write, &result, 4);
 }
 
 inline void intensity_pass(u8* read, u8* write) {
-    alignas(4) u8 pixel[1];
-    std::memcpy(pixel, read, 1);
-    u32 result = build_luminance(pixel[0], 255);
+    u8 pixel;
+    std::memcpy(&pixel, read, 1);
+    u32 result = build_luminance(pixel, 255);
     std::memcpy(write, &result, 4);
 }
 
@@ -108,9 +111,9 @@ inline void intensity_nibbles_pass(u8* read, u8* write) {
     std::memcpy(pixel, read, 1);
     u16 tmp = convert_nibbles(pixel[0]);
     std::memcpy(pixel, &tmp, 2);
-    u32 result = build_luminance(pixel[0], 255);
+    u32 result = build_luminance(pixel[1], 255);
     std::memcpy(write, &result, 4);
-    result = build_luminance(pixel[1], 255);
+    result = build_luminance(pixel[0], 255);
     std::memcpy(write + 4, &result, 4);
 }
 
diff --git a/src/video_core/texture/internal/etc1.cpp b/src/video_core/texture/internal/etc1.cpp
index a20dee6d5..5a7edec33 100644
--- a/src/video_core/texture/internal/etc1.cpp
+++ b/src/video_core/texture/internal/etc1.cpp
@@ -9,14 +9,24 @@
 #include "common/math_util.h"
 #include "common/swap.h"
 #include "common/vector_math.h"
-#include "etc1.h"
-#include "texture_utils.h"
+#include "video_core/texture/internal/etc1.h"
+#include "video_core/texture/internal/texture_utils.h"
 
-constexpr std::array<u8[2], 8> etc1_modifier_table = {{
+namespace {
+
+#ifdef _DEBUG
+#define CONST_FIX static
+#else
+#define CONST_FIX constexpr
+#endif
+
+CONST_FIX std::array<u8[2], 8> etc1_modifier_table = {{
     {2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183},
 }};
 
-namespace {
+constexpr u32 buildRGBA(u32 r, u32 g, u32 b, u32 a) { // packs as 0xAABBGGRR for values <= 0xFF
+    return (a << 24) | (b << 16) | (g << 8) | r;
+}
 
 union ETC1Tile {
     u64 raw;
@@ -62,7 +72,7 @@ union ETC1Tile {
         BitField<60, 4, u64> r1;
     } separate;
 
-    const Math::Vec3<u8> GetRGB(u32 x, u32 y) const {
+    const u32 GetRGB(u32 x, u32 y) const {
         int texel = 4 * x + y;
 
         if (flip)
@@ -106,7 +116,7 @@ union ETC1Tile {
         ret.g() = MathUtil::Clamp(ret.g() + modifier, 0, 255);
         ret.b() = MathUtil::Clamp(ret.b() + modifier, 0, 255);
 
-        return ret.Cast<u8>();
+        return buildRGBA(ret.r(), ret.g(), ret.b(), 0);
     }
 };
 
@@ -121,7 +131,8 @@ inline void etc1_pass(u8* etc1_buffer, u8* linear_buffer, u32 x_blocks) {
         std::memcpy(&tile.raw, &etc1_buffer[i * 8], 8);
         for (u32 k = 0; k < 4; k++) {
             for (u32 j = 0; j < 4; j++) {
-                u32 rgba = (tile.GetRGB(j, k).ToRGB()) | 0xFF000000;
+                auto rgb = tile.GetRGB(j, k);
+                u32 rgba = rgb | 0xFF000000;
                 std::memcpy(&tmp[k * line + j * 4 + index], &rgba, 4);
             }
         }
@@ -142,7 +153,8 @@ inline void etc1a4_pass(u8* etc1_buffer, u8* linear_buffer, u32 x_blocks) {
             for (u32 j = 0; j < 4; j++) {
                 u32 alpha = (alpha_tile >> (4 * (j * 4 + k))) & 0x0F;
                 alpha |= (alpha << 4);
-                u32 rgba = tile.GetRGB(j, k).ToRGB() | (alpha << 24);
+                auto rgb = tile.GetRGB(j, k);
+                u32 rgba = rgb | (alpha << 24);
                 std::memcpy(&tmp[k * line + j * 4 + index], &rgba, 4);
             }
         }
diff --git a/src/video_core/texture/internal/etc1.h b/src/video_core/texture/internal/etc1.h
index fa4535da2..492f19729 100644
--- a/src/video_core/texture/internal/etc1.h
+++ b/src/video_core/texture/internal/etc1.h
@@ -1,7 +1,6 @@
+#pragma once
 
 #include "common/common_types.h"
 
-#pragma once
-
 void ETC1(u8* etc1_buffer, u8* matrix_buffer, u32 width, u32 height);
 void ETC1A4(u8* etc1_buffer, u8* matrix_buffer, u32 width, u32 height);
diff --git a/src/video_core/texture/internal/morton.cpp b/src/video_core/texture/internal/morton.cpp
index b50f4e34a..9bd74c800 100644
--- a/src/video_core/texture/internal/morton.cpp
+++ b/src/video_core/texture/internal/morton.cpp
@@ -2,8 +2,8 @@
 #include <memory>
 #include <utility>
 #include "common/common_types.h"
-#include "morton.h"
-#include "texture_utils.h"
+#include "video_core/texture/internal/morton.h"
+#include "video_core/texture/internal/texture_utils.h"
 
 ///////////////////////////////////////////////////////////////////////////////
 // Optimizations
@@ -15,6 +15,8 @@
 // favor fast code over small code.
 #pragma optimize("t", on)
 #pragma intrinsic(memcpy)
+#define __hot
+#define __no_inline __declspec(noinline)
 #elif defined(CLANG_OR_GCC)
 // The next 3 will swizle memory copying to help find the best sse/avx shuffling
 // in case it's possible. Compilation tests have proven effective use of these
@@ -22,12 +24,20 @@
 #pragma GCC optimize("-fpredictive-commoning")
 #pragma GCC optimize("-ftree-loop-distribute-patterns")
 #pragma GCC optimize("-ftree-vectorize")
-// limit inlining
-#pragma GCC option("--param max-inline-insns-single=128")
-
+#pragma GCC option("--param inline-unit-growth=400")
+#pragma GCC option("--param large-function-growth=800")
 // The beauty of these compiler options is that they generate better code than
 // hand written intrinsics, since inline expanding memeory transfers can be pattern
 // matched with vector instructions available in the target.
+#define __no_inline __attribute__((noinline))
+#define __hot __attribute__((hot))
+#if !defined(__forceinline) // MSVC keyword; emulate it with the GCC/Clang attribute
+#define __forceinline inline __attribute__((always_inline))
+#endif // !defined(__forceinline)
+#else
+#define __hot
+#define __no_inline
+#define __forceinline
 #endif
 
 #pragma region Z_Order
@@ -54,11 +64,11 @@ constexpr u32 isBottom(u32 block_index) {
 }
 
 template <void codec(u8*, u8*, size_t), size_t nibbles, u32 blocks, size_t block_size>
-inline void swizzle_block(u8*& morton_block, u8* linear_block);
+__forceinline static void swizzle_block(u8*& morton_block, u8* linear_block);
 
 template <void codec(u8*, u8*, size_t), size_t nibbles, u32 block_index, u32 blocks,
           size_t block_size>
-inline void swizzle_block_aux(u8*& morton_block, u8* linear_block) {
+__forceinline static void swizzle_block_aux(u8*& morton_block, u8* linear_block) {
     // move the linear_block pointer to the appropiate block
     const size_t right = isRight(block_index) * (blocks * nibbles) / 2;
     const size_t down = isBottom(block_index) * block_size;
@@ -67,7 +77,7 @@ inline void swizzle_block_aux(u8*& morton_block, u8* linear_block) {
 }
 
 template <void codec(u8*, u8*, size_t), size_t nibbles, u32 blocks, size_t block_size>
-inline void swizzle_block(u8*& morton_block, u8* linear_block) {
+__forceinline static void swizzle_block(u8*& morton_block, u8* linear_block) {
     const size_t new_block_size = block_size / 2;
     if (blocks <= 2) {
         // We handle 2*2 blocks on z-order
@@ -94,14 +104,14 @@ inline void swizzle_block(u8*& morton_block, u8* linear_block) {
 }
 
 template <void codec(u8*, u8*, size_t), size_t nibbles, size_t lines_per_block>
-void swizzle_pass(u8* morton_block, u8* linear_block) {
+__forceinline static void swizzle_pass(u8* morton_block, u8* linear_block) {
     const size_t block_size = (lines_per_block * lines_per_block * nibbles) / 2;
     swizzle_block<codec, nibbles, lines_per_block, block_size>(morton_block, linear_block);
 }
 #pragma endregion Z_Order
 
 template <size_t nibbles, size_t lines_per_block>
-void encode_pass(u8* morton_buffer, u8* linear_buffer, u32 x_blocks) {
+__hot inline static void encode_pass(u8* morton_buffer, u8* linear_buffer, u32 x_blocks) {
     const u32 tile_size = (lines_per_block * lines_per_block * nibbles) / 2;
     alignas(64) u8 tmp[tile_size];
     tiling_pass<&encode, nibbles, lines_per_block>(linear_buffer, tmp, x_blocks);
@@ -109,7 +119,7 @@ void encode_pass(u8* morton_buffer, u8* linear_buffer, u32 x_blocks) {
 }
 
 template <size_t nibbles, size_t lines_per_block>
-void decode_pass(u8* morton_buffer, u8* linear_buffer, u32 x_blocks) {
+__hot inline static void decode_pass(u8* morton_buffer, u8* linear_buffer, u32 x_blocks) {
     const u32 tile_size = (lines_per_block * lines_per_block * nibbles) / 2;
     alignas(64) u8 tmp[tile_size];
     swizzle_pass<&decode, nibbles, lines_per_block>(morton_buffer, tmp);
@@ -117,7 +127,7 @@ void decode_pass(u8* morton_buffer, u8* linear_buffer, u32 x_blocks) {
 }
 
 template <void codec(u8*, u8*, u32), size_t nibbles, size_t lines_per_block>
-void morton_pass(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height) {
+__hot static void morton_pass(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height) {
     const u32 x_blocks = (width / lines_per_block);
     const u32 y_blocks = (height / lines_per_block);
     const size_t line_size = (lines_per_block * nibbles) / 2;
@@ -135,9 +145,22 @@ void morton_pass(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height) {
     }
 }
 
+// 8x8 tiles, 8 nibbles (32 bpp): one noinline entry point keeps the hot code together
+__no_inline __hot static void morton_8x8_32(u8* morton_buffer, u8* matrix_buffer, u32 width,
+                                            u32 height, bool decode) {
+    if (decode)
+        morton_pass<&decode_pass<8, 8>, 8, 8>(morton_buffer, matrix_buffer, width, height);
+    else
+        morton_pass<&encode_pass<8, 8>, 8, 8>(morton_buffer, matrix_buffer, width, height);
+}
+
 namespace Decoders {
 
 bool Morton_8x8(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height, u32 bpp) {
+    if (bpp == 32) {
+        morton_8x8_32(morton_buffer, matrix_buffer, width, height, true);
+        return true;
+    }
     switch (bpp) {
     case 4: {
         morton_pass<&decode_pass<1, 8>, 1, 8>(morton_buffer, matrix_buffer, width, height);
@@ -159,11 +182,6 @@ bool Morton_8x8(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height, u32
         return true;
         break;
     }
-    case 32: {
-        morton_pass<&decode_pass<8, 8>, 8, 8>(morton_buffer, matrix_buffer, width, height);
-        return true;
-        break;
-    }
     default: {
         return false;
         break;
@@ -209,6 +227,10 @@ bool Morton_32x32(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height, u
 namespace Encoders {
 
 bool Morton_8x8(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height, u32 bpp) {
+    if (bpp == 32) {
+        morton_8x8_32(morton_buffer, matrix_buffer, width, height, false);
+        return true;
+    }
     switch (bpp) {
     case 4: {
         morton_pass<&encode_pass<1, 8>, 1, 8>(morton_buffer, matrix_buffer, width, height);
@@ -230,11 +252,6 @@ bool Morton_8x8(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height, u32
         return true;
         break;
     }
-    case 32: {
-        morton_pass<&encode_pass<8, 8>, 8, 8>(morton_buffer, matrix_buffer, width, height);
-        return true;
-        break;
-    }
     default: {
         return false;
         break;
diff --git a/src/video_core/texture/internal/morton.h b/src/video_core/texture/internal/morton.h
index 36879ecb4..95473744f 100644
--- a/src/video_core/texture/internal/morton.h
+++ b/src/video_core/texture/internal/morton.h
@@ -1,7 +1,7 @@
-#include "common/common_types.h"
-
 #pragma once
 
+#include "common/common_types.h"
+
 enum class MortonPass { Tile8x8, Tile32x32 };
 
 namespace Decoders {
diff --git a/src/video_core/texture/internal/texture_utils.h b/src/video_core/texture/internal/texture_utils.h
index ecd7a557b..38d7f96f9 100644
--- a/src/video_core/texture/internal/texture_utils.h
+++ b/src/video_core/texture/internal/texture_utils.h
@@ -1,3 +1,5 @@
+#pragma once
+
 #include <array>
 #include <cstring>
 #include <memory>
@@ -5,12 +7,9 @@
 #include "common/color.h"
 #include "common/swap.h"
 
-#pragma once
-
 #if ((defined(__clang__) || defined(__GNUC__)) && !defined(__INTEL_COMPILER))
 #define CLANG_OR_GCC
 #endif
-
 ///////////////////////////////////////////////////////////////////////////////
 // Optimizations
 //////////////////////////////////////////////////////////////////////////////
@@ -23,16 +22,6 @@
 #pragma GCC optimize("-ftree-vectorize")
 #endif
 
-// @param read_size is the amount of bytes each pixel takes
-inline void decode(u8* morton_pointer, u8* matrix_pointer, size_t read_size) {
-    std::memcpy(matrix_pointer, morton_pointer, read_size);
-}
-
-// @param read_size is the amount of bytes each pixel takes
-inline void encode(u8* morton_pointer, u8* matrix_pointer, size_t read_size) {
-    std::memcpy(morton_pointer, matrix_pointer, read_size);
-}
-
 // Pre: width % 8 == 0 && height % 8 == 0
 template <void pass(u8*, u8*), u32 read_size, u32 write_size, u32 tuning = 2>
 inline void image_pass_aux_rev(u8* target, u32 width, u32 height) {
@@ -80,9 +69,9 @@ inline void image_pass_aux(u8* target, u32 width, u32 height) {
 template <void pass(u8*, u8*), u32 read_size, u32 write_size, u32 tuning = 2>
 inline void image_pass(u8* target, u32 width, u32 height) {
     if (read_size > write_size)
-        image_pass_aux<pass, read_size, write_size, tuning>;
+        image_pass_aux<pass, read_size, write_size, tuning>(target, width, height);
     else
-        image_pass_aux_rev<pass, read_size, write_size, tuning>;
+        image_pass_aux_rev<pass, read_size, write_size, tuning>(target, width, height);
 }
 
 template <void codec(u8*, u8*, size_t), size_t nibbles, size_t lines_per_block>
@@ -96,3 +85,13 @@ void tiling_pass(u8* linear, u8* tiled, u32 x_blocks) {
         codec(tiled + tiled_index, linear + linear_index, tiled_line_size);
     }
 }
+
+// Copies read_size bytes from morton_pointer to matrix_pointer; read_size is the byte count.
+inline void decode(u8* morton_pointer, u8* matrix_pointer, size_t read_size) {
+    std::memcpy(matrix_pointer, morton_pointer, read_size);
+}
+
+// Copies read_size bytes from matrix_pointer to morton_pointer; read_size is the byte count.
+inline void encode(u8* morton_pointer, u8* matrix_pointer, size_t read_size) {
+    std::memcpy(morton_pointer, matrix_pointer, read_size);
+}