Refactored texture decoding into a new stand-alone API
This commit is contained in:
		| @@ -1,4 +1,8 @@ | ||||
| set(SRCS | ||||
|             texture/internal/morton.cpp | ||||
|             texture/internal/etc1.cpp | ||||
|             texture/codec.cpp | ||||
|             texture/internal/codecs.cpp | ||||
|             renderer_opengl/gl_rasterizer.cpp | ||||
|             renderer_opengl/gl_rasterizer_cache.cpp | ||||
|             renderer_opengl/gl_shader_gen.cpp | ||||
| @@ -21,6 +25,12 @@ set(SRCS | ||||
|  | ||||
| set(HEADERS | ||||
|             debug_utils/debug_utils.h | ||||
|             texture/internal/texture_utils.h | ||||
|             texture/internal/morton.h | ||||
|             texture/internal/etc1.h | ||||
|             texture/codec.h | ||||
|             texture/formats.h | ||||
|             texture/internal/codecs.h | ||||
|             renderer_opengl/gl_rasterizer.h | ||||
|             renderer_opengl/gl_rasterizer_cache.h | ||||
|             renderer_opengl/gl_resource_manager.h | ||||
|   | ||||
							
								
								
									
										143
									
								
								src/video_core/texture/codec.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										143
									
								
								src/video_core/texture/codec.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,143 @@ | ||||
| #include "codec.h" | ||||
| #include "internal\codecs.h" | ||||
| #include "internal\morton.h" | ||||
|  | ||||
| namespace Pica { | ||||
| namespace Texture { | ||||
|  | ||||
| void Codec::decode() { | ||||
|     this->init(true); | ||||
|     if (this->morton) | ||||
|         this->decode_morton_pass(); | ||||
| }; | ||||
|  | ||||
| void Codec::encode() { | ||||
|     this->init(false); | ||||
|     if (this->morton) | ||||
|         this->encode_morton_pass(); | ||||
| }; | ||||
|  | ||||
| void Codec::setSize() { | ||||
|     this->start_nibbles_size = format_size; | ||||
| }; | ||||
|  | ||||
| inline void Codec::setWidth(u32 width) { | ||||
|     this->width = width; | ||||
| } | ||||
|  | ||||
| inline void Codec::setHeight(u32 height) { | ||||
|     this->height = height; | ||||
| } | ||||
|  | ||||
| void Codec::configTiling(bool active, u32 tiling) { | ||||
|     this->morton = true; | ||||
|     this->morton_pass_tiling = tiling; | ||||
|     if (tiling != 8 && tiling != 32) { | ||||
|         this->invalid_state = true; | ||||
|     } | ||||
| } | ||||
|  | ||||
| void Codec::configRGBATransform(bool active) { | ||||
|     this->raw_RGBA = active; | ||||
| } | ||||
|  | ||||
| void Codec::configPreConvertedRGBA(bool active) { | ||||
|     this->preconverted = active; | ||||
| } | ||||
|  | ||||
| void Codec::setExternalBuffer(u8* external) { | ||||
|     this->external_result_buffer = true; | ||||
|     this->passing_buffer = external; | ||||
| } | ||||
|  | ||||
| std::unique_ptr<u8[]> Codec::transferInternalBuffer() { | ||||
|     if (!this->external_result_buffer) { | ||||
|         std::unique_ptr<u8[]> result(std::move(this->internal_buffer)); | ||||
|         return result; | ||||
|     } | ||||
|     return nullptr; | ||||
| } | ||||
|  | ||||
| bool Codec::invalid() { | ||||
|     return this->invalid_state; | ||||
| } | ||||
|  | ||||
| void Codec::init(bool decode) { | ||||
|     if (decode) { | ||||
|         if (this->raw_RGBA) | ||||
|             this->expected_nibbles_size = 8; | ||||
|     } else { | ||||
|         this->start_nibbles_size = this->format_size; | ||||
|         if (this->raw_RGBA) | ||||
|             this->expected_nibbles_size = this->format_size; | ||||
|         if (this->preconverted) | ||||
|             this->start_nibbles_size = 8; | ||||
|     } | ||||
|     if (!this->external_result_buffer) { | ||||
|         size_t buff_size = this->width * this->height * this->expected_nibbles_size / 2; | ||||
|         this->internal_buffer = std::make_unique<u8[]>(buff_size); | ||||
|         this->passing_buffer = this->internal_buffer.get(); | ||||
|     } | ||||
| } | ||||
|  | ||||
| inline void Codec::decode_morton_pass() { | ||||
|     if (this->morton_pass_tiling == 8) | ||||
|         Decoders::Morton_8x8(this->target_buffer, this->passing_buffer, this->width, this->height, | ||||
|                              this->start_nibbles_size * 4); | ||||
|     else if (this->morton_pass_tiling == 32) | ||||
|         Decoders::Morton_32x32(this->target_buffer, this->passing_buffer, this->width, this->height, | ||||
|                                this->start_nibbles_size * 4); | ||||
| } | ||||
|  | ||||
| inline void Codec::encode_morton_pass() { | ||||
|     if (this->morton_pass_tiling == 8) | ||||
|         Encoders::Morton_8x8(this->target_buffer, this->passing_buffer, this->width, this->height, | ||||
|                              this->start_nibbles_size * 4); | ||||
|     else if (this->morton_pass_tiling == 32) | ||||
|         Encoders::Morton_32x32(this->target_buffer, this->passing_buffer, this->width, this->height, | ||||
|                                this->start_nibbles_size * 4); | ||||
| } | ||||
|  | ||||
| std::unique_ptr<Codec> CodecFactory::build(Format format, u8* target, u32 width, u32 height) { | ||||
|     switch (format) { | ||||
|     case Format::RGBA8: | ||||
|         return std::make_unique<RGBACodec>(target, width, height); | ||||
|     case Format::RGB8: | ||||
|         return std::make_unique<RGBCodec>(target, width, height); | ||||
|     case Format::RGB5A1: | ||||
|         return std::make_unique<RGB5A1Codec>(target, width, height); | ||||
|     case Format::RGB565: | ||||
|         return std::make_unique<RGB565Codec>(target, width, height); | ||||
|     case Format::RGBA4: | ||||
|         return std::make_unique<RGBA4Codec>(target, width, height); | ||||
|     case Format::RG8: | ||||
|         return std::make_unique<RG8Codec>(target, width, height); | ||||
|     case Format::IA8: | ||||
|         return std::make_unique<IA8Codec>(target, width, height); | ||||
|     case Format::I8: | ||||
|         return std::make_unique<I8Codec>(target, width, height); | ||||
|     case Format::A8: | ||||
|         return std::make_unique<A8Codec>(target, width, height); | ||||
|     case Format::IA4: | ||||
|         return std::make_unique<IA4Codec>(target, width, height); | ||||
|     case Format::I4: | ||||
|         return std::make_unique<I4Codec>(target, width, height); | ||||
|     case Format::A4: | ||||
|         return std::make_unique<A4Codec>(target, width, height); | ||||
|     case Format::ETC1: | ||||
|         return std::make_unique<ETC1Codec>(target, width, height); | ||||
|     case Format::ETC1A4: | ||||
|         return std::make_unique<ETC1A4Codec>(target, width, height); | ||||
|     case Format::D16: | ||||
|         return std::make_unique<D16Codec>(target, width, height); | ||||
|     case Format::D24: | ||||
|         return std::make_unique<D24Codec>(target, width, height); | ||||
|     case Format::D24S8: | ||||
|         return std::make_unique<D24S8Codec>(target, width, height); | ||||
|     default: | ||||
|         return nullptr; | ||||
|     } | ||||
| } | ||||
|  | ||||
| } // Texture | ||||
| } // Pica | ||||
							
								
								
									
										78
									
								
								src/video_core/texture/codec.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								src/video_core/texture/codec.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,78 @@ | ||||
| #include <iostream> | ||||
| #include <memory> | ||||
| #include "common/common_types.h" | ||||
| #include "formats.h" | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| namespace Pica { | ||||
|  | ||||
| namespace Texture { | ||||
|  | ||||
| class Codec { | ||||
|  | ||||
| public: | ||||
|     Codec(u8* target, u32 width, u32 height) { | ||||
|         this->target_buffer = target; | ||||
|         this->setWidth(width); | ||||
|         this->setHeight(height); | ||||
|         this->setSize(); | ||||
|         this->expected_nibbles_size = this->start_nibbles_size; | ||||
|     } | ||||
|     virtual ~Codec() {} | ||||
|  | ||||
|     virtual void decode(); | ||||
|     virtual void encode(); | ||||
|  | ||||
|     void setSize(); | ||||
|  | ||||
|     void setWidth(u32 width); | ||||
|     void setHeight(u32 height); | ||||
|  | ||||
|     // Common Passes | ||||
|     void configTiling(bool active, u32 tiling); | ||||
|     void configRGBATransform(bool active); | ||||
|     void configPreConvertedRGBA(bool active); | ||||
|  | ||||
|     void setExternalBuffer(u8* external); | ||||
|     std::unique_ptr<u8[]> transferInternalBuffer(); | ||||
|  | ||||
|     bool invalid(); | ||||
|  | ||||
| protected: | ||||
|     u32 width; | ||||
|     u32 height; | ||||
|  | ||||
|     // passes | ||||
|     bool invalid_state = false; | ||||
|     bool morton = true; | ||||
|     u32 morton_pass_tiling = 8; | ||||
|     bool raw_RGBA = false; | ||||
|     bool preconverted = false; | ||||
|     bool disable_components = false; | ||||
|     u32 disable_components_mask = 0; | ||||
|  | ||||
|     u32 start_nibbles_size; | ||||
|     u32 expected_nibbles_size; | ||||
|     const u32 format_size = 8; | ||||
|  | ||||
|     u8* target_buffer;                     // Initial read buffer | ||||
|     u8* passing_buffer;                    // pointer aliasing: Used and modified by passes | ||||
|     std::unique_ptr<u8[]> internal_buffer; // used if no external buffer is provided | ||||
|     bool external_result_buffer = false; | ||||
|  | ||||
|     void init(bool decode); | ||||
|  | ||||
|     typedef Codec super; | ||||
|  | ||||
|     inline void decode_morton_pass(); | ||||
|     inline void encode_morton_pass(); | ||||
| }; | ||||
|  | ||||
| namespace CodecFactory { | ||||
| std::unique_ptr<Codec> build(Pica::Texture::Format format, u8* target, u32 width, u32 height); | ||||
| }; | ||||
|  | ||||
| } // Texture | ||||
|  | ||||
| } // Pica | ||||
							
								
								
									
										37
									
								
								src/video_core/texture/formats.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								src/video_core/texture/formats.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,37 @@ | ||||
| #pragma once | ||||
|  | ||||
| namespace Pica { | ||||
|  | ||||
| namespace Texture { | ||||
|  | ||||
/// Pixel formats understood by the texture codecs. The numeric values are
/// spelled out explicitly because the numbering is not contiguous.
enum class Format {
    // Formats 0-4: shared between textures and color buffers
    RGBA8 = 0,
    RGB8 = 1,
    RGB5A1 = 2,
    RGB565 = 3,
    RGBA4 = 4,

    // Formats 5-13: texture-only
    IA8 = 5,
    RG8 = 6,
    I8 = 7,
    A8 = 8,
    IA4 = 9,
    I4 = 10,
    A4 = 11,
    ETC1 = 12,
    ETC1A4 = 13,

    // Formats 14-17: depth buffer-only (value 15 is unused)
    D16 = 14,
    D24 = 16,
    D24S8 = 17,

    // Sentinel for "no valid format"
    Invalid = 0xFF,
};
|  | ||||
| } // Texture | ||||
|  | ||||
| } // Pica | ||||
							
								
								
									
										10
									
								
								src/video_core/texture/internal/codecs.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								src/video_core/texture/internal/codecs.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,10 @@ | ||||
| #include "codecs.h" | ||||
| #include "etc1.h" | ||||
| #include "morton.h" | ||||
| #include "texture_utils.h" | ||||
|  | ||||
| // Decoders | ||||
| #include "decoders.cpp" | ||||
|  | ||||
| // Encoders | ||||
| #include "encoders.cpp" | ||||
							
								
								
									
										177
									
								
								src/video_core/texture/internal/codecs.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										177
									
								
								src/video_core/texture/internal/codecs.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,177 @@ | ||||
| #include <iostream> | ||||
| #include <memory> | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/texture/codec.h" | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| // each texture format codec | ||||
| class RGBACodec : public Pica::Texture::Codec { | ||||
| public: | ||||
|     RGBACodec(u8* target, u32 width, u32 height) : Pica::Texture::Codec(target, width, height) {} | ||||
|     void decode(); | ||||
|     void encode(); | ||||
|  | ||||
| protected: | ||||
|     const u32 format_size = 8; | ||||
| }; | ||||
|  | ||||
| class RGBCodec : public Pica::Texture::Codec { | ||||
| public: | ||||
|     RGBCodec(u8* target, u32 width, u32 height) : Pica::Texture::Codec(target, width, height) {} | ||||
|     void decode(); | ||||
|     void encode(); | ||||
|  | ||||
| protected: | ||||
|     const u32 format_size = 6; | ||||
| }; | ||||
|  | ||||
| class RGB5A1Codec : public Pica::Texture::Codec { | ||||
| public: | ||||
|     RGB5A1Codec(u8* target, u32 width, u32 height) : Pica::Texture::Codec(target, width, height) {} | ||||
|     void decode(); | ||||
|     void encode(); | ||||
|  | ||||
| protected: | ||||
|     const u32 format_size = 4; | ||||
| }; | ||||
|  | ||||
| class RGBA4Codec : public Pica::Texture::Codec { | ||||
| public: | ||||
|     RGBA4Codec(u8* target, u32 width, u32 height) : Pica::Texture::Codec(target, width, height) {} | ||||
|     void decode(); | ||||
|     void encode(); | ||||
|  | ||||
| protected: | ||||
|     const u32 format_size = 4; | ||||
| }; | ||||
|  | ||||
| class RGB565Codec : public Pica::Texture::Codec { | ||||
| public: | ||||
|     RGB565Codec(u8* target, u32 width, u32 height) : Pica::Texture::Codec(target, width, height) {} | ||||
|     void decode(); | ||||
|     void encode(); | ||||
|  | ||||
| protected: | ||||
|     const u32 format_size = 4; | ||||
| }; | ||||
|  | ||||
| class RG8Codec : public Pica::Texture::Codec { | ||||
| public: | ||||
|     RG8Codec(u8* target, u32 width, u32 height) : Pica::Texture::Codec(target, width, height) {} | ||||
|     void decode(); | ||||
|     void encode(); | ||||
|  | ||||
| protected: | ||||
|     const u32 format_size = 4; | ||||
| }; | ||||
|  | ||||
| class IA8Codec : public Pica::Texture::Codec { | ||||
| public: | ||||
|     IA8Codec(u8* target, u32 width, u32 height) : Pica::Texture::Codec(target, width, height) {} | ||||
|     void decode(); | ||||
|     void encode(); | ||||
|  | ||||
| protected: | ||||
|     const u32 format_size = 4; | ||||
| }; | ||||
|  | ||||
| class I8Codec : public Pica::Texture::Codec { | ||||
| public: | ||||
|     I8Codec(u8* target, u32 width, u32 height) : Pica::Texture::Codec(target, width, height) {} | ||||
|     void decode(); | ||||
|     void encode(); | ||||
|  | ||||
| protected: | ||||
|     const u32 format_size = 2; | ||||
| }; | ||||
|  | ||||
| class A8Codec : public Pica::Texture::Codec { | ||||
| public: | ||||
|     A8Codec(u8* target, u32 width, u32 height) : Pica::Texture::Codec(target, width, height) {} | ||||
|     void decode(); | ||||
|     void encode(); | ||||
|  | ||||
| protected: | ||||
|     const u32 format_size = 2; | ||||
| }; | ||||
|  | ||||
| class IA4Codec : public Pica::Texture::Codec { | ||||
| public: | ||||
|     IA4Codec(u8* target, u32 width, u32 height) : Pica::Texture::Codec(target, width, height) {} | ||||
|     void decode(); | ||||
|     void encode(); | ||||
|  | ||||
| protected: | ||||
|     const u32 format_size = 2; | ||||
| }; | ||||
|  | ||||
| class I4Codec : public Pica::Texture::Codec { | ||||
| public: | ||||
|     I4Codec(u8* target, u32 width, u32 height) : Pica::Texture::Codec(target, width, height) {} | ||||
|     void decode(); | ||||
|     void encode(); | ||||
|  | ||||
| protected: | ||||
|     const u32 format_size = 1; | ||||
| }; | ||||
|  | ||||
| class A4Codec : public Pica::Texture::Codec { | ||||
| public: | ||||
|     A4Codec(u8* target, u32 width, u32 height) : Pica::Texture::Codec(target, width, height) {} | ||||
|     void decode(); | ||||
|     void encode(); | ||||
|  | ||||
| protected: | ||||
|     const u32 format_size = 1; | ||||
| }; | ||||
|  | ||||
| class ETC1Codec : public Pica::Texture::Codec { | ||||
| public: | ||||
|     ETC1Codec(u8* target, u32 width, u32 height) : Pica::Texture::Codec(target, width, height) {} | ||||
|     void decode(); | ||||
|     void encode(); | ||||
|  | ||||
| protected: | ||||
|     const u32 format_size = 1; | ||||
| }; | ||||
|  | ||||
| class ETC1A4Codec : public Pica::Texture::Codec { | ||||
| public: | ||||
|     ETC1A4Codec(u8* target, u32 width, u32 height) : Pica::Texture::Codec(target, width, height) {} | ||||
|     void decode(); | ||||
|     void encode(); | ||||
|  | ||||
| protected: | ||||
|     const u32 format_size = 2; | ||||
| }; | ||||
|  | ||||
| class D16Codec : public Pica::Texture::Codec { | ||||
| public: | ||||
|     D16Codec(u8* target, u32 width, u32 height) : Pica::Texture::Codec(target, width, height) {} | ||||
|     void decode(); | ||||
|     void encode(); | ||||
|  | ||||
| protected: | ||||
|     const u32 format_size = 4; | ||||
| }; | ||||
|  | ||||
| class D24Codec : public Pica::Texture::Codec { | ||||
| public: | ||||
|     D24Codec(u8* target, u32 width, u32 height) : Pica::Texture::Codec(target, width, height) {} | ||||
|     void decode(); | ||||
|     void encode(); | ||||
|  | ||||
| protected: | ||||
|     const u32 format_size = 6; | ||||
| }; | ||||
|  | ||||
| class D24S8Codec : public Pica::Texture::Codec { | ||||
| public: | ||||
|     D24S8Codec(u8* target, u32 width, u32 height) : Pica::Texture::Codec(target, width, height) {} | ||||
|     void decode(); | ||||
|     void encode(); | ||||
|  | ||||
| protected: | ||||
|     const u32 format_size = 8; | ||||
| }; | ||||
							
								
								
									
										261
									
								
								src/video_core/texture/internal/decoders.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										261
									
								
								src/video_core/texture/internal/decoders.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,261 @@ | ||||
|  | ||||
| namespace { | ||||
|  | ||||
| template <const Math::Vec4<u8> decode_func(const u8*)> | ||||
| inline void rgba_pass(u8* read, u8* write) { | ||||
|     u32 pixel = decode_func(read).ToRGBA(); | ||||
|     std::memcpy(write, &pixel, 4); | ||||
| } | ||||
|  | ||||
| } // Anonymous | ||||
|  | ||||
| void RGBACodec::decode() { | ||||
|     super::decode(); | ||||
|     if (this->raw_RGBA) | ||||
|         image_pass<&rgba_pass<&Color::DecodeRGBA8>, 8, 8, 8>( | ||||
|             // clang-format off | ||||
|             this->passing_buffer, this->width, this->height | ||||
|             // clang-format on | ||||
|             ); | ||||
| } | ||||
|  | ||||
| void RGBCodec::decode() { | ||||
|     super::decode(); | ||||
|     if (this->raw_RGBA) | ||||
|         image_pass<&rgba_pass<&Color::DecodeRGB8>, 6, 8>( | ||||
|             // clang-format off | ||||
|             this->passing_buffer, this->width, this->height | ||||
|             // clang-format on | ||||
|             ); | ||||
| } | ||||
|  | ||||
| void RGB5A1Codec::decode() { | ||||
|     super::decode(); | ||||
|     if (this->raw_RGBA) | ||||
|         image_pass<&rgba_pass<&Color::DecodeRGB5A1>, 4, 8>( | ||||
|             // clang-format off | ||||
|             this->passing_buffer, this->width, this->height | ||||
|             // clang-format on | ||||
|             ); | ||||
| } | ||||
|  | ||||
| void RGB565Codec::decode() { | ||||
|     super::decode(); | ||||
|     if (this->raw_RGBA) | ||||
|         image_pass<&rgba_pass<&Color::DecodeRGB565>, 4, 8>( | ||||
|             // clang-format off | ||||
|             this->passing_buffer, this->width, this->height | ||||
|             // clang-format on | ||||
|             ); | ||||
| } | ||||
|  | ||||
| void RGBA4Codec::decode() { | ||||
|     super::decode(); | ||||
|     if (this->raw_RGBA) | ||||
|         image_pass<&rgba_pass<&Color::DecodeRGBA4>, 4, 8>( | ||||
|             // clang-format off | ||||
|             this->passing_buffer, this->width, this->height | ||||
|             // clang-format on | ||||
|             ); | ||||
| } | ||||
|  | ||||
| void RG8Codec::decode() { | ||||
|     super::decode(); | ||||
|     if (this->raw_RGBA) | ||||
|         image_pass<&rgba_pass<&Color::DecodeRG8>, 4, 8>( | ||||
|             // clang-format off | ||||
|             this->passing_buffer, this->width, this->height | ||||
|             // clang-format on | ||||
|             ); | ||||
| } | ||||
|  | ||||
| namespace { | ||||
|  | ||||
| inline u16 convert_nibbles(u8 nibbles) { | ||||
|     return ((u16)Color::Convert4To8((nibbles & 0xF0) >> 4) << 8) | | ||||
|            (u16)Color::Convert4To8((nibbles & 0x0F)); | ||||
| } | ||||
|  | ||||
| inline u32 build_luminance(u8 intensity, u8 alpha) { | ||||
|     return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity; | ||||
| } | ||||
|  | ||||
| inline void intensity_alpha_pass(u8* read, u8* write) { | ||||
|     alignas(4) u8 pixel[2]; | ||||
|     std::memcpy(pixel, read, 2); | ||||
|     u32 result = build_luminance(pixel[0], pixel[1]); | ||||
|     std::memcpy(write, &result, 4); | ||||
| } | ||||
|  | ||||
| inline void intensity_alpha_nibbles_pass(u8* read, u8* write) { | ||||
|     alignas(4) u8 pixel[2]; | ||||
|     std::memcpy(pixel, read, 1); | ||||
|     u16 tmp = convert_nibbles(pixel[0]); | ||||
|     std::memcpy(pixel, &tmp, 2); | ||||
|     u32 result = build_luminance(pixel[0], pixel[1]); | ||||
|     std::memcpy(write, &result, 4); | ||||
| } | ||||
|  | ||||
| inline void intensity_pass(u8* read, u8* write) { | ||||
|     alignas(4) u8 pixel[1]; | ||||
|     std::memcpy(pixel, read, 1); | ||||
|     u32 result = build_luminance(pixel[0], 255); | ||||
|     std::memcpy(write, &result, 4); | ||||
| } | ||||
|  | ||||
| inline void intensity_nibbles_pass(u8* read, u8* write) { | ||||
|     alignas(4) u8 pixel[2]; | ||||
|     std::memcpy(pixel, read, 1); | ||||
|     u16 tmp = convert_nibbles(pixel[0]); | ||||
|     std::memcpy(pixel, &tmp, 2); | ||||
|     u32 result = build_luminance(pixel[0], 255); | ||||
|     std::memcpy(write, &result, 4); | ||||
|     result = build_luminance(pixel[1], 255); | ||||
|     std::memcpy(write + 4, &result, 4); | ||||
| } | ||||
|  | ||||
| inline void alpha_pass(u8* read, u8* write) { | ||||
|     alignas(4) u8 pixel[1]; | ||||
|     std::memcpy(pixel, read, 1); | ||||
|     u32 result = build_luminance(0, pixel[0]); | ||||
|     std::memcpy(write, &result, 4); | ||||
| } | ||||
|  | ||||
| inline void alpha_nibbles_pass(u8* read, u8* write) { | ||||
|     alignas(4) u8 pixel[2]; | ||||
|     std::memcpy(pixel, read, 1); | ||||
|     u16 tmp = convert_nibbles(pixel[0]); | ||||
|     std::memcpy(pixel, &tmp, 2); | ||||
|     u32 result = build_luminance(0, pixel[0]); | ||||
|     std::memcpy(write, &result, 4); | ||||
|     result = build_luminance(0, pixel[1]); | ||||
|     std::memcpy(write + 4, &result, 4); | ||||
| } | ||||
|  | ||||
| } // Anonymous | ||||
|  | ||||
| void IA8Codec::decode() { | ||||
|     super::decode(); | ||||
|     if (this->raw_RGBA) | ||||
|         image_pass<&intensity_alpha_pass, 4, 8>( | ||||
|             // clang-format off | ||||
|             this->passing_buffer, this->width, this->height | ||||
|             // clang-format on | ||||
|             ); | ||||
| } | ||||
|  | ||||
| void IA4Codec::decode() { | ||||
|     super::decode(); | ||||
|     if (this->raw_RGBA) | ||||
|         image_pass<&intensity_alpha_nibbles_pass, 2, 8>( | ||||
|             // clang-format off | ||||
|             this->passing_buffer, this->width, this->height | ||||
|             // clang-format on | ||||
|             ); | ||||
| } | ||||
|  | ||||
| void I8Codec::decode() { | ||||
|     super::decode(); | ||||
|     if (this->raw_RGBA) | ||||
|         image_pass<&intensity_pass, 2, 8>( | ||||
|             // clang-format off | ||||
|             this->passing_buffer, this->width, this->height | ||||
|             // clang-format on | ||||
|             ); | ||||
| } | ||||
|  | ||||
| void I4Codec::decode() { | ||||
|     super::decode(); | ||||
|     if (this->raw_RGBA) | ||||
|         image_pass<&intensity_nibbles_pass, 1, 8>( | ||||
|             // clang-format off | ||||
|             this->passing_buffer, this->width, this->height | ||||
|             // clang-format on | ||||
|             ); | ||||
| } | ||||
|  | ||||
| void A8Codec::decode() { | ||||
|     super::decode(); | ||||
|     if (this->raw_RGBA) | ||||
|         image_pass<&alpha_pass, 2, 8>( | ||||
|             // clang-format off | ||||
|             this->passing_buffer, this->width, this->height | ||||
|             // clang-format on | ||||
|             ); | ||||
| } | ||||
|  | ||||
| void A4Codec::decode() { | ||||
|     super::decode(); | ||||
|     if (this->raw_RGBA) | ||||
|         image_pass<&alpha_nibbles_pass, 1, 8>( | ||||
|             // clang-format off | ||||
|             this->passing_buffer, this->width, this->height | ||||
|             // clang-format on | ||||
|             ); | ||||
| } | ||||
|  | ||||
| void ETC1Codec::decode() { | ||||
|     this->init(true); | ||||
|     ETC1(this->target_buffer, this->passing_buffer, this->width, this->height); | ||||
| } | ||||
|  | ||||
| void ETC1A4Codec::decode() { | ||||
|     this->init(true); | ||||
|     ETC1A4(this->target_buffer, this->passing_buffer, this->width, this->height); | ||||
| } | ||||
|  | ||||
| namespace { | ||||
|  | ||||
| inline void expand_depth16_pass(u8* read, u8* write) { | ||||
|     alignas(4) u8 pixel[4]; | ||||
|     std::memcpy(pixel, read, 2); | ||||
|     pixel[2] = 255; | ||||
|     pixel[3] = 255; | ||||
|     std::memcpy(write, pixel, 4); | ||||
| } | ||||
|  | ||||
| inline void expand_depth24_pass(u8* read, u8* write) { | ||||
|     alignas(4) u8 pixel[4]; | ||||
|     std::memcpy(pixel, read, 3); | ||||
|     pixel[3] = 255; | ||||
|     std::memcpy(write, pixel, 4); | ||||
| } | ||||
|  | ||||
| inline void fix_stencil_pass(u8* read, u8* write) { | ||||
|     u32 pixel; | ||||
|     std::memcpy(&pixel, read, 4); | ||||
|     pixel = (pixel << 8) | (pixel >> 24); | ||||
|     std::memcpy(write, &pixel, 4); | ||||
| } | ||||
|  | ||||
| } // Anonymous | ||||
|  | ||||
| void D16Codec::decode() { | ||||
|     super::decode(); | ||||
|     if (this->raw_RGBA) | ||||
|         image_pass<&expand_depth16_pass, 4, 8>( | ||||
|             // clang-format off | ||||
|             this->passing_buffer, this->width, this->height | ||||
|             // clang-format on | ||||
|             ); | ||||
| } | ||||
|  | ||||
| void D24Codec::decode() { | ||||
|     super::decode(); | ||||
|     if (this->raw_RGBA) | ||||
|         image_pass<&expand_depth24_pass, 6, 8>( | ||||
|             // clang-format off | ||||
|             this->passing_buffer, this->width, this->height | ||||
|             // clang-format on | ||||
|             ); | ||||
| } | ||||
|  | ||||
| void D24S8Codec::decode() { | ||||
|     super::decode(); | ||||
|     if (this->raw_RGBA) | ||||
|         image_pass<&fix_stencil_pass, 8, 8, 8>( | ||||
|             // clang-format off | ||||
|             this->passing_buffer, this->width, this->height | ||||
|             // clang-format on | ||||
|             ); | ||||
| } | ||||
							
								
								
									
										109
									
								
								src/video_core/texture/internal/encoders.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										109
									
								
								src/video_core/texture/internal/encoders.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,109 @@ | ||||
|  | ||||
| void RGBACodec::encode() { | ||||
|     super::encode(); | ||||
| } | ||||
|  | ||||
| void RGBCodec::encode() { | ||||
|     super::encode(); | ||||
| } | ||||
|  | ||||
| void RGB5A1Codec::encode() { | ||||
|     super::encode(); | ||||
| } | ||||
|  | ||||
| void RGB565Codec::encode() { | ||||
|     super::encode(); | ||||
| } | ||||
|  | ||||
| void RGBA4Codec::encode() { | ||||
|     super::encode(); | ||||
| } | ||||
|  | ||||
| void RG8Codec::encode() { | ||||
|     super::encode(); | ||||
| } | ||||
|  | ||||
| void IA8Codec::encode() { | ||||
|     super::encode(); | ||||
| } | ||||
|  | ||||
| void IA4Codec::encode() { | ||||
|     super::encode(); | ||||
| } | ||||
|  | ||||
| void I8Codec::encode() { | ||||
|     super::encode(); | ||||
| } | ||||
|  | ||||
| void I4Codec::encode() { | ||||
|     super::encode(); | ||||
| } | ||||
|  | ||||
| void A8Codec::encode() { | ||||
|     super::encode(); | ||||
| } | ||||
|  | ||||
| void A4Codec::encode() { | ||||
|     super::encode(); | ||||
| } | ||||
|  | ||||
| void ETC1Codec::encode() { | ||||
|     super::encode(); | ||||
| } | ||||
|  | ||||
| void ETC1A4Codec::encode() { | ||||
|     super::encode(); | ||||
| } | ||||
|  | ||||
| namespace Encode { | ||||
|  | ||||
| inline void contract_depth16_pass(u8* read, u8* write) { | ||||
|     alignas(4) u8 pixel[4]; | ||||
|     std::memcpy(pixel, read, 4); | ||||
|     std::memcpy(write, pixel, 2); | ||||
| } | ||||
|  | ||||
| inline void contract_depth24_pass(u8* read, u8* write) { | ||||
|     alignas(4) u8 pixel[4]; | ||||
|     std::memcpy(pixel, read, 4); | ||||
|     std::memcpy(write, pixel, 3); | ||||
| } | ||||
|  | ||||
| inline void fix_stencil_pass(u8* read, u8* write) { | ||||
|     u32 pixel; | ||||
|     std::memcpy(&pixel, read, 4); | ||||
|     pixel = (pixel >> 24) | (pixel << 8); | ||||
|     std::memcpy(write, &pixel, 4); | ||||
| } | ||||
|  | ||||
| } // Anonymous | ||||
|  | ||||
| void D16Codec::encode() { | ||||
|     super::encode(); | ||||
|     if (this->raw_RGBA) | ||||
|         image_pass<&Encode::contract_depth16_pass, 8, 4, 8>( | ||||
|             // clang-format off | ||||
|             this->passing_buffer, this->width, this->height | ||||
|             // clang-format on | ||||
|             ); | ||||
| } | ||||
|  | ||||
| void D24Codec::encode() { | ||||
|     super::encode(); | ||||
|     if (this->raw_RGBA) | ||||
|         image_pass<&Encode::contract_depth24_pass, 8, 6>( | ||||
|             // clang-format off | ||||
|             this->passing_buffer, this->width, this->height | ||||
|             // clang-format on | ||||
|             ); | ||||
| } | ||||
|  | ||||
| void D24S8Codec::encode() { | ||||
|     super::encode(); | ||||
|     if (this->raw_RGBA) | ||||
|         image_pass<&Encode::fix_stencil_pass, 8, 8, 8>( | ||||
|             // clang-format off | ||||
|             this->passing_buffer, this->width, this->height | ||||
|             // clang-format on | ||||
|             ); | ||||
| } | ||||
							
								
								
									
										187
									
								
								src/video_core/texture/internal/etc1.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										187
									
								
								src/video_core/texture/internal/etc1.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,187 @@ | ||||
| #include <array> | ||||
| #include <cstring> | ||||
| #include <memory> | ||||
| #include <utility> | ||||
| #include "common/assert.h" | ||||
| #include "common/bit_field.h" | ||||
| #include "common/color.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/math_util.h" | ||||
| #include "common/swap.h" | ||||
| #include "common/vector_math.h" | ||||
| #include "etc1.h" | ||||
| #include "texture_utils.h" | ||||
|  | ||||
| // Modifier magnitudes, looked up as [table index][sub-index]. The table index | ||||
| // is a 3-bit field of the tile and the sub-index is a single per-texel bit; | ||||
| // the sign is applied separately by the caller. | ||||
| constexpr std::array<u8[2], 8> etc1_modifier_table = {{ | ||||
|     {2, 8},    // table 0 | ||||
|     {5, 17},   // table 1 | ||||
|     {9, 29},   // table 2 | ||||
|     {13, 42},  // table 3 | ||||
|     {18, 60},  // table 4 | ||||
|     {24, 80},  // table 5 | ||||
|     {33, 106}, // table 6 | ||||
|     {47, 183}, // table 7 | ||||
| }}; | ||||
|  | ||||
| namespace { | ||||
|  | ||||
| // One 64-bit ETC1 tile, accessible either as the raw word or through the | ||||
| // bit fields packed inside it. | ||||
| union ETC1Tile { | ||||
|     u64 raw; | ||||
|  | ||||
|     // Two 16-bit masks, one bit per texel of the tile. | ||||
|     BitField<0, 16, u64> table_subindexes; | ||||
|     BitField<16, 16, u64> negation_flags; | ||||
|  | ||||
|     BitField<32, 1, u64> flip; | ||||
|     BitField<33, 1, u64> differential_mode; | ||||
|  | ||||
|     BitField<34, 3, u64> table_index_2; | ||||
|     BitField<37, 3, u64> table_index_1; | ||||
|  | ||||
|     // Layout used when differential_mode is set: per channel a 5-bit base | ||||
|     // value and a signed 3-bit delta. | ||||
|     union { | ||||
|         BitField<40, 3, s64> db; | ||||
|         BitField<43, 5, u64> b; | ||||
|  | ||||
|         BitField<48, 3, s64> dg; | ||||
|         BitField<51, 5, u64> g; | ||||
|  | ||||
|         BitField<56, 3, s64> dr; | ||||
|         BitField<59, 5, u64> r; | ||||
|     } differential; | ||||
|  | ||||
|     // Layout used otherwise: two independent 4-bit values per channel. | ||||
|     union { | ||||
|         BitField<40, 4, u64> b2; | ||||
|         BitField<44, 4, u64> b1; | ||||
|  | ||||
|         BitField<48, 4, u64> g2; | ||||
|         BitField<52, 4, u64> g1; | ||||
|  | ||||
|         BitField<56, 4, u64> r2; | ||||
|         BitField<60, 4, u64> r1; | ||||
|     } separate; | ||||
|  | ||||
|     // Returns bit `index` of table_subindexes (0 or 1). | ||||
|     unsigned GetTableSubIndex(unsigned index) const { | ||||
|         return (table_subindexes >> index) & 1; | ||||
|     } | ||||
|  | ||||
|     // Returns true when bit `index` of negation_flags is set. | ||||
|     bool GetNegationFlag(unsigned index) const { | ||||
|         return ((negation_flags >> index) & 1) != 0; | ||||
|     } | ||||
|  | ||||
|     // Computes the 8-bit RGB colour of the texel at (x, y) inside the tile. | ||||
|     const Math::Vec3<u8> GetRGB(u32 x, u32 y) const { | ||||
|         // The per-texel bit position is taken from the coordinates as they | ||||
|         // were passed in, before the flip below exchanges them. | ||||
|         const int texel = 4 * x + y; | ||||
|  | ||||
|         if (flip) | ||||
|             std::swap(x, y); | ||||
|  | ||||
|         // After the optional swap, x selects which of the two halves applies. | ||||
|         const bool first_half = x < 2; | ||||
|  | ||||
|         // Base colour | ||||
|         Math::Vec3<int> color; | ||||
|         if (!differential_mode) { | ||||
|             if (first_half) { | ||||
|                 color.r() = Color::Convert4To8(static_cast<u8>(separate.r1)); | ||||
|                 color.g() = Color::Convert4To8(static_cast<u8>(separate.g1)); | ||||
|                 color.b() = Color::Convert4To8(static_cast<u8>(separate.b1)); | ||||
|             } else { | ||||
|                 color.r() = Color::Convert4To8(static_cast<u8>(separate.r2)); | ||||
|                 color.g() = Color::Convert4To8(static_cast<u8>(separate.g2)); | ||||
|                 color.b() = Color::Convert4To8(static_cast<u8>(separate.b2)); | ||||
|             } | ||||
|         } else { | ||||
|             int red = static_cast<int>(differential.r); | ||||
|             int green = static_cast<int>(differential.g); | ||||
|             int blue = static_cast<int>(differential.b); | ||||
|             if (!first_half) { | ||||
|                 // The second half adds the signed deltas to the base value. | ||||
|                 red += static_cast<int>(differential.dr); | ||||
|                 green += static_cast<int>(differential.dg); | ||||
|                 blue += static_cast<int>(differential.db); | ||||
|             } | ||||
|             color.r() = Color::Convert5To8(red); | ||||
|             color.g() = Color::Convert5To8(green); | ||||
|             color.b() = Color::Convert5To8(blue); | ||||
|         } | ||||
|  | ||||
|         // Signed modifier shared by all three channels | ||||
|         const unsigned table_index = | ||||
|             static_cast<unsigned>(first_half ? table_index_1.Value() : table_index_2.Value()); | ||||
|         const int magnitude = etc1_modifier_table[table_index][GetTableSubIndex(texel)]; | ||||
|         const int modifier = GetNegationFlag(texel) ? -magnitude : magnitude; | ||||
|  | ||||
|         color.r() = MathUtil::Clamp(color.r() + modifier, 0, 255); | ||||
|         color.g() = MathUtil::Clamp(color.g() + modifier, 0, 255); | ||||
|         color.b() = MathUtil::Clamp(color.b() + modifier, 0, 255); | ||||
|  | ||||
|         return color.Cast<u8>(); | ||||
|     } | ||||
| }; | ||||
|  | ||||
| } // anonymous namespace | ||||
|  | ||||
| // Decodes four consecutive 8-byte ETC1 tiles from `etc1_buffer` into a temporary | ||||
| // 8x8 block of 4-byte pixels (top byte forced to 0xFF), then lets tiling_pass | ||||
| // copy that block into `linear_buffer`. | ||||
| inline void etc1_pass(u8* etc1_buffer, u8* linear_buffer, u32 x_blocks) { | ||||
|     const size_t row_bytes = 8 * 4; | ||||
|     alignas(64) u8 block_pixels[row_bytes * 8]; | ||||
|     for (u32 quadrant = 0; quadrant < 4; quadrant++) { | ||||
|         // Odd quadrants start half a row to the right, quadrants 2 and 3 four rows down. | ||||
|         const size_t origin = (quadrant % 2) * (row_bytes / 2) + (quadrant / 2) * row_bytes * 4; | ||||
|         ETC1Tile tile; | ||||
|         std::memcpy(&tile.raw, etc1_buffer + quadrant * 8, 8); | ||||
|         for (u32 row = 0; row < 4; row++) { | ||||
|             u8* const out_row = block_pixels + row * row_bytes + origin; | ||||
|             for (u32 col = 0; col < 4; col++) { | ||||
|                 const u32 rgba = (tile.GetRGB(col, row).ToRGB()) | 0xFF000000; | ||||
|                 std::memcpy(out_row + col * 4, &rgba, 4); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     tiling_pass<&decode, 8, 8>(linear_buffer, block_pixels, x_blocks); | ||||
| } | ||||
|  | ||||
| // Decodes four consecutive 16-byte records from `etc1_buffer` (8 bytes of | ||||
| // 4-bit alpha values followed by an 8-byte ETC1 tile) into a temporary 8x8 | ||||
| // block of 4-byte pixels, then lets tiling_pass copy it into `linear_buffer`. | ||||
| inline void etc1a4_pass(u8* etc1_buffer, u8* linear_buffer, u32 x_blocks) { | ||||
|     const size_t row_bytes = 8 * 4; | ||||
|     alignas(64) u8 block_pixels[row_bytes * 8]; | ||||
|     for (u32 quadrant = 0; quadrant < 4; quadrant++) { | ||||
|         // Odd quadrants start half a row to the right, quadrants 2 and 3 four rows down. | ||||
|         const size_t origin = (quadrant % 2) * (row_bytes / 2) + (quadrant / 2) * row_bytes * 4; | ||||
|         u64 alpha_bits; | ||||
|         ETC1Tile tile; | ||||
|         std::memcpy(&alpha_bits, etc1_buffer + quadrant * 16, 8); | ||||
|         std::memcpy(&tile.raw, etc1_buffer + quadrant * 16 + 8, 8); | ||||
|         for (u32 row = 0; row < 4; row++) { | ||||
|             u8* const out_row = block_pixels + row * row_bytes + origin; | ||||
|             for (u32 col = 0; col < 4; col++) { | ||||
|                 // Widen the 4-bit alpha to 8 bits by repeating the nibble. | ||||
|                 const u32 nibble = (alpha_bits >> (4 * (col * 4 + row))) & 0x0F; | ||||
|                 const u32 alpha = nibble | (nibble << 4); | ||||
|                 const u32 rgba = tile.GetRGB(col, row).ToRGB() | (alpha << 24); | ||||
|                 std::memcpy(out_row + col * 4, &rgba, 4); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     tiling_pass<&decode, 8, 8>(linear_buffer, block_pixels, x_blocks); | ||||
| } | ||||
|  | ||||
| // Decodes an ETC1A4 image into `matrix_buffer` as 4-byte pixels, one 8x8 block | ||||
| // at a time. Input blocks are consumed in order (64 bytes each) while the | ||||
| // output is filled starting from the last block row and moving backwards. | ||||
| // Only whole 8x8 blocks are processed (width / 8 by height / 8 of them). | ||||
| void ETC1A4(u8* etc1_buffer, u8* matrix_buffer, u32 width, u32 height) { | ||||
|     const size_t line_size = 8 * 4; | ||||
|     const size_t tile_size = 8 * 8; | ||||
|     const size_t stride_size = width * line_size; | ||||
|     const u32 x_blocks = (width / 8); | ||||
|     const u32 y_blocks = (height / 8); | ||||
|     u8* block_row = matrix_buffer + (height * width * 4) - stride_size; | ||||
|     for (u32 y = 0; y < y_blocks; y++, block_row -= stride_size) { | ||||
|         u8* linear_buffer = block_row; | ||||
|         for (u32 x = 0; x != x_blocks; x++) { | ||||
|             etc1a4_pass(etc1_buffer, linear_buffer, x_blocks); | ||||
|             etc1_buffer += tile_size; | ||||
|             linear_buffer += line_size; | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Decodes an ETC1 image into `matrix_buffer` as 4-byte pixels, one 8x8 block | ||||
| // at a time. Input blocks are consumed in order (32 bytes each) while the | ||||
| // output is filled starting from the last block row and moving backwards. | ||||
| // Only whole 8x8 blocks are processed (width / 8 by height / 8 of them). | ||||
| void ETC1(u8* etc1_buffer, u8* matrix_buffer, u32 width, u32 height) { | ||||
|     const size_t line_size = 8 * 4; | ||||
|     const size_t tile_size = 8 * 8 / 2; | ||||
|     const size_t stride_size = width * line_size; | ||||
|     const u32 x_blocks = (width / 8); | ||||
|     const u32 y_blocks = (height / 8); | ||||
|     u8* block_row = matrix_buffer + (height * width * 4) - stride_size; | ||||
|     for (u32 y = 0; y < y_blocks; y++, block_row -= stride_size) { | ||||
|         u8* linear_buffer = block_row; | ||||
|         for (u32 x = 0; x != x_blocks; x++) { | ||||
|             etc1_pass(etc1_buffer, linear_buffer, x_blocks); | ||||
|             etc1_buffer += tile_size; | ||||
|             linear_buffer += line_size; | ||||
|         } | ||||
|     } | ||||
| } | ||||
							
								
								
									
										7
									
								
								src/video_core/texture/internal/etc1.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								src/video_core/texture/internal/etc1.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,7 @@ | ||||
|  | ||||
| #include "common/common_types.h" | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| // Decodes ETC1 data from etc1_buffer into matrix_buffer as 4-byte pixels. | ||||
| // Only whole 8x8 blocks are processed (width / 8 by height / 8 of them). | ||||
| void ETC1(u8* etc1_buffer, u8* matrix_buffer, u32 width, u32 height); | ||||
| // Same as ETC1, but each tile in etc1_buffer is preceded by 8 bytes of 4-bit alpha values. | ||||
| void ETC1A4(u8* etc1_buffer, u8* matrix_buffer, u32 width, u32 height); | ||||
							
								
								
									
										278
									
								
								src/video_core/texture/internal/morton.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										278
									
								
								src/video_core/texture/internal/morton.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,278 @@ | ||||
| #include <cstring> | ||||
| #include <memory> | ||||
| #include <utility> | ||||
| #include "common/common_types.h" | ||||
| #include "morton.h" | ||||
| #include "texture_utils.h" | ||||
|  | ||||
| /////////////////////////////////////////////////////////////////////////////// | ||||
| // Optimizations | ||||
| ////////////////////////////////////////////////////////////////////////////// | ||||
| #ifdef _MSC_VER | ||||
| #pragma inline_recursion(on) | ||||
| // Normally set to 16 by default; the best balance seems to be 8 for this module | ||||
| #pragma inline_depth(8) | ||||
| // favor fast code over small code. | ||||
| #pragma optimize("t", on) | ||||
| #pragma intrinsic(memcpy) | ||||
| #elif defined(CLANG_OR_GCC) | ||||
| // The next 3 will swizzle memory copying to help find the best SSE/AVX shuffling | ||||
| // where possible. Compilation tests have proven effective use of these | ||||
| // flags on gcc and clang. | ||||
| #pragma GCC optimize("-fpredictive-commoning") | ||||
| #pragma GCC optimize("-ftree-loop-distribute-patterns") | ||||
| #pragma GCC optimize("-ftree-vectorize") | ||||
| // limit inlining | ||||
| #pragma GCC option("--param max-inline-insns-single=128") | ||||
|  | ||||
| // The beauty of these compiler options is that they generate better code than | ||||
| // hand-written intrinsics, since inline-expanded memory transfers can be pattern | ||||
| // matched with the vector instructions available on the target. | ||||
| #endif | ||||
|  | ||||
| #pragma region Z_Order | ||||
| ///////////////////////////////////////////////////////////////////////////// | ||||
| //          Z-Order: | ||||
| // | ||||
| //                    0-->1 | ||||
| //                      / | ||||
| //                    2-->3 | ||||
| // | ||||
| // for more information look at: https://en.wikipedia.org/wiki/Z-order_curve | ||||
| ///////////////////////////////////////////////////////////////////////////// | ||||
| #define TOP_LEFT 0 | ||||
| #define TOP_RIGHT 1 | ||||
| #define BOTTOM_LEFT 2 | ||||
| #define BOTTOM_RIGHT 3 | ||||
|  | ||||
| // Returns 1 for the right-hand quadrants (TOP_RIGHT, BOTTOM_RIGHT), 0 otherwise. | ||||
| constexpr u32 isRight(u32 block_index) { | ||||
|     return block_index & 1u; | ||||
| } | ||||
|  | ||||
| // Returns 1 for the lower quadrants (BOTTOM_LEFT, BOTTOM_RIGHT), 0 for the upper ones. | ||||
| constexpr u32 isBottom(u32 block_index) { | ||||
|     return block_index >> 1; | ||||
| } | ||||
|  | ||||
| // Declared ahead of swizzle_block_aux because the two templates call each other. | ||||
| template <void codec(u8*, u8*, size_t), size_t nibbles, u32 blocks, size_t block_size> | ||||
| inline void swizzle_block(u8*& morton_block, u8* linear_block); | ||||
|  | ||||
| // Processes one quadrant of the enclosing block: offsets `linear_block` to the | ||||
| // quadrant selected by `block_index` and continues with swizzle_block on it. | ||||
| // `blocks` is the quadrant's side length in pixels; `block_size` is the byte | ||||
| // offset separating the upper quadrants from the lower ones. | ||||
| template <void codec(u8*, u8*, size_t), size_t nibbles, u32 block_index, u32 blocks, | ||||
|           size_t block_size> | ||||
| inline void swizzle_block_aux(u8*& morton_block, u8* linear_block) { | ||||
|     // Right-hand quadrants start `blocks` pixels further along the line. | ||||
|     const size_t horizontal_offset = isRight(block_index) * (blocks * nibbles) / 2; | ||||
|     // Lower quadrants start `block_size` bytes further down. | ||||
|     const size_t vertical_offset = isBottom(block_index) * block_size; | ||||
|     u8* quadrant = linear_block + horizontal_offset + vertical_offset; | ||||
|     swizzle_block<codec, nibbles, blocks, block_size>(morton_block, quadrant); | ||||
| } | ||||
|  | ||||
| // Walks a square block of the linear tile in z-order, calling `codec` for each | ||||
| // pair of horizontally adjacent pixels and advancing `morton_block` past the | ||||
| // bytes handled. Blocks wider than two pixels are split into four quadrants, | ||||
| // visited as TOP_LEFT, TOP_RIGHT, BOTTOM_LEFT, BOTTOM_RIGHT. | ||||
| template <void codec(u8*, u8*, size_t), size_t nibbles, u32 blocks, size_t block_size> | ||||
| inline void swizzle_block(u8*& morton_block, u8* linear_block) { | ||||
|     const size_t half_block_size = block_size / 2; | ||||
|     if (blocks > 2) { | ||||
|         // Split into four quadrants and recurse until 2x2 blocks are reached. | ||||
|         const u32 quadrant_side = blocks / 2; | ||||
|         swizzle_block_aux<codec, nibbles, TOP_LEFT, quadrant_side, half_block_size>( | ||||
|             morton_block, linear_block); | ||||
|         swizzle_block_aux<codec, nibbles, TOP_RIGHT, quadrant_side, half_block_size>( | ||||
|             morton_block, linear_block); | ||||
|         swizzle_block_aux<codec, nibbles, BOTTOM_LEFT, quadrant_side, half_block_size>( | ||||
|             morton_block, linear_block); | ||||
|         swizzle_block_aux<codec, nibbles, BOTTOM_RIGHT, quadrant_side, half_block_size>( | ||||
|             morton_block, linear_block); | ||||
|         return; | ||||
|     } | ||||
|     // 2x2 block: two pixels of `nibbles` nibbles each take `nibbles` bytes, | ||||
|     // so each codec call covers one row of the block. | ||||
|     const size_t pair_bytes = nibbles; | ||||
|     // Upper row (TOP_LEFT & TOP_RIGHT) | ||||
|     codec(morton_block, linear_block, pair_bytes); | ||||
|     morton_block += pair_bytes; | ||||
|     // Lower row (BOTTOM_LEFT & BOTTOM_RIGHT) | ||||
|     codec(morton_block, linear_block + half_block_size, pair_bytes); | ||||
|     morton_block += pair_bytes; | ||||
| } | ||||
|  | ||||
| // Runs swizzle_block over one whole tile of lines_per_block x lines_per_block pixels. | ||||
| template <void codec(u8*, u8*, size_t), size_t nibbles, size_t lines_per_block> | ||||
| void swizzle_pass(u8* morton_block, u8* linear_block) { | ||||
|     const size_t pixels_per_tile = lines_per_block * lines_per_block; | ||||
|     const size_t tile_bytes = (pixels_per_tile * nibbles) / 2; | ||||
|     swizzle_block<codec, nibbles, lines_per_block, tile_bytes>(morton_block, linear_block); | ||||
| } | ||||
| #pragma endregion Z_Order | ||||
|  | ||||
| // Encodes one tile: gathers its lines from `linear_buffer` into a scratch tile | ||||
| // (tiling_pass), then writes that scratch tile to `morton_buffer` in z-order | ||||
| // (swizzle_pass). | ||||
| template <size_t nibbles, size_t lines_per_block> | ||||
| void encode_pass(u8* morton_buffer, u8* linear_buffer, u32 x_blocks) { | ||||
|     const u32 scratch_bytes = (lines_per_block * lines_per_block * nibbles) / 2; | ||||
|     alignas(64) u8 scratch[scratch_bytes]; | ||||
|     tiling_pass<&encode, nibbles, lines_per_block>(linear_buffer, scratch, x_blocks); | ||||
|     swizzle_pass<&encode, nibbles, lines_per_block>(morton_buffer, scratch); | ||||
| } | ||||
|  | ||||
| // Decodes one tile: reads it from `morton_buffer` in z-order into a scratch | ||||
| // tile (swizzle_pass), then scatters the scratch tile's lines into | ||||
| // `linear_buffer` (tiling_pass). | ||||
| template <size_t nibbles, size_t lines_per_block> | ||||
| void decode_pass(u8* morton_buffer, u8* linear_buffer, u32 x_blocks) { | ||||
|     const u32 scratch_bytes = (lines_per_block * lines_per_block * nibbles) / 2; | ||||
|     alignas(64) u8 scratch[scratch_bytes]; | ||||
|     swizzle_pass<&decode, nibbles, lines_per_block>(morton_buffer, scratch); | ||||
|     tiling_pass<&decode, nibbles, lines_per_block>(linear_buffer, scratch, x_blocks); | ||||
| } | ||||
|  | ||||
| // Applies `codec` to every whole tile of the image. Tiles are taken from | ||||
| // `morton_buffer` in order, while `matrix_buffer` is visited starting at its | ||||
| // last row of tiles and moving backwards one tile row at a time. | ||||
| template <void codec(u8*, u8*, u32), size_t nibbles, size_t lines_per_block> | ||||
| void morton_pass(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height) { | ||||
|     const size_t line_size = (lines_per_block * nibbles) / 2; | ||||
|     const size_t tile_size = lines_per_block * line_size; | ||||
|     const size_t stride_size = width * line_size; | ||||
|     const u32 x_blocks = (width / lines_per_block); | ||||
|     const u32 y_blocks = (height / lines_per_block); | ||||
|     u8* tile_row = matrix_buffer + ((height * width * nibbles) / 2) - stride_size; | ||||
|     for (u32 y = 0; y < y_blocks; y++, tile_row -= stride_size) { | ||||
|         u8* linear_buffer = tile_row; | ||||
|         for (u32 x = 0; x != x_blocks; x++) { | ||||
|             codec(morton_buffer, linear_buffer, x_blocks); | ||||
|             morton_buffer += tile_size; | ||||
|             linear_buffer += line_size; | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| namespace Decoders { | ||||
|  | ||||
| // Decodes an image stored as 8x8 tiles from morton_buffer into matrix_buffer. | ||||
| // Returns false, without touching the buffers, when bpp is not one of | ||||
| // 4, 8, 16, 24 or 32; returns true otherwise. | ||||
| bool Morton_8x8(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height, u32 bpp) { | ||||
|     switch (bpp) { | ||||
|     case 4: | ||||
|         morton_pass<&decode_pass<1, 8>, 1, 8>(morton_buffer, matrix_buffer, width, height); | ||||
|         break; | ||||
|     case 8: | ||||
|         morton_pass<&decode_pass<2, 8>, 2, 8>(morton_buffer, matrix_buffer, width, height); | ||||
|         break; | ||||
|     case 16: | ||||
|         morton_pass<&decode_pass<4, 8>, 4, 8>(morton_buffer, matrix_buffer, width, height); | ||||
|         break; | ||||
|     case 24: | ||||
|         morton_pass<&decode_pass<6, 8>, 6, 8>(morton_buffer, matrix_buffer, width, height); | ||||
|         break; | ||||
|     case 32: | ||||
|         morton_pass<&decode_pass<8, 8>, 8, 8>(morton_buffer, matrix_buffer, width, height); | ||||
|         break; | ||||
|     default: | ||||
|         return false; | ||||
|     } | ||||
|     return true; | ||||
| } | ||||
|  | ||||
| // Decodes an image stored as 32x32 tiles from morton_buffer into matrix_buffer. | ||||
| // Returns false, without touching the buffers, when bpp is not one of | ||||
| // 4, 8, 16, 24 or 32; returns true otherwise. | ||||
| bool Morton_32x32(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height, u32 bpp) { | ||||
|     switch (bpp) { | ||||
|     case 4: | ||||
|         morton_pass<&decode_pass<1, 32>, 1, 32>(morton_buffer, matrix_buffer, width, height); | ||||
|         break; | ||||
|     case 8: | ||||
|         morton_pass<&decode_pass<2, 32>, 2, 32>(morton_buffer, matrix_buffer, width, height); | ||||
|         break; | ||||
|     case 16: | ||||
|         morton_pass<&decode_pass<4, 32>, 4, 32>(morton_buffer, matrix_buffer, width, height); | ||||
|         break; | ||||
|     case 24: | ||||
|         morton_pass<&decode_pass<6, 32>, 6, 32>(morton_buffer, matrix_buffer, width, height); | ||||
|         break; | ||||
|     case 32: | ||||
|         morton_pass<&decode_pass<8, 32>, 8, 32>(morton_buffer, matrix_buffer, width, height); | ||||
|         break; | ||||
|     default: | ||||
|         return false; | ||||
|     } | ||||
|     return true; | ||||
| } | ||||
| } | ||||
|  | ||||
| namespace Encoders { | ||||
|  | ||||
| // Encodes the image in matrix_buffer into morton_buffer as 8x8 tiles. | ||||
| // Returns false, without touching the buffers, when bpp is not one of | ||||
| // 4, 8, 16, 24 or 32; returns true otherwise. | ||||
| bool Morton_8x8(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height, u32 bpp) { | ||||
|     switch (bpp) { | ||||
|     case 4: | ||||
|         morton_pass<&encode_pass<1, 8>, 1, 8>(morton_buffer, matrix_buffer, width, height); | ||||
|         break; | ||||
|     case 8: | ||||
|         morton_pass<&encode_pass<2, 8>, 2, 8>(morton_buffer, matrix_buffer, width, height); | ||||
|         break; | ||||
|     case 16: | ||||
|         morton_pass<&encode_pass<4, 8>, 4, 8>(morton_buffer, matrix_buffer, width, height); | ||||
|         break; | ||||
|     case 24: | ||||
|         morton_pass<&encode_pass<6, 8>, 6, 8>(morton_buffer, matrix_buffer, width, height); | ||||
|         break; | ||||
|     case 32: | ||||
|         morton_pass<&encode_pass<8, 8>, 8, 8>(morton_buffer, matrix_buffer, width, height); | ||||
|         break; | ||||
|     default: | ||||
|         return false; | ||||
|     } | ||||
|     return true; | ||||
| } | ||||
|  | ||||
| // Encodes the image in matrix_buffer into morton_buffer as 32x32 tiles. | ||||
| // Returns false, without touching the buffers, when bpp is not one of | ||||
| // 4, 8, 16, 24 or 32; returns true otherwise. | ||||
| bool Morton_32x32(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height, u32 bpp) { | ||||
|     switch (bpp) { | ||||
|     case 4: | ||||
|         morton_pass<&encode_pass<1, 32>, 1, 32>(morton_buffer, matrix_buffer, width, height); | ||||
|         break; | ||||
|     case 8: | ||||
|         morton_pass<&encode_pass<2, 32>, 2, 32>(morton_buffer, matrix_buffer, width, height); | ||||
|         break; | ||||
|     case 16: | ||||
|         morton_pass<&encode_pass<4, 32>, 4, 32>(morton_buffer, matrix_buffer, width, height); | ||||
|         break; | ||||
|     case 24: | ||||
|         morton_pass<&encode_pass<6, 32>, 6, 32>(morton_buffer, matrix_buffer, width, height); | ||||
|         break; | ||||
|     case 32: | ||||
|         morton_pass<&encode_pass<8, 32>, 8, 32>(morton_buffer, matrix_buffer, width, height); | ||||
|         break; | ||||
|     default: | ||||
|         return false; | ||||
|     } | ||||
|     return true; | ||||
| } | ||||
| } | ||||
							
								
								
									
										15
									
								
								src/video_core/texture/internal/morton.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								src/video_core/texture/internal/morton.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,15 @@ | ||||
| #include "common/common_types.h" | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| // Tile layouts matching the Morton_8x8 / Morton_32x32 functions below. | ||||
| // NOTE(review): presumably used by callers to pick between them - not used in this header. | ||||
| enum class MortonPass { Tile8x8, Tile32x32 }; | ||||
|  | ||||
| // Tiled (morton_buffer) to linear (matrix_buffer) conversion. | ||||
| // bpp is the pixel size in bits; each function returns whether it handled that bpp. | ||||
| namespace Decoders { | ||||
| bool Morton_8x8(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height, u32 bpp); | ||||
| bool Morton_32x32(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height, u32 bpp); | ||||
| } | ||||
|  | ||||
| // Linear (matrix_buffer) to tiled (morton_buffer) conversion. | ||||
| // bpp is the pixel size in bits; each function returns whether it handled that bpp. | ||||
| namespace Encoders { | ||||
| bool Morton_8x8(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height, u32 bpp); | ||||
| bool Morton_32x32(u8* morton_buffer, u8* matrix_buffer, u32 width, u32 height, u32 bpp); | ||||
| } | ||||
							
								
								
									
										98
									
								
								src/video_core/texture/internal/texture_utils.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								src/video_core/texture/internal/texture_utils.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,98 @@ | ||||
| #include <array> | ||||
| #include <cstring> | ||||
| #include <memory> | ||||
| #include <utility> | ||||
| #include "common/color.h" | ||||
| #include "common/swap.h" | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #if ((defined(__clang__) || defined(__GNUC__)) && !defined(__INTEL_COMPILER)) | ||||
| #define CLANG_OR_GCC | ||||
| #endif | ||||
|  | ||||
| /////////////////////////////////////////////////////////////////////////////// | ||||
| // Optimizations | ||||
| ////////////////////////////////////////////////////////////////////////////// | ||||
| #ifdef _MSC_VER | ||||
| #pragma inline_recursion(on) | ||||
| #elif defined(CLANG_OR_GCC) | ||||
| #pragma GCC optimize("-fpeel-loops") | ||||
| #pragma GCC optimize("-fpredictive-commoning") | ||||
| #pragma GCC optimize("-ftree-loop-distribute-patterns") | ||||
| #pragma GCC optimize("-ftree-vectorize") | ||||
| #endif | ||||
|  | ||||
| // Copies read_size bytes from morton_pointer to matrix_pointer. | ||||
| // @param read_size number of bytes to copy | ||||
| inline void decode(u8* morton_pointer, u8* matrix_pointer, size_t read_size) { | ||||
|     u8* const destination = matrix_pointer; | ||||
|     const u8* const source = morton_pointer; | ||||
|     std::memcpy(destination, source, read_size); | ||||
| } | ||||
|  | ||||
| // Copies read_size bytes from matrix_pointer to morton_pointer. | ||||
| // @param read_size number of bytes to copy | ||||
| inline void encode(u8* morton_pointer, u8* matrix_pointer, size_t read_size) { | ||||
|     u8* const destination = morton_pointer; | ||||
|     const u8* const source = matrix_pointer; | ||||
|     std::memcpy(destination, source, read_size); | ||||
| } | ||||
|  | ||||
| // Pre: width % 8 == 0 && height % 8 == 0 | ||||
| // Applies `pass(read, write)` over `target` from the last element towards the | ||||
| // first; both cursors point into `target` and move backwards by their own | ||||
| // step. read_size and write_size are given in nibbles. | ||||
| template <void pass(u8*, u8*), u32 read_size, u32 write_size, u32 tuning = 2> | ||||
| inline void image_pass_aux_rev(u8* target, u32 width, u32 height) { | ||||
|     // 1 when read_size is below two nibbles: the image is then covered in half | ||||
|     // as many steps and the sizes are used undivided as byte steps. | ||||
|     const u32 packed = (read_size < 2) ? 1 : 0; | ||||
|     const u32 steps = width * height / (1 + packed); | ||||
|     const u32 read_step = read_size / (2 - packed); | ||||
|     const u32 write_step = write_size / (2 - packed); | ||||
|     u8* src = target + (steps - 1) * read_step; | ||||
|     u8* dst = target + (steps - 1) * write_step; | ||||
|     // The inner trip count is the compile-time `tuning` value, giving the | ||||
|     // compiler a fixed bound to optimize around. | ||||
|     for (u32 batch = steps / tuning; batch != 0; --batch) { | ||||
|         for (u32 k = 0; k < tuning; k++) { | ||||
|             pass(src, dst); | ||||
|             src -= read_step; | ||||
|             dst -= write_step; | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Pre: width % 8 == 0 && height % 8 == 0 | ||||
| // Applies `pass(read, write)` over `target` from the first element towards the | ||||
| // last; both cursors start at `target` and move forwards by their own step. | ||||
| // read_size and write_size are given in nibbles. | ||||
| template <void pass(u8*, u8*), u32 read_size, u32 write_size, u32 tuning = 2> | ||||
| inline void image_pass_aux(u8* target, u32 width, u32 height) { | ||||
|     // 1 when write_size is below two nibbles: the image is then covered in half | ||||
|     // as many steps and the sizes are used undivided as byte steps. | ||||
|     const u32 packed = (write_size < 2) ? 1 : 0; | ||||
|     const u32 steps = width * height / (1 + packed); | ||||
|     const u32 read_step = read_size / (2 - packed); | ||||
|     const u32 write_step = write_size / (2 - packed); | ||||
|     u8* src = target; | ||||
|     u8* dst = target; | ||||
|     // The inner trip count is the compile-time `tuning` value, giving the | ||||
|     // compiler a fixed bound to optimize around. | ||||
|     for (u32 batch = steps / tuning; batch != 0; --batch) { | ||||
|         for (u32 k = 0; k < tuning; k++) { | ||||
|             pass(src, dst); | ||||
|             src += read_step; | ||||
|             dst += write_step; | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Applies `pass` to every element of the image held in `target`, in place. | ||||
| // read_size and write_size are the per-pixel sizes in nibbles before and after | ||||
| // the pass; `tuning` is the compile-time inner trip count of the helpers. | ||||
| // | ||||
| // The traversal direction is chosen so that the write cursor never runs ahead | ||||
| // of the read cursor: front to back when elements shrink, back to front when | ||||
| // they grow or keep their size. | ||||
| // | ||||
| // Fix: the helpers were previously only named, never called, so this function | ||||
| // did nothing and ignored all of its arguments. | ||||
| template <void pass(u8*, u8*), u32 read_size, u32 write_size, u32 tuning = 2> | ||||
| inline void image_pass(u8* target, u32 width, u32 height) { | ||||
|     if (read_size > write_size) | ||||
|         image_pass_aux<pass, read_size, write_size, tuning>(target, width, height); | ||||
|     else | ||||
|         image_pass_aux_rev<pass, read_size, write_size, tuning>(target, width, height); | ||||
| } | ||||
|  | ||||
| // Runs `codec` once per line of a tile, pairing line i of the contiguous | ||||
| // `tiled` buffer with line (lines_per_block - 1 - i) of the tile's footprint | ||||
| // in `linear`, whose rows are x_blocks tile lines long. The direction of the | ||||
| // copy is decided by `codec`. | ||||
| template <void codec(u8*, u8*, size_t), size_t nibbles, size_t lines_per_block> | ||||
| void tiling_pass(u8* linear, u8* tiled, u32 x_blocks) { | ||||
|     const size_t tiled_line_size = (lines_per_block * nibbles) / 2; | ||||
|     const size_t row_length = x_blocks * tiled_line_size; | ||||
|     u8* tiled_line = tiled; | ||||
|     for (u32 i = 0; i < lines_per_block; i++, tiled_line += tiled_line_size) { | ||||
|         // Lines are mirrored vertically between the two layouts. | ||||
|         const u32 mirrored_line = (lines_per_block - 1 - i); | ||||
|         codec(tiled_line, linear + mirrored_line * row_length, tiled_line_size); | ||||
|     } | ||||
| } | ||||
		Reference in New Issue
	
	Block a user
	 Fernando Sahmkow
					Fernando Sahmkow