Merge pull request #793 from neobrain/stencil
Pica: Implement stencil testing.
This commit is contained in:
		| @@ -55,7 +55,9 @@ GraphicsFramebufferWidget::GraphicsFramebufferWidget(std::shared_ptr<Pica::Debug | ||||
|     framebuffer_format_control->addItem(tr("RGBA4")); | ||||
|     framebuffer_format_control->addItem(tr("D16")); | ||||
|     framebuffer_format_control->addItem(tr("D24")); | ||||
|     framebuffer_format_control->addItem(tr("D24S8")); | ||||
|     framebuffer_format_control->addItem(tr("D24X8")); | ||||
|     framebuffer_format_control->addItem(tr("X24S8")); | ||||
|     framebuffer_format_control->addItem(tr("(unknown)")); | ||||
|  | ||||
|     // TODO: This QLabel should shrink the image to the available space rather than just expanding... | ||||
|     framebuffer_picture_label = new QLabel; | ||||
| @@ -184,8 +186,32 @@ void GraphicsFramebufferWidget::OnUpdate() | ||||
|         framebuffer_address = framebuffer.GetColorBufferPhysicalAddress(); | ||||
|         framebuffer_width = framebuffer.GetWidth(); | ||||
|         framebuffer_height = framebuffer.GetHeight(); | ||||
|         // TODO: It's unknown how this format is actually specified | ||||
|         framebuffer_format = Format::RGBA8; | ||||
|  | ||||
|         switch (framebuffer.color_format) { | ||||
|         case Pica::Regs::ColorFormat::RGBA8: | ||||
|             framebuffer_format = Format::RGBA8; | ||||
|             break; | ||||
|  | ||||
|         case Pica::Regs::ColorFormat::RGB8: | ||||
|             framebuffer_format = Format::RGB8; | ||||
|             break; | ||||
|  | ||||
|         case Pica::Regs::ColorFormat::RGB5A1: | ||||
|             framebuffer_format = Format::RGB5A1; | ||||
|             break; | ||||
|  | ||||
|         case Pica::Regs::ColorFormat::RGB565: | ||||
|             framebuffer_format = Format::RGB565; | ||||
|             break; | ||||
|  | ||||
|         case Pica::Regs::ColorFormat::RGBA4: | ||||
|             framebuffer_format = Format::RGBA4; | ||||
|             break; | ||||
|  | ||||
|         default: | ||||
|             framebuffer_format = Format::Unknown; | ||||
|             break; | ||||
|         } | ||||
|  | ||||
|         break; | ||||
|     } | ||||
| @@ -197,7 +223,24 @@ void GraphicsFramebufferWidget::OnUpdate() | ||||
|         framebuffer_address = framebuffer.GetDepthBufferPhysicalAddress(); | ||||
|         framebuffer_width = framebuffer.GetWidth(); | ||||
|         framebuffer_height = framebuffer.GetHeight(); | ||||
|         framebuffer_format = Format::D16; | ||||
|  | ||||
|         switch (framebuffer.depth_format) { | ||||
|         case Pica::Regs::DepthFormat::D16: | ||||
|             framebuffer_format = Format::D16; | ||||
|             break; | ||||
|  | ||||
|         case Pica::Regs::DepthFormat::D24: | ||||
|             framebuffer_format = Format::D24; | ||||
|             break; | ||||
|  | ||||
|         case Pica::Regs::DepthFormat::D24S8: | ||||
|             framebuffer_format = Format::D24X8; | ||||
|             break; | ||||
|  | ||||
|         default: | ||||
|             framebuffer_format = Format::Unknown; | ||||
|             break; | ||||
|         } | ||||
|  | ||||
|         break; | ||||
|     } | ||||
| @@ -258,7 +301,7 @@ void GraphicsFramebufferWidget::OnUpdate() | ||||
|                 color.b() = (data >> 16) & 0xFF; | ||||
|                 break; | ||||
|             } | ||||
|             case Format::D24S8: | ||||
|             case Format::D24X8: | ||||
|             { | ||||
|                 Math::Vec2<u32> data = Color::DecodeD24S8(pixel); | ||||
|                 color.r() = data.x & 0xFF; | ||||
| @@ -266,6 +309,12 @@ void GraphicsFramebufferWidget::OnUpdate() | ||||
|                 color.b() = (data.x >> 16) & 0xFF; | ||||
|                 break; | ||||
|             } | ||||
|             case Format::X24S8: | ||||
|             { | ||||
|                 Math::Vec2<u32> data = Color::DecodeD24S8(pixel); | ||||
|                 color.r() = color.g() = color.b() = data.y; | ||||
|                 break; | ||||
|             } | ||||
|             default: | ||||
|                 qDebug() << "Unknown fb color format " << static_cast<int>(framebuffer_format); | ||||
|                 break; | ||||
| @@ -286,7 +335,8 @@ void GraphicsFramebufferWidget::OnUpdate() | ||||
| u32 GraphicsFramebufferWidget::BytesPerPixel(GraphicsFramebufferWidget::Format format) { | ||||
|     switch (format) { | ||||
|         case Format::RGBA8: | ||||
|         case Format::D24S8: | ||||
|         case Format::D24X8: | ||||
|         case Format::X24S8: | ||||
|             return 4; | ||||
|         case Format::RGB8: | ||||
|         case Format::D24: | ||||
|   | ||||
| @@ -35,7 +35,9 @@ class GraphicsFramebufferWidget : public BreakPointObserverDock { | ||||
|         RGBA4    = 4, | ||||
|         D16      = 5, | ||||
|         D24      = 6, | ||||
|         D24S8    = 7 | ||||
|         D24X8    = 7, | ||||
|         X24S8    = 8, | ||||
|         Unknown  = 9 | ||||
|     }; | ||||
|  | ||||
|     static u32 BytesPerPixel(Format format); | ||||
|   | ||||
| @@ -208,7 +208,32 @@ inline void EncodeD24(u32 value, u8* bytes) { | ||||
|  * @param bytes Pointer where to store the encoded value | ||||
|  */ | ||||
| inline void EncodeD24S8(u32 depth, u8 stencil, u8* bytes) { | ||||
|     *reinterpret_cast<u32_le*>(bytes) = (stencil << 24) | depth; | ||||
|     bytes[0] = depth & 0xFF; | ||||
|     bytes[1] = (depth >> 8) & 0xFF; | ||||
|     bytes[2] = (depth >> 16) & 0xFF; | ||||
|     bytes[3] = stencil; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Encode a 24 bit depth value as D24X8 format (32 bits per pixel with 8 bits unused) | ||||
|  * @param depth 24 bit source depth value to encode | ||||
|  * @param bytes Pointer where to store the encoded value | ||||
|  * @note unused bits will not be modified | ||||
|  */ | ||||
| inline void EncodeD24X8(u32 depth, u8* bytes) { | ||||
|     bytes[0] = depth & 0xFF; | ||||
|     bytes[1] = (depth >> 8) & 0xFF; | ||||
|     bytes[2] = (depth >> 16) & 0xFF; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Encode an 8 bit stencil value as X24S8 format (32 bits per pixel with 24 bits unused) | ||||
|  * @param stencil 8 bit source stencil value to encode | ||||
|  * @param bytes Pointer where to store the encoded value | ||||
|  * @note unused bits will not be modified | ||||
|  */ | ||||
| inline void EncodeX24S8(u8 stencil, u8* bytes) { | ||||
|     bytes[3] = stencil; | ||||
| } | ||||
|  | ||||
| } // namespace | ||||
|   | ||||
| @@ -420,6 +420,11 @@ struct Regs { | ||||
|         GreaterThanOrEqual = 7, | ||||
|     }; | ||||
|  | ||||
|     enum class StencilAction : u32 { | ||||
|         Keep = 0, | ||||
|         Xor  = 5, | ||||
|     }; | ||||
|  | ||||
|     struct { | ||||
|         union { | ||||
|             // If false, logic blending is used | ||||
| @@ -454,15 +459,35 @@ struct Regs { | ||||
|             BitField< 8, 8, u32> ref; | ||||
|         } alpha_test; | ||||
|  | ||||
|         union { | ||||
|             BitField< 0, 1, u32> stencil_test_enable; | ||||
|             BitField< 4, 3, CompareFunc> stencil_test_func; | ||||
|             BitField< 8, 8, u32> stencil_replacement_value; | ||||
|             BitField<16, 8, u32> stencil_reference_value; | ||||
|             BitField<24, 8, u32> stencil_mask; | ||||
|         } stencil_test; | ||||
|         struct { | ||||
|             union { | ||||
|                 // If true, enable stencil testing | ||||
|                 BitField< 0, 1, u32> enable; | ||||
|  | ||||
|         INSERT_PADDING_WORDS(0x1); | ||||
|                 // Comparison operation for stencil testing | ||||
|                 BitField< 4, 3, CompareFunc> func; | ||||
|  | ||||
|                 // Value to calculate the new stencil value from | ||||
|                 BitField< 8, 8, u32> replacement_value; | ||||
|  | ||||
|                 // Value to compare against for stencil testing | ||||
|                 BitField<16, 8, u32> reference_value; | ||||
|  | ||||
|                 // Mask to apply on stencil test inputs | ||||
|                 BitField<24, 8, u32> mask; | ||||
|             }; | ||||
|  | ||||
|             union { | ||||
|                 // Action to perform when the stencil test fails | ||||
|                 BitField< 0, 3, StencilAction> action_stencil_fail; | ||||
|  | ||||
|                 // Action to perform when stencil testing passed but depth testing fails | ||||
|                 BitField< 4, 3, StencilAction> action_depth_fail; | ||||
|  | ||||
|                 // Action to perform when both stencil and depth testing pass | ||||
|                 BitField< 8, 3, StencilAction> action_depth_pass; | ||||
|             }; | ||||
|         } stencil_test; | ||||
|  | ||||
|         union { | ||||
|             BitField< 0, 1, u32> depth_test_enable; | ||||
| @@ -512,7 +537,7 @@ struct Regs { | ||||
|     struct { | ||||
|         INSERT_PADDING_WORDS(0x6); | ||||
|  | ||||
|         DepthFormat depth_format; | ||||
|         DepthFormat depth_format; // TODO: Should be a BitField! | ||||
|         BitField<16, 3, ColorFormat> color_format; | ||||
|  | ||||
|         INSERT_PADDING_WORDS(0x4); | ||||
|   | ||||
| @@ -126,6 +126,30 @@ static u32 GetDepth(int x, int y) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| static u8 GetStencil(int x, int y) { | ||||
|     const auto& framebuffer = g_state.regs.framebuffer; | ||||
|     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||||
|     u8* depth_buffer = Memory::GetPhysicalPointer(addr); | ||||
|  | ||||
|     y = framebuffer.height - y; | ||||
|  | ||||
|     const u32 coarse_y = y & ~7; | ||||
|     u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format); | ||||
|     u32 stride = framebuffer.width * bytes_per_pixel; | ||||
|  | ||||
|     u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | ||||
|     u8* src_pixel = depth_buffer + src_offset; | ||||
|  | ||||
|     switch (framebuffer.depth_format) { | ||||
|         case Regs::DepthFormat::D24S8: | ||||
|             return Color::DecodeD24S8(src_pixel).y; | ||||
|  | ||||
|         default: | ||||
|             LOG_WARNING(HW_GPU, "GetStencil called for function which doesn't have a stencil component (format %u)", framebuffer.depth_format); | ||||
|             return 0; | ||||
|     } | ||||
| } | ||||
|  | ||||
| static void SetDepth(int x, int y, u32 value) { | ||||
|     const auto& framebuffer = g_state.regs.framebuffer; | ||||
|     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||||
| @@ -144,13 +168,15 @@ static void SetDepth(int x, int y, u32 value) { | ||||
|         case Regs::DepthFormat::D16: | ||||
|             Color::EncodeD16(value, dst_pixel); | ||||
|             break; | ||||
|  | ||||
|         case Regs::DepthFormat::D24: | ||||
|             Color::EncodeD24(value, dst_pixel); | ||||
|             break; | ||||
|  | ||||
|         case Regs::DepthFormat::D24S8: | ||||
|             // TODO(Subv): Implement the stencil buffer | ||||
|             Color::EncodeD24S8(value, 0, dst_pixel); | ||||
|             Color::EncodeD24X8(value, dst_pixel); | ||||
|             break; | ||||
|  | ||||
|         default: | ||||
|             LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | ||||
|             UNIMPLEMENTED(); | ||||
| @@ -158,6 +184,53 @@ static void SetDepth(int x, int y, u32 value) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| static void SetStencil(int x, int y, u8 value) { | ||||
|     const auto& framebuffer = g_state.regs.framebuffer; | ||||
|     const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||||
|     u8* depth_buffer = Memory::GetPhysicalPointer(addr); | ||||
|  | ||||
|     y = framebuffer.height - y; | ||||
|  | ||||
|     const u32 coarse_y = y & ~7; | ||||
|     u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format); | ||||
|     u32 stride = framebuffer.width * bytes_per_pixel; | ||||
|  | ||||
|     u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | ||||
|     u8* dst_pixel = depth_buffer + dst_offset; | ||||
|  | ||||
|     switch (framebuffer.depth_format) { | ||||
|         case Pica::Regs::DepthFormat::D16: | ||||
|         case Pica::Regs::DepthFormat::D24: | ||||
|             // Nothing to do | ||||
|             break; | ||||
|  | ||||
|         case Pica::Regs::DepthFormat::D24S8: | ||||
|             Color::EncodeX24S8(value, dst_pixel); | ||||
|             break; | ||||
|  | ||||
|         default: | ||||
|             LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | ||||
|             UNIMPLEMENTED(); | ||||
|             break; | ||||
|     } | ||||
| } | ||||
|  | ||||
| // TODO: Should the stencil mask be applied to the "dest" or "ref" operands? Most likely not! | ||||
| static u8 PerformStencilAction(Regs::StencilAction action, u8 dest, u8 ref) { | ||||
|     switch (action) { | ||||
|     case Regs::StencilAction::Keep: | ||||
|         return dest; | ||||
|  | ||||
|     case Regs::StencilAction::Xor: | ||||
|         return dest ^ ref; | ||||
|  | ||||
|     default: | ||||
|         LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action); | ||||
|         UNIMPLEMENTED(); | ||||
|         return 0; | ||||
|     } | ||||
| } | ||||
|  | ||||
| // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values | ||||
| struct Fix12P4 { | ||||
|     Fix12P4() {} | ||||
| @@ -276,6 +349,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | ||||
|     auto textures = regs.GetTextures(); | ||||
|     auto tev_stages = regs.GetTevStages(); | ||||
|  | ||||
|     bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8; | ||||
|     const auto stencil_test = g_state.regs.output_merger.stencil_test; | ||||
|  | ||||
|     // Enter rasterization loop, starting at the center of the topleft bounding box corner. | ||||
|     // TODO: Not sure if looping through x first might be faster | ||||
|     for (u16 y = min_y + 8; y < max_y; y += 0x10) { | ||||
| @@ -647,6 +723,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | ||||
|             } | ||||
|  | ||||
|             const auto& output_merger = regs.output_merger; | ||||
|             // TODO: Does alpha testing happen before or after stencil? | ||||
|             if (output_merger.alpha_test.enable) { | ||||
|                 bool pass = false; | ||||
|  | ||||
| @@ -688,6 +765,54 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | ||||
|                     continue; | ||||
|             } | ||||
|  | ||||
|             u8 old_stencil = 0; | ||||
|             if (stencil_action_enable) { | ||||
|                 old_stencil = GetStencil(x >> 4, y >> 4); | ||||
|                 u8 dest = old_stencil & stencil_test.mask; | ||||
|                 u8 ref = stencil_test.reference_value & stencil_test.mask; | ||||
|  | ||||
|                 bool pass = false; | ||||
|                 switch (stencil_test.func) { | ||||
|                 case Regs::CompareFunc::Never: | ||||
|                     pass = false; | ||||
|                     break; | ||||
|  | ||||
|                 case Regs::CompareFunc::Always: | ||||
|                     pass = true; | ||||
|                     break; | ||||
|  | ||||
|                 case Regs::CompareFunc::Equal: | ||||
|                     pass = (ref == dest); | ||||
|                     break; | ||||
|  | ||||
|                 case Regs::CompareFunc::NotEqual: | ||||
|                     pass = (ref != dest); | ||||
|                     break; | ||||
|  | ||||
|                 case Regs::CompareFunc::LessThan: | ||||
|                     pass = (ref < dest); | ||||
|                     break; | ||||
|  | ||||
|                 case Regs::CompareFunc::LessThanOrEqual: | ||||
|                     pass = (ref <= dest); | ||||
|                     break; | ||||
|  | ||||
|                 case Regs::CompareFunc::GreaterThan: | ||||
|                     pass = (ref > dest); | ||||
|                     break; | ||||
|  | ||||
|                 case Regs::CompareFunc::GreaterThanOrEqual: | ||||
|                     pass = (ref >= dest); | ||||
|                     break; | ||||
|                 } | ||||
|  | ||||
|                 if (!pass) { | ||||
|                     u8 new_stencil = PerformStencilAction(stencil_test.action_stencil_fail, old_stencil, stencil_test.replacement_value); | ||||
|                     SetStencil(x >> 4, y >> 4, new_stencil); | ||||
|                     continue; | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             // TODO: Does depth indeed only get written if depth testing is enabled? | ||||
|             if (output_merger.depth_test_enable) { | ||||
|                 unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); | ||||
| @@ -732,11 +857,22 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | ||||
|                     break; | ||||
|                 } | ||||
|  | ||||
|                 if (!pass) | ||||
|                 if (!pass) { | ||||
|                     if (stencil_action_enable) { | ||||
|                         u8 new_stencil = PerformStencilAction(stencil_test.action_depth_fail, old_stencil, stencil_test.replacement_value); | ||||
|                         SetStencil(x >> 4, y >> 4, new_stencil); | ||||
|                     } | ||||
|                     continue; | ||||
|                 } | ||||
|  | ||||
|                 if (output_merger.depth_write_enable) | ||||
|                     SetDepth(x >> 4, y >> 4, z); | ||||
|  | ||||
|                 if (stencil_action_enable) { | ||||
|                     // TODO: What happens if stencil testing is enabled, but depth testing is not? Will stencil get updated anyway? | ||||
|                     u8 new_stencil = PerformStencilAction(stencil_test.action_depth_pass, old_stencil, stencil_test.replacement_value); | ||||
|                     SetStencil(x >> 4, y >> 4, new_stencil); | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             auto dest = GetPixel(x >> 4, y >> 4); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Tony Wasserka
					Tony Wasserka