From e2f444394f9f156b61c3763de469a8b22ae9f25e Mon Sep 17 00:00:00 2001 From: Dragios Date: Sat, 16 Apr 2016 01:17:00 +0800 Subject: [PATCH] Buffer allow read (#7) * Rasterizer: Implement W-Buffer * Fix ftoi and disable VFPv3 * removeme-debug-hw-depth-print * GL: Fix HW depth-buffering * Rasterizer: Respect buffer-read allow registers * OpenGL: Respect color-read allow register * OpenGL: Respect depth-read allow register * OpenGL: Respect stencil-read allow register --- src/core/arm/dyncom/arm_dyncom_dec.cpp | 4 + src/core/arm/skyeye_common/vfp/vfpdouble.cpp | 4 +- src/video_core/clipper.cpp | 4 +- src/video_core/command_processor.cpp | 9 + src/video_core/pica.h | 30 ++- src/video_core/rasterizer.cpp | 34 ++- .../renderer_opengl/gl_rasterizer.cpp | 255 ++++++++++++++++-- .../renderer_opengl/gl_rasterizer.h | 5 + .../renderer_opengl/gl_shader_gen.cpp | 11 +- src/video_core/renderer_opengl/pica_to_gl.h | 48 ++++ 10 files changed, 367 insertions(+), 37 deletions(-) diff --git a/src/core/arm/dyncom/arm_dyncom_dec.cpp b/src/core/arm/dyncom/arm_dyncom_dec.cpp index 8cd6755cb..247d379e3 100644 --- a/src/core/arm/dyncom/arm_dyncom_dec.cpp +++ b/src/core/arm/dyncom/arm_dyncom_dec.cpp @@ -422,6 +422,10 @@ ARMDecodeStatus DecodeARMInstruction(u32 instr, s32* idx) { n = arm_instruction[i].attribute_value; base = 0; + // 3DS has no VFP3 support + if (arm_instruction[i].version == ARMVFP3) + continue; + while (n) { if (arm_instruction[i].content[base + 1] == 31 && arm_instruction[i].content[base] == 0) { // clrex diff --git a/src/core/arm/skyeye_common/vfp/vfpdouble.cpp b/src/core/arm/skyeye_common/vfp/vfpdouble.cpp index 45914d479..482924e4a 100644 --- a/src/core/arm/skyeye_common/vfp/vfpdouble.cpp +++ b/src/core/arm/skyeye_common/vfp/vfpdouble.cpp @@ -560,7 +560,7 @@ static u32 vfp_double_ftoui(ARMul_State* state, int sd, int unused, int dm, u32 if (vdm.exponent >= 1023 + 32) { d = vdm.sign ? 0 : 0xffffffff; exceptions = FPSCR_IOC; - } else if (vdm.exponent >= 1023 - 1) { + } else if (vdm.exponent >= 1023) { int shift = 1023 + 63 - vdm.exponent; u64 rem, incr = 0; @@ -644,7 +644,7 @@ static u32 vfp_double_ftosi(ARMul_State* state, int sd, int unused, int dm, u32 if (vdm.sign) d = ~d; exceptions |= FPSCR_IOC; - } else if (vdm.exponent >= 1023 - 1) { + } else if (vdm.exponent >= 1023) { int shift = 1023 + 63 - vdm.exponent; /* 58 */ u64 rem, incr = 0; diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index 3d503486e..409b980ba 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp @@ -64,8 +64,6 @@ static void InitScreenCoordinates(OutputVertex& vtx) viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y); viewport.offset_x = float24::FromFloat32(static_cast(regs.viewport_corner.x)); viewport.offset_y = float24::FromFloat32(static_cast(regs.viewport_corner.y)); - viewport.zscale = float24::FromRaw(regs.viewport_depth_range); - viewport.offset_z = float24::FromRaw(regs.viewport_depth_far_plane); float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; vtx.color *= inv_w; @@ -78,7 +76,7 @@ static void InitScreenCoordinates(OutputVertex& vtx) vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; - vtx.screenpos[2] = viewport.offset_z + vtx.pos.z * inv_w * viewport.zscale; + vtx.screenpos[2] = vtx.pos.z * inv_w; } void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const OutputVertex &v2) { diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 3abe79c09..a889ec0e1 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -23,6 +23,8 @@ #include "video_core/debug_utils/debug_utils.h" #include "video_core/shader/shader_interpreter.h" +#include + namespace Pica { namespace CommandProcessor { @@ -169,6 +171,13 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { if (g_debug_context) { g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); } + + float depth; + uint8_t stencil; + glReadPixels(regs.framebuffer.GetWidth() / 2, regs.framebuffer.GetHeight() / 2, 1, 1, GL_DEPTH_COMPONENT, GL_FLOAT, &depth); + glReadPixels(regs.framebuffer.GetWidth() / 2, regs.framebuffer.GetHeight() / 2, 1, 1, GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, &stencil); + printf("Read depth: %f, stencil: 0x%02X\n", depth, stencil); + } break; diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 4552ff81c..f066c9719 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -69,7 +69,7 @@ struct Regs { INSERT_PADDING_WORDS(0x9); BitField<0, 24, u32> viewport_depth_range; // float24 - BitField<0, 24, u32> viewport_depth_far_plane; // float24 + BitField<0, 24, u32> viewport_depth_near_plane; // float24 BitField<0, 3, u32> vs_output_total; @@ -121,7 +121,20 @@ struct Regs { BitField<16, 10, s32> y; } viewport_corner; - INSERT_PADDING_WORDS(0x17); + INSERT_PADDING_WORDS(0x1); + + //TODO: early depth + INSERT_PADDING_WORDS(0x1); + + INSERT_PADDING_WORDS(0x2); + + enum DepthBuffering : u32 { + WBuffering = 0, + ZBuffering = 1, + }; + BitField< 0, 1, DepthBuffering> depthmap_enable; + + INSERT_PADDING_WORDS(0x12); struct TextureConfig { enum WrapMode : u32 { @@ -578,13 +591,19 @@ struct Regs { } struct { - INSERT_PADDING_WORDS(0x3); + INSERT_PADDING_WORDS(0x2); + + union { + BitField<0, 4, u32> allow_color_read; // 0 = disable, else enable + }; union { BitField<0, 4, u32> allow_color_write; // 0 = disable, else enable }; - INSERT_PADDING_WORDS(0x1); + union { + BitField<0, 2, u32> allow_depth_stencil_read; // 0 = disable, else enable + }; union { BitField<0, 2, u32> allow_depth_stencil_write; // 0 = disable, else enable @@ -1273,10 +1292,11 @@ ASSERT_REG_POSITION(cull_mode, 0x40); ASSERT_REG_POSITION(viewport_size_x, 0x41); ASSERT_REG_POSITION(viewport_size_y, 0x43); ASSERT_REG_POSITION(viewport_depth_range, 0x4d); -ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e); +ASSERT_REG_POSITION(viewport_depth_near_plane, 0x4e); ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); ASSERT_REG_POSITION(viewport_corner, 0x68); +ASSERT_REG_POSITION(depthmap_enable, 0x6D); ASSERT_REG_POSITION(texture0_enable, 0x80); ASSERT_REG_POSITION(texture0, 0x81); ASSERT_REG_POSITION(texture0_format, 0x8e); diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 5b9ed7c64..ab8a6f451 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -814,7 +814,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, }; if (stencil_action_enable) { - old_stencil = GetStencil(x >> 4, y >> 4); + old_stencil = (regs.framebuffer.allow_depth_stencil_read != 0) ? GetStencil(x >> 4, y >> 4) : 0; u8 dest = old_stencil & stencil_test.input_mask; u8 ref = stencil_test.reference_value & stencil_test.input_mask; @@ -859,13 +859,34 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, } } + // interpolated_z = z / w + float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 + + v1.screenpos[2].ToFloat32() * w1 + + v2.screenpos[2].ToFloat32() * w2) / wsum; + + // Z-Buffer (z / w * scale + offset) + float depth_scale = float24::FromRaw(regs.viewport_depth_range).ToFloat32(); + float depth_offset = float24::FromRaw(regs.viewport_depth_near_plane).ToFloat32(); + float depth = interpolated_z_over_w * depth_scale + depth_offset; + + // Potentially switch to W-Buffer + if (regs.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) { + float interpolated_w = interpolated_w_inverse.ToFloat32() * wsum; + + // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w) + depth *= interpolated_w; + + } + + // Clamp the result + depth = MathUtil::Clamp(depth, 0.0f, 1.0f); + + // Convert float to integer unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); - u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 + - v1.screenpos[2].ToFloat32() * w1 + - v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum); + u32 z = (u32)(depth * ((1 << num_bits) - 1)); if (output_merger.depth_test_enable) { - u32 ref_z = GetDepth(x >> 4, y >> 4); + u32 ref_z = (regs.framebuffer.allow_depth_stencil_read != 0) ? GetDepth(x >> 4, y >> 4) : 0; bool pass = false; @@ -917,7 +938,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, if (stencil_action_enable) UpdateStencil(stencil_test.action_depth_pass); - auto dest = GetPixel(x >> 4, y >> 4); + auto dest = (regs.framebuffer.allow_color_read != 0) ? GetPixel(x >> 4, y >> 4) : Math::Vec4(0,0,0,0); Math::Vec4 blend_output = combiner_output; if (output_merger.alphablend_enable) { @@ -991,6 +1012,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, return 255 - combiner_output.a(); case Regs::BlendFactor::DestAlpha: + case Regs::BlendFactor::DestColor: return dest.a(); case Regs::BlendFactor::OneMinusDestAlpha: diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 6ca9f45e2..bdc3fc6a9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -151,8 +151,8 @@ void RasterizerOpenGL::Reset() { SyncBlendFuncs(); SyncBlendColor(); SyncLogicOp(); - SyncStencilTest(); SyncDepthTest(); + SyncStencilTest(); SyncColorWriteMask(); SyncStencilWriteMask(); SyncDepthWriteMask(); @@ -250,10 +250,15 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { // Depth modifiers case PICA_REG_INDEX(viewport_depth_range): - case PICA_REG_INDEX(viewport_depth_far_plane): + case PICA_REG_INDEX(viewport_depth_near_plane): SyncDepthModifiers(); break; + // Depth buffering + case PICA_REG_INDEX(depthmap_enable): + state.draw.shader_dirty = true; + break; + // Blending case PICA_REG_INDEX(output_merger.alphablend_enable): SyncBlendEnabled(); @@ -286,6 +291,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { // (Pica depth test function register also contains a depth and color write mask) case PICA_REG_INDEX(output_merger.depth_test_enable): SyncDepthTest(); + SyncStencilTest(); SyncDepthWriteMask(); SyncColorWriteMask(); break; @@ -303,6 +309,18 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { SyncColorWriteMask(); break; + // Color read mask + case PICA_REG_INDEX(framebuffer.allow_color_read): + SyncLogicOp(); + SyncBlendFuncs(); + break; + + // Stencil and depth read mask + case PICA_REG_INDEX(framebuffer.allow_stencil_read): + SyncDepthTest(); + SyncStencilTest(); + break; + // Logic op case PICA_REG_INDEX(output_merger.logic_op): SyncLogicOp(); @@ -865,9 +883,9 @@ void RasterizerOpenGL::SyncCullMode() { void RasterizerOpenGL::SyncDepthModifiers() { float depth_scale = -Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); - float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f; + float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32(); - // TODO: Implement scale modifier + uniform_block_data.data.depth_scale = depth_scale; uniform_block_data.data.depth_offset = depth_offset; uniform_block_data.dirty = true; } @@ -878,10 +896,33 @@ void RasterizerOpenGL::SyncBlendEnabled() { void RasterizerOpenGL::SyncBlendFuncs() { const auto& regs = Pica::g_state.regs; - state.blend.src_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb); - state.blend.dst_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb); - state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a); - state.blend.dst_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_a); + + // This function pretends the destination read was 0x00 if the reads are not allowed + auto BlendAllowedFunc = [&](bool dest, Pica::Regs::BlendFactor factor) -> GLenum { + + if (regs.framebuffer.allow_color_read == 0x0) { + + // Destination would only read zero, so we can multiply by ZERO in blend func + if (dest) + return GL_ZERO; + + if (factor == Pica::Regs::BlendFactor::DestColor || + factor == Pica::Regs::BlendFactor::DestAlpha) + return GL_ZERO; + + if (factor == Pica::Regs::BlendFactor::OneMinusDestColor || + factor == Pica::Regs::BlendFactor::OneMinusDestAlpha) + return GL_ONE; + + } + + return PicaToGL::BlendFunc(factor); + }; + + state.blend.src_rgb_func = BlendAllowedFunc(false, regs.output_merger.alpha_blending.factor_source_rgb); + state.blend.dst_rgb_func = BlendAllowedFunc(true, regs.output_merger.alpha_blending.factor_dest_rgb); + state.blend.src_a_func = BlendAllowedFunc(false, regs.output_merger.alpha_blending.factor_source_a); + state.blend.dst_a_func = BlendAllowedFunc(true, regs.output_merger.alpha_blending.factor_dest_a); } void RasterizerOpenGL::SyncBlendColor() { @@ -901,7 +942,61 @@ void RasterizerOpenGL::SyncAlphaTest() { } void RasterizerOpenGL::SyncLogicOp() { - state.logic_op = PicaToGL::LogicOp(Pica::g_state.regs.output_merger.logic_op); + const auto& regs = Pica::g_state.regs; + + if (regs.framebuffer.allow_color_read == 0x0) { + + // Pretend that the destination reads always return 0 + switch (regs.output_merger.logic_op) { + + // Always 0 + case Pica::Regs::LogicOp::Clear: + case Pica::Regs::LogicOp::And: + case Pica::Regs::LogicOp::AndInverted: + state.logic_op = GL_CLEAR; + break; + + // Always s + case Pica::Regs::LogicOp::AndReverse: + case Pica::Regs::LogicOp::Copy: + case Pica::Regs::LogicOp::Or: + case Pica::Regs::LogicOp::Xor: + state.logic_op = GL_COPY; + break; + + // Always 1 + case Pica::Regs::LogicOp::Set: + case Pica::Regs::LogicOp::Invert: + case Pica::Regs::LogicOp::Nand: + case Pica::Regs::LogicOp::OrReverse: + state.logic_op = GL_SET; + break; + + // Always ~s + case Pica::Regs::LogicOp::CopyInverted: + case Pica::Regs::LogicOp::Nor: + case Pica::Regs::LogicOp::Equiv: + case Pica::Regs::LogicOp::OrInverted: + state.logic_op = GL_COPY_INVERTED; + break; + + // FIXME: Decide for one of those: + //a. NoOp means reading zero, writing back zero + //b. NoOp means not touching the framebuffer + case Pica::Regs::LogicOp::NoOp: + state.logic_op = GL_CLEAR; // a + state.logic_op = GL_NOOP; // b + break; + + default: + LOG_CRITICAL(Render_OpenGL, "Unknown logic op %d", regs.output_merger.logic_op); + UNREACHABLE(); + break; + } + + } else { + state.logic_op = PicaToGL::LogicOp(regs.output_merger.logic_op); + } } void RasterizerOpenGL::SyncColorWriteMask() { @@ -931,23 +1026,143 @@ void RasterizerOpenGL::SyncDepthWriteMask() { : GL_FALSE; } +// Depends on the correct GL DepthTest state! void RasterizerOpenGL::SyncStencilTest() { const auto& regs = Pica::g_state.regs; - state.stencil.test_enabled = regs.output_merger.stencil_test.enable && regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; - state.stencil.test_func = PicaToGL::CompareFunc(regs.output_merger.stencil_test.func); - state.stencil.test_ref = regs.output_merger.stencil_test.reference_value; - state.stencil.test_mask = regs.output_merger.stencil_test.input_mask; - state.stencil.action_stencil_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_stencil_fail); - state.stencil.action_depth_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_fail); - state.stencil.action_depth_pass = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_pass); + const auto& stencil_test = regs.output_merger.stencil_test; + + state.stencil.test_enabled = stencil_test.enable && regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; + + if (state.stencil.test_enabled) { + if (!regs.framebuffer.allow_stencil_read) { + + // All stencil reads must be emulated as 0x00 + + u8 masked_ref = (stencil_test.reference_value & stencil_test.input_mask); + + if (masked_ref == 0x00) { + // x = 0x00 + state.stencil.test_func = PicaToGL::CompareXToXFunc(stencil_test.func); + } else { + // x = masked stencil ref + state.stencil.test_func = PicaToGL::CompareXToZeroFunc(stencil_test.func); + } + + //FIXME: check if writeback is possible, otherwise this is useless + if (true) { + + // We need a stencil read if we can't decide the stencil test staticly + bool needs_stencil_read = (state.stencil.test_func != GL_NEVER && + state.stencil.test_func != GL_ALWAYS); + + //TODO: In rare cases the stencil test doesn't depend on the masked ref ('Never' and 'Always') and only 1 specific value for + // the stencil op writes is necessary. In those cases one could set the ref and ref-mask to the required value. + //FIXME: Return value instead..? + auto StencilAllowedOp = [&](Pica::Regs::StencilAction action) -> GLenum { + + switch (action) { + + // FIXME: Decide for one of those: + //a. Keeping the framebuffer value means reading zero, writing back zero + //b. Keeping the framebuffer value means not touching it + case Pica::Regs::StencilAction::Keep: + return GL_ZERO; // a + return GL_KEEP; // b + + // Always 0x01, requires 0x01 in masked_ref to work + case Pica::Regs::StencilAction::Increment: + case Pica::Regs::StencilAction::IncrementWrap: + if (masked_ref != 0x01) { + needs_stencil_read = true; + } + return GL_REPLACE; + + // Always 0xFF, requires 0xFF in masked_ref to work + case Pica::Regs::StencilAction::Invert: + case Pica::Regs::StencilAction::DecrementWrap: + if (masked_ref != 0xFF) { + needs_stencil_read = true; + } + return GL_REPLACE; + + // Always masked ref + case Pica::Regs::StencilAction::Replace: + return GL_REPLACE; + + // Always 0x00 + case Pica::Regs::StencilAction::Zero: + case Pica::Regs::StencilAction::Decrement: + return GL_ZERO; + + default: + LOG_CRITICAL(Render_OpenGL, "Unknown stencil action %x", (int)action); + UNIMPLEMENTED(); + return GL_KEEP; + } + + }; + + // If the stencil test can fail we have to check the stencil op + if (state.depth.test_func != GL_ALWAYS) { + state.stencil.action_stencil_fail = StencilAllowedOp(stencil_test.action_stencil_fail); + } + + // If the depth test can pass we have to check the stencil op + if (state.depth.test_func != GL_NEVER) { + state.stencil.action_depth_fail = StencilAllowedOp(stencil_test.action_depth_pass); + } + + // If the depth test can fail we have to check the stencil op + if (state.depth.test_func != GL_ALWAYS) { + state.stencil.action_depth_pass = StencilAllowedOp(stencil_test.action_depth_fail); + } + + // Now check if we support this mode + if (needs_stencil_read) { + LOG_CRITICAL(Render_OpenGL, "Can't emulate disabled read from stencil yet"); + UNIMPLEMENTED(); + } + + } + + } else { + state.stencil.test_func = PicaToGL::CompareFunc(stencil_test.func); + state.stencil.test_ref = stencil_test.reference_value; + state.stencil.test_mask = stencil_test.input_mask; + state.stencil.action_stencil_fail = PicaToGL::StencilOp(stencil_test.action_stencil_fail); + state.stencil.action_depth_fail = PicaToGL::StencilOp(stencil_test.action_depth_fail); + state.stencil.action_depth_pass = PicaToGL::StencilOp(stencil_test.action_depth_pass); + } + } } +// Always call SyncStencilTest after this returns! void RasterizerOpenGL::SyncDepthTest() { const auto& regs = Pica::g_state.regs; - state.depth.test_enabled = regs.output_merger.depth_test_enable == 1 || - regs.output_merger.depth_write_enable == 1; - state.depth.test_func = regs.output_merger.depth_test_enable == 1 ? - PicaToGL::CompareFunc(regs.output_merger.depth_test_func) : GL_ALWAYS; + + // Enable depth test so depth writes can still occur + state.depth.test_enabled = GL_TRUE; + + if (!regs.output_merger.depth_test_enable) { + state.depth.test_func = GL_ALWAYS; + } else { + + if (!regs.framebuffer.allow_depth_read) { + + // If reads are not allowed we have to patch the depth test accordingly + state.depth.test_func = PicaToGL::CompareXToZeroFunc(regs.output_merger.depth_test_func); + + // Check if the result is known at this point, if not it depends on the framebuffer really being zero + if (state.depth.test_func != GL_NEVER && state.depth.test_func != GL_ALWAYS) { + LOG_CRITICAL(Render_OpenGL, "Can't emulate disabled read on depth yet"); + UNIMPLEMENTED(); + } + + } else { + state.depth.test_func = PicaToGL::CompareFunc(regs.output_merger.depth_test_func); + } + + } } void RasterizerOpenGL::SyncCombinerColor() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 390349a0c..32f5b60c5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -35,6 +35,8 @@ struct PicaShaderConfig { PicaShaderConfig res; const auto& regs = Pica::g_state.regs; + res.depthmap_enable = regs.depthmap_enable; + res.alpha_test_func = regs.output_merger.alpha_test.enable ? regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always; @@ -141,6 +143,8 @@ struct PicaShaderConfig { return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0; }; + Pica::Regs::DepthBuffering depthmap_enable = Pica::Regs::DepthBuffering::WBuffering; + Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never; std::array tev_stages = {}; u8 combiner_buffer_input = 0; @@ -303,6 +307,7 @@ private: GLvec4 const_color[6]; GLvec4 tev_combiner_buffer_color; GLint alphatest_ref; + GLfloat depth_scale; GLfloat depth_offset; alignas(16) GLvec3 lighting_global_ambient; LightSrc light_src[8]; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index ee4b54ab9..fbde93a4e 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -525,6 +525,7 @@ layout (std140) uniform shader_data { vec4 const_color[NUM_TEV_STAGES]; vec4 tev_combiner_buffer_color; int alphatest_ref; + float depth_scale; float depth_offset; vec3 lighting_global_ambient; LightSrc light_src[NUM_LIGHTS]; @@ -566,7 +567,15 @@ vec4 secondary_fragment_color = vec4(0.0); } out += "color = last_tex_env_out;\n"; - out += "gl_FragDepth = gl_FragCoord.z + depth_offset;\n}"; + + out += "float z_over_w = gl_FragCoord.z * 2.0 - 1.0;\n"; + out += "float depth = z_over_w * depth_scale + depth_offset;\n"; + if (config.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) { + out += "depth /= gl_FragCoord.w;\n"; + } + out += "gl_FragDepth = depth;\n"; + + out += "}"; return out; } diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index fd3617d77..731260951 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -155,6 +155,54 @@ inline GLenum CompareFunc(Pica::Regs::CompareFunc func) { return compare_func_table[(unsigned)func]; } +/// Pretend we compare 'x FUNC x' +inline GLenum CompareXToXFunc(Pica::Regs::CompareFunc func) { + static const GLenum compare_func_table[] = { + GL_NEVER, // CompareFunc::Never + GL_ALWAYS, // CompareFunc::Always + GL_ALWAYS, // CompareFunc::Equal + GL_NEVER, // CompareFunc::NotEqual + GL_NEVER, // CompareFunc::LessThan + GL_ALWAYS, // CompareFunc::LessThanOrEqual + GL_NEVER, // CompareFunc::GreaterThan + GL_ALWAYS, // CompareFunc::GreaterThanOrEqual + }; + + // Range check table for input + if (static_cast(func) >= ARRAY_SIZE(compare_func_table)) { + LOG_CRITICAL(Render_OpenGL, "Unknown compare function %d", func); + UNREACHABLE(); + + return GL_ALWAYS; + } + + return compare_func_table[(unsigned)func]; +} + +/// Pretend we compare 'x FUNC 0' (unsigned) +inline GLenum CompareXToZeroFunc(Pica::Regs::CompareFunc func) { + static const GLenum compare_func_table[] = { + GL_NEVER, // CompareFunc::Never + GL_ALWAYS, // CompareFunc::Always + GL_EQUAL, // CompareFunc::Equal + GL_NOTEQUAL, // CompareFunc::NotEqual + GL_NEVER, // CompareFunc::LessThan + GL_LEQUAL, // CompareFunc::LessThanOrEqual + GL_GREATER, // CompareFunc::GreaterThan + GL_ALWAYS, // CompareFunc::GreaterThanOrEqual + }; + + // Range check table for input + if (static_cast(func) >= ARRAY_SIZE(compare_func_table)) { + LOG_CRITICAL(Render_OpenGL, "Unknown compare function %d", func); + UNREACHABLE(); + + return GL_ALWAYS; + } + + return compare_func_table[(unsigned)func]; +} + inline GLenum StencilOp(Pica::Regs::StencilAction action) { static const GLenum stencil_op_table[] = { GL_KEEP, // StencilAction::Keep