From 862dc2b2b31b8353449f61dcc68d69255721f6c6 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Mon, 23 Aug 2021 19:52:07 -0400
Subject: [PATCH 1/3] structured_control_flow: Add DemoteCombinationPass

Some drivers misread data when demotes are interleaved in the program. This moves demote branches to be checked at the end of the program.
Fixes "wireframe" issue in Pokemon SwSh on some drivers
---
 .../maxwell/structured_control_flow.cpp       | 108 +++++++++++++++++-
 1 file changed, 107 insertions(+), 1 deletion(-)

diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index 8b3e0a15c7..f124dc8c07 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -660,6 +660,9 @@ public:
         IR::Block& first_block{*syntax_list.front().data.block};
         IR::IREmitter ir(first_block, first_block.begin());
         ir.Prologue();
+        if (uses_demote_to_helper) {
+            DemoteCombinationPass();
+        }
     }
 
 private:
@@ -809,7 +812,14 @@ private:
             }
             case StatementType::Return: {
                 ensure_block();
-                IR::IREmitter{*current_block}.Epilogue();
+                IR::Block* return_block{block_pool.Create(inst_pool)};
+                IR::IREmitter{*return_block}.Epilogue();
+                current_block->AddBranch(return_block);
+
+                auto& merge{syntax_list.emplace_back()};
+                merge.type = IR::AbstractSyntaxNode::Type::Block;
+                merge.data.block = return_block;
+
                 current_block = nullptr;
                 syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
                 break;
@@ -824,6 +834,7 @@ private:
                 auto& merge{syntax_list.emplace_back()};
                 merge.type = IR::AbstractSyntaxNode::Type::Block;
                 merge.data.block = demote_block;
+                uses_demote_to_helper = true;
                 break;
             }
             case StatementType::Unreachable: {
@@ -855,11 +866,106 @@ private:
         return block_pool.Create(inst_pool);
     }
 
+    void DemoteCombinationPass() {
+        using Type = IR::AbstractSyntaxNode::Type;
+        std::vector<IR::Block*> demote_blocks;
+        std::vector<IR::U1> demote_conds;
+        u32 num_epilogues{};
+        for (const IR::AbstractSyntaxNode& node : syntax_list) {
+            if (node.type != Type::Block) {
+                continue;
+            }
+            for (const IR::Inst& inst : node.data.block->Instructions()) {
+                const IR::Opcode op{inst.GetOpcode()};
+                if (op == IR::Opcode::DemoteToHelperInvocation) {
+                    demote_blocks.push_back(node.data.block);
+                    break;
+                }
+                if (op == IR::Opcode::Epilogue) {
+                    ++num_epilogues;
+                }
+            }
+        }
+        if (demote_blocks.size() == 0) {
+            return;
+        }
+        if (num_epilogues > 1) {
+            LOG_DEBUG(Shader, "Combining demotes with more than one return is not implemented.");
+            return;
+        }
+        s64 last_iterator_offset{};
+        auto& asl{syntax_list};
+        for (const IR::Block* demote_block : demote_blocks) {
+            const auto start_it{asl.begin() + last_iterator_offset};
+            auto asl_it{std::find_if(start_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
+                return asn.type == Type::If && asn.data.if_node.body == demote_block;
+            })};
+            if (asl_it == asl.end()) {
+                // Demote without a conditional branch.
+                // No need to proceed since all fragment instances will be demoted regardless.
+                return;
+            }
+            const IR::Block* const end_if = asl_it->data.if_node.merge;
+            demote_conds.push_back(asl_it->data.if_node.cond);
+            last_iterator_offset = std::distance(asl.begin(), asl_it);
+
+            asl_it = asl.erase(asl_it);
+            asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
+                return asn.type == Type::Block && asn.data.block == demote_block;
+            });
+
+            asl_it = asl.erase(asl_it);
+            asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
+                return asn.type == Type::EndIf && asn.data.end_if.merge == end_if;
+            });
+            asl_it = asl.erase(asl_it);
+        }
+        const auto epilogue_func{[](const IR::AbstractSyntaxNode& asn) {
+            if (asn.type != Type::Block) {
+                return false;
+            }
+            for (const auto& inst : asn.data.block->Instructions()) {
+                if (inst.GetOpcode() == IR::Opcode::Epilogue) {
+                    return true;
+                }
+            }
+            return false;
+        }};
+        const auto reverse_it{std::find_if(asl.rbegin(), asl.rend(), epilogue_func)};
+        const auto return_block_it{(reverse_it + 1).base()};
+
+        IR::IREmitter ir{*(return_block_it - 1)->data.block};
+        IR::U1 cond(IR::Value(false));
+        for (const auto& demote_cond : demote_conds) {
+            cond = ir.LogicalOr(cond, demote_cond);
+        }
+        cond.Inst()->DestructiveAddUsage(1);
+
+        IR::AbstractSyntaxNode demote_if_node{};
+        demote_if_node.type = Type::If;
+        demote_if_node.data.if_node.cond = cond;
+        demote_if_node.data.if_node.body = demote_blocks[0];
+        demote_if_node.data.if_node.merge = return_block_it->data.block;
+
+        IR::AbstractSyntaxNode demote_node{};
+        demote_node.type = Type::Block;
+        demote_node.data.block = demote_blocks[0];
+
+        IR::AbstractSyntaxNode demote_endif_node{};
+        demote_endif_node.type = Type::EndIf;
+        demote_endif_node.data.end_if.merge = return_block_it->data.block;
+
+        asl.insert(return_block_it, demote_endif_node);
+        asl.insert(return_block_it, demote_node);
+        asl.insert(return_block_it, demote_if_node);
+    }
+
     ObjectPool<Statement>& stmt_pool;
     ObjectPool<IR::Inst>& inst_pool;
     ObjectPool<IR::Block>& block_pool;
     Environment& env;
     IR::AbstractSyntaxList& syntax_list;
+    bool uses_demote_to_helper{};
 
 // TODO: C++20 Remove this when all compilers support constexpr std::vector
 #if __cpp_lib_constexpr_vector >= 201907

From 4fda7f1c821e1000210b1dfdf1074795b1aa9d96 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Mon, 23 Aug 2021 20:00:11 -0400
Subject: [PATCH 2/3] structured_control_flow: Conditionally invoke demote
 reorder pass

This is only needed on select drivers when a fragment shader discards/demotes.
---
 .../frontend/maxwell/structured_control_flow.cpp       | 10 ++++++----
 .../frontend/maxwell/structured_control_flow.h         |  9 ++++++---
 .../frontend/maxwell/translate_program.cpp             |  2 +-
 src/shader_recompiler/host_translate_info.h            |  5 +++--
 src/video_core/renderer_opengl/gl_device.h             |  4 ++++
 src/video_core/renderer_opengl/gl_shader_cache.cpp     |  1 +
 src/video_core/renderer_vulkan/vk_pipeline_cache.cpp   |  2 ++
 7 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index f124dc8c07..77d1cd0fc3 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -20,6 +20,7 @@
 #include "shader_recompiler/frontend/maxwell/decode.h"
 #include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
 #include "shader_recompiler/frontend/maxwell/translate/translate.h"
+#include "shader_recompiler/host_translate_info.h"
 #include "shader_recompiler/object_pool.h"
 
 namespace Shader::Maxwell {
@@ -652,7 +653,7 @@ class TranslatePass {
 public:
     TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
                   ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt,
-                  IR::AbstractSyntaxList& syntax_list_)
+                  IR::AbstractSyntaxList& syntax_list_, const HostTranslateInfo& host_info)
         : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},
           syntax_list{syntax_list_} {
         Visit(root_stmt, nullptr, nullptr);
@@ -660,7 +661,7 @@ public:
         IR::Block& first_block{*syntax_list.front().data.block};
         IR::IREmitter ir(first_block, first_block.begin());
         ir.Prologue();
-        if (uses_demote_to_helper) {
+        if (uses_demote_to_helper && host_info.needs_demote_reorder) {
             DemoteCombinationPass();
         }
     }
@@ -977,12 +978,13 @@ private:
 } // Anonymous namespace
 
 IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
-                                Environment& env, Flow::CFG& cfg) {
+                                Environment& env, Flow::CFG& cfg,
+                                const HostTranslateInfo& host_info) {
     ObjectPool<Statement> stmt_pool{64};
     GotoPass goto_pass{cfg, stmt_pool};
     Statement& root{goto_pass.RootStatement()};
     IR::AbstractSyntaxList syntax_list;
-    TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list};
+    TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list, host_info};
     return syntax_list;
 }
 
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
index 88b0836491..e38158da33 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
@@ -11,10 +11,13 @@
 #include "shader_recompiler/frontend/maxwell/control_flow.h"
 #include "shader_recompiler/object_pool.h"
 
-namespace Shader::Maxwell {
+namespace Shader {
+struct HostTranslateInfo;
+namespace Maxwell {
 
 [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
                                               ObjectPool<IR::Block>& block_pool, Environment& env,
-                                              Flow::CFG& cfg);
+                                              Flow::CFG& cfg, const HostTranslateInfo& host_info);
 
-} // namespace Shader::Maxwell
+} // namespace Maxwell
+} // namespace Shader
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index c067d459cc..012d553574 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -130,7 +130,7 @@ void AddNVNStorageBuffers(IR::Program& program) {
 IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
                              Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
     IR::Program program;
-    program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg);
+    program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info);
     program.blocks = GenerateBlocks(program.syntax_list);
     program.post_order_blocks = PostOrder(program.syntax_list.front());
     program.stage = env.ShaderStage();
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index 94a584219c..96468b2e79 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -11,8 +11,9 @@ namespace Shader {
 
 /// Misc information about the host
 struct HostTranslateInfo {
-    bool support_float16{}; ///< True when the device supports 16-bit floats
-    bool support_int64{};   ///< True when the device supports 64-bit integers
+    bool support_float16{};      ///< True when the device supports 16-bit floats
+    bool support_int64{};        ///< True when the device supports 64-bit integers
+    bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
 };
 
 } // namespace Shader
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index ee992aed4a..de9e41659d 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -156,6 +156,10 @@ public:
         return shader_backend;
     }
 
+    bool IsAmd() const {
+        return vendor_name == "ATI Technologies Inc.";
+    }
+
 private:
     static bool TestVariableAoffi();
     static bool TestPreciseBug();
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 1f4dda17ef..b0e14182e3 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -219,6 +219,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
       host_info{
           .support_float16 = false,
           .support_int64 = device.HasShaderInt64(),
+          .needs_demote_reorder = device.IsAmd(),
       } {
     if (use_asynchronous_shaders) {
         workers = CreateWorkers();
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index f316c4f922..31bfbcb064 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -325,6 +325,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw
     host_info = Shader::HostTranslateInfo{
         .support_float16 = device.IsFloat16Supported(),
         .support_int64 = device.IsShaderInt64Supported(),
+        .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR ||
+                                driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR,
     };
 }
 

From 907dfbea71bbfd92290d1eff1d2f0f7a33b32dc1 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Wed, 25 Aug 2021 14:26:49 -0400
Subject: [PATCH 3/3] structured_control_flow: Skip reordering nested demote
 branches.

Nested demote branches add complexity with combining the condition if it has not been initialized yet. Skip them for the time being.
---
 .../frontend/maxwell/structured_control_flow.cpp      | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index 77d1cd0fc3..69eeaa3e6a 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -872,10 +872,21 @@ private:
         std::vector<IR::Block*> demote_blocks;
         std::vector<IR::U1> demote_conds;
         u32 num_epilogues{};
+        u32 branch_depth{};
         for (const IR::AbstractSyntaxNode& node : syntax_list) {
+            if (node.type == Type::If) {
+                ++branch_depth;
+            }
+            if (node.type == Type::EndIf) {
+                --branch_depth;
+            }
             if (node.type != Type::Block) {
                 continue;
             }
+            if (branch_depth > 1) {
+                // Skip reordering nested demote branches.
+                continue;
+            }
             for (const IR::Inst& inst : node.data.block->Instructions()) {
                 const IR::Opcode op{inst.GetOpcode()};
                 if (op == IR::Opcode::DemoteToHelperInvocation) {