From 939dab71202aa28b907e14876a3811dca0ed2348 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 9 May 2021 04:01:17 -0300
Subject: [PATCH] glasm: Implement more GLASM composite instructions

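---
Reviewer notes (ignored by git am; not part of the commit message):

* emit_glasm_composite.cpp: the U32-only helpers CompositeConstructU32 and
  CompositeExtractU32 are generalized into CompositeConstruct (templated on
  the IR::Value immediate accessor and on the MOV type-suffix character) and
  CompositeExtract (type suffix passed as an argument). A new CompositeInsert
  helper copies the source composite into the result register when the two
  differ and then overwrites the selected component.
* The F32x2/x3/x4 construct, extract and insert emitters no longer throw
  NotImplementedException; they call the shared helpers with the 'F' suffix.
  The existing U32 construct and extract emitters call the same helpers with
  the 'U' suffix.
* emit_glasm_instructions.h: the F32 composite declarations gain an IR::Inst&
  parameter, and the construct variants now take const IR::Value& operands
  instead of ScalarF32.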
 .../backend/glasm/emit_glasm_composite.cpp    | 92 ++++++++++---------
 .../backend/glasm/emit_glasm_instructions.h   | 25 +++--
 2 files changed, 63 insertions(+), 54 deletions(-)

diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
index 063dcaf136..94dc5019d1 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
@@ -8,60 +8,71 @@
 
 namespace Shader::Backend::GLASM {
 namespace {
-template <typename... Values>
-void CompositeConstructU32(EmitContext& ctx, IR::Inst& inst, Values&&... elements) {
+template <auto read_imm, char type, typename... Values>
+void CompositeConstruct(EmitContext& ctx, IR::Inst& inst, Values&&... elements) {
     const Register ret{ctx.reg_alloc.Define(inst)};
     if (std::ranges::any_of(std::array{elements...},
                             [](const IR::Value& value) { return value.IsImmediate(); })) {
-        const std::array<u32, 4> values{(elements.IsImmediate() ? elements.U32() : 0)...};
-        ctx.Add("MOV.U {},{{{},{},{},{}}};", ret, fmt::to_string(values[0]),
+        using Type = std::invoke_result_t<decltype(read_imm), IR::Value>;
+        const std::array<Type, 4> values{(elements.IsImmediate() ? (elements.*read_imm)() : 0)...};
+        ctx.Add("MOV.{} {},{{{},{},{},{}}};", type, ret, fmt::to_string(values[0]),
                 fmt::to_string(values[1]), fmt::to_string(values[2]), fmt::to_string(values[3]));
     }
     size_t index{};
     for (const IR::Value& element : {elements...}) {
         if (!element.IsImmediate()) {
             const ScalarU32 value{ctx.reg_alloc.Consume(element)};
-            ctx.Add("MOV.U {}.{},{};", ret, "xyzw"[index], value);
+            ctx.Add("MOV.{} {}.{},{};", type, ret, "xyzw"[index], value);
         }
         ++index;
     }
 }
 
-void CompositeExtractU32(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+void CompositeExtract(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index, char type) {
     const Register ret{ctx.reg_alloc.Define(inst)};
     if (ret == composite && index == 0) {
         // No need to do anything here, the source and destination are the same register
         return;
     }
-    ctx.Add("MOV.U {}.x,{}.{};", ret, composite, "xyzw"[index]);
+    ctx.Add("MOV.{} {}.x,{}.{};", type, ret, composite, "xyzw"[index]);
+}
+
+template <typename ObjectType>
+void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, ObjectType object,
+                     u32 index, char type) {
+    const Register ret{ctx.reg_alloc.Define(inst)};
+    if (ret != composite) {
+        ctx.Add("MOV.{} {},{};", type, ret, composite);
+    }
+    ctx.Add("MOV.{} {}.{},{};", type, ret, "xyzw"[index], object);
 }
 } // Anonymous namespace
 
 void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
                                  const IR::Value& e2) {
-    CompositeConstructU32(ctx, inst, e1, e2);
+    CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2);
 }
 
 void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
                                  const IR::Value& e2, const IR::Value& e3) {
-    CompositeConstructU32(ctx, inst, e1, e2, e3);
+    CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3);
 }
 
 void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
                                  const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) {
-    CompositeConstructU32(ctx, inst, e1, e2, e3, e4);
+    CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3, e4);
 }
 
 void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
-    CompositeExtractU32(ctx, inst, composite, index);
+    CompositeExtract(ctx, inst, composite, index, 'U');
 }
 
 void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
-    CompositeExtractU32(ctx, inst, composite, index);
+    CompositeExtract(ctx, inst, composite, index, 'U');
 }
 
 void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
-    CompositeExtractU32(ctx, inst, composite, index);
+    CompositeExtract(ctx, inst, composite, index, 'U');
 }
 
 void EmitCompositeInsertU32x2([[maybe_unused]] EmitContext& ctx,
@@ -131,53 +142,46 @@ void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx,
     throw NotImplementedException("GLASM instruction");
 }
 
-void EmitCompositeConstructF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 e1,
-                                 [[maybe_unused]] ScalarF32 e2) {
-    throw NotImplementedException("GLASM instruction");
+void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+                                 const IR::Value& e2) {
+    CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2);
 }
 
-void EmitCompositeConstructF32x3([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 e1,
-                                 [[maybe_unused]] ScalarF32 e2, [[maybe_unused]] ScalarF32 e3) {
-    throw NotImplementedException("GLASM instruction");
+void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+                                 const IR::Value& e2, const IR::Value& e3) {
+    CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3);
 }
 
-void EmitCompositeConstructF32x4([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 e1,
-                                 [[maybe_unused]] ScalarF32 e2, [[maybe_unused]] ScalarF32 e3,
-                                 [[maybe_unused]] ScalarF32 e4) {
-    throw NotImplementedException("GLASM instruction");
+void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+                                 const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) {
+    CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3, e4);
 }
 
-void EmitCompositeExtractF32x2([[maybe_unused]] EmitContext& ctx,
-                               [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
-    throw NotImplementedException("GLASM instruction");
+void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+    CompositeExtract(ctx, inst, composite, index, 'F');
 }
 
-void EmitCompositeExtractF32x3([[maybe_unused]] EmitContext& ctx,
-                               [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
-    throw NotImplementedException("GLASM instruction");
+void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+    CompositeExtract(ctx, inst, composite, index, 'F');
 }
 
-void EmitCompositeExtractF32x4([[maybe_unused]] EmitContext& ctx,
-                               [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
-    throw NotImplementedException("GLASM instruction");
+void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+    CompositeExtract(ctx, inst, composite, index, 'F');
 }
 
-void EmitCompositeInsertF32x2([[maybe_unused]] EmitContext& ctx,
-                              [[maybe_unused]] Register composite,
-                              [[maybe_unused]] ScalarF32 object, [[maybe_unused]] u32 index) {
-    throw NotImplementedException("GLASM instruction");
+void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite,
+                              ScalarF32 object, u32 index) {
+    CompositeInsert(ctx, inst, composite, object, index, 'F');
 }
 
-void EmitCompositeInsertF32x3([[maybe_unused]] EmitContext& ctx,
-                              [[maybe_unused]] Register composite,
-                              [[maybe_unused]] ScalarF32 object, [[maybe_unused]] u32 index) {
-    throw NotImplementedException("GLASM instruction");
+void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite,
+                              ScalarF32 object, u32 index) {
+    CompositeInsert(ctx, inst, composite, object, index, 'F');
 }
 
-void EmitCompositeInsertF32x4([[maybe_unused]] EmitContext& ctx,
-                              [[maybe_unused]] Register composite,
-                              [[maybe_unused]] ScalarF32 object, [[maybe_unused]] u32 index) {
-    throw NotImplementedException("GLASM instruction");
+void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite,
+                              ScalarF32 object, u32 index) {
+    CompositeInsert(ctx, inst, composite, object, index, 'F');
 }
 
 void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) {
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
index 6db76bf463..42de6716a4 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -158,16 +158,21 @@ void EmitCompositeExtractF16x4(EmitContext& ctx, Register composite, u32 index);
 void EmitCompositeInsertF16x2(EmitContext& ctx, Register composite, Register object, u32 index);
 void EmitCompositeInsertF16x3(EmitContext& ctx, Register composite, Register object, u32 index);
 void EmitCompositeInsertF16x4(EmitContext& ctx, Register composite, Register object, u32 index);
-void EmitCompositeConstructF32x2(EmitContext& ctx, ScalarF32 e1, ScalarF32 e2);
-void EmitCompositeConstructF32x3(EmitContext& ctx, ScalarF32 e1, ScalarF32 e2, ScalarF32 e3);
-void EmitCompositeConstructF32x4(EmitContext& ctx, ScalarF32 e1, ScalarF32 e2, ScalarF32 e3,
-                                 ScalarF32 e4);
-void EmitCompositeExtractF32x2(EmitContext& ctx, Register composite, u32 index);
-void EmitCompositeExtractF32x3(EmitContext& ctx, Register composite, u32 index);
-void EmitCompositeExtractF32x4(EmitContext& ctx, Register composite, u32 index);
-void EmitCompositeInsertF32x2(EmitContext& ctx, Register composite, ScalarF32 object, u32 index);
-void EmitCompositeInsertF32x3(EmitContext& ctx, Register composite, ScalarF32 object, u32 index);
-void EmitCompositeInsertF32x4(EmitContext& ctx, Register composite, ScalarF32 object, u32 index);
+void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+                                 const IR::Value& e2);
+void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+                                 const IR::Value& e2, const IR::Value& e3);
+void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+                                 const IR::Value& e2, const IR::Value& e3, const IR::Value& e4);
+void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite,
+                              ScalarF32 object, u32 index);
+void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite,
+                              ScalarF32 object, u32 index);
+void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite,
+                              ScalarF32 object, u32 index);
 void EmitCompositeConstructF64x2(EmitContext& ctx);
 void EmitCompositeConstructF64x3(EmitContext& ctx);
 void EmitCompositeConstructF64x4(EmitContext& ctx);