pica_types: float: truncate, flush to 0, and treat infinities for all values

Doing it at FromFloat32 results in this being applied to every Float<M, E> that isn't constructed from raw. Note: due to the current lack of compiler support for C++23, the use of std::isnormal and std::abs prevents Trunc from being constexpr. This required the changes to Zero, One, and operator- so that FromFloat32 is not used in constexpr contexts; those specific changes may be reverted in the future.
2024-01-20 21:20:28 -03:00
parent a19166ddec
commit d2af98673a
2 changed files with 65 additions and 6 deletions
--- a/src/video_core/pica_types.h
+++ b/src/video_core/pica_types.h
@@ -7,6 +7,7 @@
 #include <bit>
 #include <cmath>
 #include <cstring>
+#include <limits>
 #include <boost/serialization/access.hpp>
 #include "common/common_types.h"

@@ -29,6 +30,41 @@ public:
    static constexpr Float<M, E> FromFloat32(float val) {
        Float<M, E> ret;
        ret.value = val;
+        return Trunc(ret);
+    }
+
+    static constexpr Float<M, E> MinNormal() {
+        Float<M, E> ret;
+        // Minimum normal value = 1.0 / (1 << ((1 << (E - 1)) - 2));
+        if constexpr (E == 5) {
+            ret.value = 0x1.p-14;
+        } else {
+            // E == 7
+            ret.value = (0x1.p-62);
+        }
+        return ret;
+    }
+
+    // These values are approximate: each is rounded up to a power of two.
+    static constexpr Float<M, E> Max() {
+        Float<M, E> ret;
+        if constexpr (E == 5) {
+            ret.value = 0x1.p16;
+        } else {
+            // E == 7
+            ret.value = 0x1.p64;
+        }
+        return ret;
+    }
+
+    // Before C++23, std::isnormal and std::abs are not constexpr, so this function cannot be
+    // evaluated in constant expressions until compilers support that.
+    static constexpr Float<M, E> Trunc(const Float<M, E>& val) {
+        Float<M, E> ret = val.Flushed().InfChecked();
+        if (std::isnormal(val.ToFloat32())) {
+            u32 hex = std::bit_cast<u32>(ret.ToFloat32()) & (0xffffffff ^ ((1 << (23 - M)) - 1));
+            ret.value = std::bit_cast<float>(hex);
+        }
        return ret;
    }

@@ -57,11 +93,15 @@ public:
    }

    static constexpr Float<M, E> Zero() {
-        return FromFloat32(0.f);
+        Float<M, E> ret;
+        ret.value = 0.f;
+        return ret;
    }

    static constexpr Float<M, E> One() {
-        return FromFloat32(1.f);
+        Float<M, E> ret;
+        ret.value = 1.f;
+        return ret;
    }

    // Not recommended for anything but logging
@@ -69,6 +109,24 @@ public:
        return value;
    }

+    constexpr Float<M, E> Flushed() const {
+        Float<M, E> ret;
+        ret.value = value;
+        if (std::abs(value) < MinNormal().ToFloat32()) {
+            ret.value = 0;
+        }
+        return ret;
+    }
+
+    constexpr Float<M, E> InfChecked() const {
+        Float<M, E> ret;
+        ret.value = value;
+        if (std::abs(value) > Max().ToFloat32()) {
+            ret.value = value * std::numeric_limits<float>::infinity();
+        }
+        return ret;
+    }
+
    constexpr Float<M, E> operator*(const Float<M, E>& flt) const {
        float result = value * flt.ToFloat32();
        // PICA gives 0 instead of NaN when multiplying by inf
@@ -111,7 +169,9 @@ public:
    }

    constexpr Float<M, E> operator-() const {
-        return Float<M, E>::FromFloat32(-ToFloat32());
+        Float<M, E> ret;
+        ret.value = -value;
+        return ret;
    }

    constexpr bool operator<(const Float<M, E>& flt) const {
--- a/src/video_core/renderer_software/sw_rasterizer.cpp
+++ b/src/video_core/renderer_software/sw_rasterizer.cpp
@@ -125,9 +125,8 @@ void RasterizerSoftware::AddTriangle(const Pica::OutputVertex& v0, const Pica::O
    auto* input_list = &buffer_b;

    // NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value.
-    // TODO: Not sure if this is a valid approach. Also should probably instead use the smallest
-    //       epsilon possible within f24 accuracy.
-    static constexpr f24 EPSILON = f24::FromFloat32(0.00001f);
+    // TODO: Not sure if this is a valid approach.
+    static constexpr f24 EPSILON = f24::MinNormal();
    static constexpr f24 f0 = f24::Zero();
    static constexpr f24 f1 = f24::One();
    static constexpr std::array<ClippingEdge, 7> clipping_edges = {{