pica_types: float: truncate, flush to 0, and treat infinities for all values

Doing this in FromFloat32 means it is applied to every Float<M, E>
that is not constructed from a raw value.

Note: because compilers do not yet support C++23, std::isnormal and
std::abs are not constexpr, so Trunc cannot be evaluated in constant
expressions. Zero, One, and operator- were therefore changed to stop
calling FromFloat32 in constexpr contexts; those specific changes may
be reverted in the future.
This commit is contained in:
Vitor Kiguchi 2024-01-20 21:20:28 -03:00
parent a19166ddec
commit d2af98673a
2 changed files with 65 additions and 6 deletions

View File

@ -7,6 +7,7 @@
#include <bit> #include <bit>
#include <cmath> #include <cmath>
#include <cstring> #include <cstring>
#include <limits>
#include <boost/serialization/access.hpp> #include <boost/serialization/access.hpp>
#include "common/common_types.h" #include "common/common_types.h"
@ -29,6 +30,41 @@ public:
// Builds a Float<M, E> from a float32 value. In the updated (right-hand) version the stored value
// is passed through Trunc before being returned.
static constexpr Float<M, E> FromFloat32(float val) { static constexpr Float<M, E> FromFloat32(float val) {
Float<M, E> ret; Float<M, E> ret;
ret.value = val; ret.value = val;
return Trunc(ret);
}
static constexpr Float<M, E> MinNormal() {
    // Minimum normal value = 1.0 / (1 << ((1 << (E - 1)) - 2)):
    // 2^-14 for a 5-bit exponent; any other instantiation is taken to have E == 7 (2^-62).
    Float<M, E> smallest;
    smallest.value = (E == 5) ? 0x1.p-14 : 0x1.p-62;
    return smallest;
}
// Upper magnitude bound of the format. The values are approximate, rounded up:
// 2^16 for a 5-bit exponent; any other instantiation is taken to have E == 7 (2^64).
static constexpr Float<M, E> Max() {
    Float<M, E> largest;
    largest.value = (E == 5) ? 0x1.p16 : 0x1.p64;
    return largest;
}
// Returns val after flushing it to zero (Flushed) and applying the infinity check (InfChecked).
// When val itself is a normal float32, the low (23 - M) bits of the result's bit pattern are
// additionally cleared, i.e. the mantissa is truncated rather than rounded.
// Before C++23 std::isnormal and std::abs aren't considered constexpr, so this function can't be
// used as constexpr until the compilers support that.
static constexpr Float<M, E> Trunc(const Float<M, E>& val) {
Float<M, E> ret = val.Flushed().InfChecked();
// Note: the normality test is made on the input value, not on the flushed/checked result.
if (std::isnormal(val.ToFloat32())) {
// Build a mask with the low (23 - M) bits zeroed and apply it to the float32 bit pattern.
u32 hex = std::bit_cast<u32>(ret.ToFloat32()) & (0xffffffff ^ ((1 << (23 - M)) - 1));
ret.value = std::bit_cast<float>(hex);
}
return ret; return ret;
} }
@ -57,11 +93,15 @@ public:
} }
// Constant 0. The previous (left-hand) version returned FromFloat32(0.f); the updated
// (right-hand) version assigns 0.f to the value directly instead of calling FromFloat32.
static constexpr Float<M, E> Zero() { static constexpr Float<M, E> Zero() {
return FromFloat32(0.f); Float<M, E> ret;
ret.value = 0.f;
return ret;
} }
// Constant 1. The previous (left-hand) version returned FromFloat32(1.f); the updated
// (right-hand) version assigns 1.f to the value directly instead of calling FromFloat32.
static constexpr Float<M, E> One() { static constexpr Float<M, E> One() {
return FromFloat32(1.f); Float<M, E> ret;
ret.value = 1.f;
return ret;
} }
// Not recommended for anything but logging // Not recommended for anything but logging
@ -69,6 +109,24 @@ public:
return value; return value;
} }
constexpr Float<M, E> Flushed() const {
    // Magnitudes below the smallest normal value of this format are replaced by zero;
    // everything else is carried over unchanged.
    const float threshold = MinNormal().ToFloat32();
    Float<M, E> flushed;
    flushed.value = (std::abs(value) < threshold) ? 0 : value;
    return flushed;
}
constexpr Float<M, E> InfChecked() const {
    // Magnitudes above Max() are multiplied by infinity, which yields an infinity carrying the
    // sign of the original value; everything else is carried over unchanged.
    const bool exceeds_max = std::abs(value) > Max().ToFloat32();
    Float<M, E> checked;
    checked.value = exceeds_max ? value * std::numeric_limits<float>::infinity() : value;
    return checked;
}
constexpr Float<M, E> operator*(const Float<M, E>& flt) const { constexpr Float<M, E> operator*(const Float<M, E>& flt) const {
float result = value * flt.ToFloat32(); float result = value * flt.ToFloat32();
// PICA gives 0 instead of NaN when multiplying by inf // PICA gives 0 instead of NaN when multiplying by inf
@ -111,7 +169,9 @@ public:
} }
// Unary negation. The previous (left-hand) version went through FromFloat32(-ToFloat32()); the
// updated (right-hand) version negates the stored value directly instead of calling FromFloat32.
constexpr Float<M, E> operator-() const {
return Float<M, E>::FromFloat32(-ToFloat32()); Float<M, E> ret;
ret.value = -value;
return ret;
} }
constexpr bool operator<(const Float<M, E>& flt) const { constexpr bool operator<(const Float<M, E>& flt) const {

View File

@ -125,9 +125,8 @@ void RasterizerSoftware::AddTriangle(const Pica::OutputVertex& v0, const Pica::O
auto* input_list = &buffer_b; auto* input_list = &buffer_b;
// NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value. // NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value.
// TODO: Not sure if this is a valid approach. Also should probably instead use the smallest // TODO: Not sure if this is a valid approach.
// epsilon possible within f24 accuracy. static constexpr f24 EPSILON = f24::MinNormal();
static constexpr f24 EPSILON = f24::FromFloat32(0.00001f);
static constexpr f24 f0 = f24::Zero(); static constexpr f24 f0 = f24::Zero();
static constexpr f24 f1 = f24::One(); static constexpr f24 f1 = f24::One();
static constexpr std::array<ClippingEdge, 7> clipping_edges = {{ static constexpr std::array<ClippingEdge, 7> clipping_edges = {{