From 03db0ea0581f88f3dc4874a28afa9ada01073da1 Mon Sep 17 00:00:00 2001 From: B3n30 Date: Sun, 30 Jul 2017 13:07:32 +0200 Subject: [PATCH] float24: take care of the denormalized values --- src/video_core/pica_types.h | 57 ++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h index 1af9ac8f5..11ec00c53 100644 --- a/src/video_core/pica_types.h +++ b/src/video_core/pica_types.h @@ -28,24 +28,22 @@ public: u32 hex; std::memcpy(&hex, &val, sizeof(u32)); - if (val == 0.f) - return Float::Zero(); // Pica200 has no -0 - const int bias = 127 - Float::EXPONENT_BIAS; - u32 sign = hex >> 31; - u32 exponent = ((hex >> 23) & ((1 << 8) - 1)) - bias; - u32 mantissa = (hex & ((1 << 23) - 1)) >> (23 - M - 1); - - // calculate with a M+1-bit mantissa and round down to M bit - if (mantissa << 31) - mantissa = ((mantissa >> 1) + 1); - else - mantissa = (mantissa >> 1); + // Take care of special case 0.0 + if (val == 0.f) { + // Since Pica has no -0, we don't have to check for the sign + return Float::Zero(); + } + const s32 bias = 127 - EXPONENT_BIAS; + const s32 sign = hex & 1 << 31; + const s32 exponent = ((hex >> 23) & ((1 << 8) - 1)) - bias; + // Take care of the special cases NAN and INF if (std::isnan(val)) { Float res; res.value = val; return res; - } else if (exponent & (1 << E)) { + } else if (exponent > EXPONENT_MASK) { + // exponent is bigger than the maximum value for E, thus infinity if (sign) { Float res; res.value = -std::numeric_limits::infinity(); @@ -57,7 +55,28 @@ public: } } - u32 res = (sign << 31) | ((exponent + bias) << 23) | (mantissa << (23 - M)); + // calculate with a M+1-bit mantissa and round down to M bit + u32 mantissa = (hex & ((1 << 23) - 1)) >> (23 - M - 1); + + if (mantissa & 1) { + // TODO(B3N30): Check that this is the correct PICA rounding + mantissa = ((mantissa >> 1) + 1); + } else { + mantissa = (mantissa >> 1); + } + + // Take care of the
denormalized values. Are the limits correct? + if ((exponent < -EXPONENT_BIAS) && (exponent > -(s32)(EXPONENT_BIAS + M))) { + // Mask out the least significant bits that would get lost due to normalization + mantissa &= ~((1 << (-exponent - EXPONENT_BIAS)) - 1); + } else if (exponent <= -(s32)(EXPONENT_BIAS + M)) { + // It's even too small for denormalized values + return Float::Zero(); + } + + // TODO(B3N30): Make this faster. Compared to the original hex only the mantissa is changed + // slightly + u32 res = sign | ((exponent + bias) << 23) | (mantissa << (23 - M)); Float result; std::memcpy(&result.value, &res, sizeof(float)); return result; @@ -66,9 +85,9 @@ public: static Float FromRaw(u32 hex) { Float res; - const int width = M + E + 1; - const int bias = 128 - (1 << (E - 1)); - const int exponent = (hex >> M) & ((1 << E) - 1); + const s32 width = M + E + 1; + const s32 bias = 127 - EXPONENT_BIAS; + const s32 exponent = (hex >> M) & ((1 << E) - 1); const unsigned mantissa = hex & ((1 << M) - 1); if (hex & ((1 << (width - 1)) - 1)) @@ -161,10 +180,8 @@ public: private: static_assert(M + E + 1 <= 32, "Maximum bitsize is 32"); - static const unsigned MASK = (1 << (M + E + 1)) - 1; - static const unsigned MANTISSA_MASK = (1 << M) - 1; static const unsigned EXPONENT_MASK = (1 << E) - 1; - static const u32 EXPONENT_BIAS = (1 << (E - 1)) - 1; + static const s32 EXPONENT_BIAS = (1 << (E - 1)) - 1; // Stored as a regular float, merely for convenience // TODO: Perform proper arithmetic on this!