mirror of
https://github.com/citra-emu/citra.git
synced 2024-11-29 23:40:05 +00:00
float24: take care of the denormalized values
This commit is contained in:
parent
6cae218e66
commit
03db0ea058
@ -28,24 +28,22 @@ public:
|
|||||||
u32 hex;
|
u32 hex;
|
||||||
std::memcpy(&hex, &val, sizeof(u32));
|
std::memcpy(&hex, &val, sizeof(u32));
|
||||||
|
|
||||||
if (val == 0.f)
|
// Take care of special case 0.0
|
||||||
return Float<M, E>::Zero(); // Pica200 has no -0
|
if (val == 0.f) {
|
||||||
const int bias = 127 - Float<M, E>::EXPONENT_BIAS;
|
// Since Pica has no -0, we don't have to check for the sign
|
||||||
u32 sign = hex >> 31;
|
return Float<M, E>::Zero();
|
||||||
u32 exponent = ((hex >> 23) & ((1 << 8) - 1)) - bias;
|
}
|
||||||
u32 mantissa = (hex & ((1 << 23) - 1)) >> (23 - M - 1);
|
const s32 bias = 127 - EXPONENT_BIAS;
|
||||||
|
const s32 sign = hex & 1 << 31;
|
||||||
// calculate with a M+1-bit mantissa and round down to M bit
|
const s32 exponent = ((hex >> 23) & ((1 << 8) - 1)) - bias;
|
||||||
if (mantissa << 31)
|
|
||||||
mantissa = ((mantissa >> 1) + 1);
|
|
||||||
else
|
|
||||||
mantissa = (mantissa >> 1);
|
|
||||||
|
|
||||||
|
// Take care of the special cases NAN and INF
|
||||||
if (std::isnan(val)) {
|
if (std::isnan(val)) {
|
||||||
Float<M, E> res;
|
Float<M, E> res;
|
||||||
res.value = val;
|
res.value = val;
|
||||||
return res;
|
return res;
|
||||||
} else if (exponent & (1 << E)) {
|
} else if (exponent > EXPONENT_MASK) {
|
||||||
|
// exponent is bigger than the maximum value for E, thus infinity
|
||||||
if (sign) {
|
if (sign) {
|
||||||
Float<M, E> res;
|
Float<M, E> res;
|
||||||
res.value = -std::numeric_limits<float>::infinity();
|
res.value = -std::numeric_limits<float>::infinity();
|
||||||
@ -57,7 +55,28 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 res = (sign << 31) | ((exponent + bias) << 23) | (mantissa << (23 - M));
|
// calculate with a M+1-bit mantissa and round down to M bit
|
||||||
|
u32 mantissa = (hex & ((1 << 23) - 1)) >> (23 - M - 1);
|
||||||
|
|
||||||
|
if (mantissa & 1) {
|
||||||
|
// TODO(B3N30): Check that this is the correct PICA rounding
|
||||||
|
mantissa = ((mantissa >> 1) + 1);
|
||||||
|
} else {
|
||||||
|
mantissa = (mantissa >> 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Take care of the denormalized values. Are the limits correct?
|
||||||
|
if ((exponent < -EXPONENT_BIAS) && (exponent > -(s32)(EXPONENT_BIAS + M))) {
|
||||||
|
// Mask out the least significant bits that would get lost due to normalization
|
||||||
|
mantissa &= ~((1 << (-exponent - EXPONENT_BIAS)) - 1);
|
||||||
|
} else if (exponent <= -(s32)(EXPONENT_BIAS + M)) {
|
||||||
|
// It's even too small for denormalized values
|
||||||
|
return Float<M, E>::Zero();
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(B3N30): Make this faster. Compared to the original hex only the mantissa is changed
|
||||||
|
// slightly
|
||||||
|
u32 res = sign | ((exponent + bias) << 23) | (mantissa << (23 - M));
|
||||||
Float<M, E> result;
|
Float<M, E> result;
|
||||||
std::memcpy(&result.value, &res, sizeof(float));
|
std::memcpy(&result.value, &res, sizeof(float));
|
||||||
return result;
|
return result;
|
||||||
@ -66,9 +85,9 @@ public:
|
|||||||
static Float<M, E> FromRaw(u32 hex) {
|
static Float<M, E> FromRaw(u32 hex) {
|
||||||
Float<M, E> res;
|
Float<M, E> res;
|
||||||
|
|
||||||
const int width = M + E + 1;
|
const s32 width = M + E + 1;
|
||||||
const int bias = 128 - (1 << (E - 1));
|
const s32 bias = 127 - EXPONENT_BIAS;
|
||||||
const int exponent = (hex >> M) & ((1 << E) - 1);
|
const s32 exponent = (hex >> M) & ((1 << E) - 1);
|
||||||
const unsigned mantissa = hex & ((1 << M) - 1);
|
const unsigned mantissa = hex & ((1 << M) - 1);
|
||||||
|
|
||||||
if (hex & ((1 << (width - 1)) - 1))
|
if (hex & ((1 << (width - 1)) - 1))
|
||||||
@ -161,10 +180,8 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
static_assert(M + E + 1 <= 32, "Maximum bitsize is 32");
|
static_assert(M + E + 1 <= 32, "Maximum bitsize is 32");
|
||||||
static const unsigned MASK = (1 << (M + E + 1)) - 1;
|
|
||||||
static const unsigned MANTISSA_MASK = (1 << M) - 1;
|
|
||||||
static const unsigned EXPONENT_MASK = (1 << E) - 1;
|
static const unsigned EXPONENT_MASK = (1 << E) - 1;
|
||||||
static const u32 EXPONENT_BIAS = (1 << (E - 1)) - 1;
|
static const s32 EXPONENT_BIAS = (1 << (E - 1)) - 1;
|
||||||
|
|
||||||
// Stored as a regular float, merely for convenience
|
// Stored as a regular float, merely for convenience
|
||||||
// TODO: Perform proper arithmetic on this!
|
// TODO: Perform proper arithmetic on this!
|
||||||
|
Loading…
Reference in New Issue
Block a user