mirror of
https://github.com/SerenityOS/serenity.git
synced 2025-01-22 09:21:57 -05:00
AK: Make FloatExtractor use bit_cast<>() instead of a union
The motivation is to allow functions that use FloatExtractor to be constexpr. Type punning through a union will never work in constexpr. In practice, bit_cast<>()ing bit fields also does not yet work in clang, but that's just a bug and it will work eventually (and it does already work in gcc): https://github.com/llvm/llvm-project/issues/54018 No behavior change.
This commit is contained in:
parent
39a2356c54
commit
48a28cffd5
5 changed files with 77 additions and 79 deletions
|
@ -13,91 +13,95 @@
|
|||
namespace AK {
|
||||
|
||||
template<typename T>
|
||||
union FloatExtractor;
|
||||
struct FloatExtractor;
|
||||
|
||||
#ifdef AK_HAS_FLOAT_128
|
||||
template<>
|
||||
union FloatExtractor<f128> {
|
||||
struct FloatExtractor<f128> {
|
||||
static constexpr FloatExtractor<f128> from_float(f128 f) { return bit_cast<FloatExtractor<f128>>(f); }
|
||||
constexpr f128 to_float() const { return bit_cast<f128>(*this); }
|
||||
|
||||
using ComponentType = unsigned __int128;
|
||||
static constexpr int mantissa_bits = 112;
|
||||
static constexpr ComponentType mantissa_max = (((ComponentType)1) << 112) - 1;
|
||||
static constexpr int exponent_bias = 16383;
|
||||
static constexpr int exponent_bits = 15;
|
||||
static constexpr unsigned exponent_max = 32767;
|
||||
struct [[gnu::packed]] {
|
||||
ComponentType mantissa : 112;
|
||||
ComponentType exponent : 15;
|
||||
ComponentType sign : 1;
|
||||
};
|
||||
f128 d;
|
||||
|
||||
ComponentType mantissa : 112;
|
||||
ComponentType exponent : 15;
|
||||
ComponentType sign : 1;
|
||||
};
|
||||
// Validate that f128 and the FloatExtractor union are 128 bits.
|
||||
// Validate that f128 and the FloatExtractor struct are 128 bits.
|
||||
static_assert(AssertSize<f128, 16>());
|
||||
static_assert(AssertSize<FloatExtractor<f128>, sizeof(f128)>());
|
||||
#endif
|
||||
|
||||
#ifdef AK_HAS_FLOAT_80
|
||||
template<>
|
||||
union FloatExtractor<f80> {
|
||||
struct FloatExtractor<f80> {
|
||||
static constexpr FloatExtractor<f80> from_float(f80 f) { return bit_cast<FloatExtractor<f80>>(f); }
|
||||
constexpr f80 to_float() const { return bit_cast<f80>(*this); }
|
||||
|
||||
using ComponentType = unsigned long long;
|
||||
static constexpr int mantissa_bits = 64;
|
||||
static constexpr ComponentType mantissa_max = ~0ull;
|
||||
static constexpr int exponent_bias = 16383;
|
||||
static constexpr int exponent_bits = 15;
|
||||
static constexpr unsigned exponent_max = 32767;
|
||||
struct [[gnu::packed]] {
|
||||
// This is technically wrong: Extended floating point values really only have 63 bits of mantissa
|
||||
// and an "integer bit" that behaves in various strange, unintuitive and non-IEEE-754 ways.
|
||||
// However, since all bit-fiddling float code assumes IEEE floats, it cannot handle this properly.
|
||||
// If we pretend that 80-bit floats are IEEE floats with 64-bit mantissas, almost everything works correctly
|
||||
// and we just need a few special cases.
|
||||
ComponentType mantissa : 64;
|
||||
ComponentType exponent : 15;
|
||||
ComponentType sign : 1;
|
||||
};
|
||||
f80 d;
|
||||
|
||||
// This is technically wrong: Extended floating point values really only have 63 bits of mantissa
|
||||
// and an "integer bit" that behaves in various strange, unintuitive and non-IEEE-754 ways.
|
||||
// However, since all bit-fiddling float code assumes IEEE floats, it cannot handle this properly.
|
||||
// If we pretend that 80-bit floats are IEEE floats with 64-bit mantissas, almost everything works correctly
|
||||
// and we just need a few special cases.
|
||||
ComponentType mantissa : 64;
|
||||
ComponentType exponent : 15;
|
||||
ComponentType sign : 1;
|
||||
};
|
||||
static_assert(AssertSize<FloatExtractor<f80>, sizeof(f80)>());
|
||||
#endif
|
||||
|
||||
template<>
|
||||
union FloatExtractor<f64> {
|
||||
struct FloatExtractor<f64> {
|
||||
static constexpr FloatExtractor<f64> from_float(f64 f) { return bit_cast<FloatExtractor<f64>>(f); }
|
||||
constexpr f64 to_float() const { return bit_cast<f64>(*this); }
|
||||
|
||||
using ComponentType = unsigned long long;
|
||||
static constexpr int mantissa_bits = 52;
|
||||
static constexpr ComponentType mantissa_max = (1ull << 52) - 1;
|
||||
static constexpr int exponent_bias = 1023;
|
||||
static constexpr int exponent_bits = 11;
|
||||
static constexpr unsigned exponent_max = 2047;
|
||||
struct [[gnu::packed]] {
|
||||
// FIXME: These types have to all be the same, otherwise this struct
|
||||
// goes from being a bitfield describing the layout of an f64
|
||||
// into being a multibyte mess on windows.
|
||||
// Technically, '-mno-ms-bitfields' is supposed to disable this
|
||||
// very intuitive and portable behaviour on windows, but it doesn't
|
||||
// work with the msvc ABI.
|
||||
// See <https://github.com/llvm/llvm-project/issues/24757>
|
||||
ComponentType mantissa : 52;
|
||||
ComponentType exponent : 11;
|
||||
ComponentType sign : 1;
|
||||
};
|
||||
f64 d;
|
||||
|
||||
// FIXME: These types have to all be the same, otherwise this struct
|
||||
// goes from being a bitfield describing the layout of an f64
|
||||
// into being a multibyte mess on windows.
|
||||
// Technically, '-mno-ms-bitfields' is supposed to disable this
|
||||
// very intuitive and portable behaviour on windows, but it doesn't
|
||||
// work with the msvc ABI.
|
||||
// See <https://github.com/llvm/llvm-project/issues/24757>
|
||||
ComponentType mantissa : 52;
|
||||
ComponentType exponent : 11;
|
||||
ComponentType sign : 1;
|
||||
};
|
||||
static_assert(AssertSize<FloatExtractor<f64>, sizeof(f64)>());
|
||||
|
||||
template<>
|
||||
union FloatExtractor<f32> {
|
||||
struct FloatExtractor<f32> {
|
||||
static constexpr FloatExtractor<f32> from_float(f32 f) { return bit_cast<FloatExtractor<f32>>(f); }
|
||||
constexpr f32 to_float() const { return bit_cast<f32>(*this); }
|
||||
|
||||
using ComponentType = unsigned;
|
||||
static constexpr int mantissa_bits = 23;
|
||||
static constexpr ComponentType mantissa_max = (1 << 23) - 1;
|
||||
static constexpr int exponent_bias = 127;
|
||||
static constexpr int exponent_bits = 8;
|
||||
static constexpr ComponentType exponent_max = 255;
|
||||
struct [[gnu::packed]] {
|
||||
ComponentType mantissa : 23;
|
||||
ComponentType exponent : 8;
|
||||
ComponentType sign : 1;
|
||||
};
|
||||
f32 d;
|
||||
|
||||
ComponentType mantissa : 23;
|
||||
ComponentType exponent : 8;
|
||||
ComponentType sign : 1;
|
||||
};
|
||||
static_assert(AssertSize<FloatExtractor<f32>, sizeof(f32)>());
|
||||
|
||||
|
|
17
AK/Math.h
17
AK/Math.h
|
@ -70,11 +70,10 @@ template<FloatingPoint FloatT>
|
|||
FloatT copysign(FloatT x, FloatT y)
|
||||
{
|
||||
using Extractor = FloatExtractor<FloatT>;
|
||||
Extractor ex, ey;
|
||||
ex.d = x;
|
||||
ey.d = y;
|
||||
auto ex = Extractor::from_float(x);
|
||||
auto ey = Extractor::from_float(y);
|
||||
ex.sign = ey.sign;
|
||||
return ex.d;
|
||||
return ex.to_float();
|
||||
}
|
||||
|
||||
#define CONSTEXPR_STATE(function, args...) \
|
||||
|
@ -475,10 +474,10 @@ constexpr T fmod(T x, T y)
|
|||
// If y_exponent < x_exponent, we'll iteratively reduce x_exponent by shifting from
|
||||
// the exponent into the mantissa.
|
||||
|
||||
FloatExtractor<T> x_bits { .d = x };
|
||||
auto x_bits = FloatExtractor<T>::from_float(x);
|
||||
typename FloatExtractor<T>::ComponentType x_exponent = x_bits.exponent; // - FloatExtractor<T>::exponent_bias;
|
||||
|
||||
FloatExtractor<T> y_bits { .d = y };
|
||||
auto y_bits = FloatExtractor<T>::from_float(y);
|
||||
typename FloatExtractor<T>::ComponentType y_exponent = y_bits.exponent; // - FloatExtractor<T>::exponent_bias;
|
||||
|
||||
// FIXME: Handle denormals. For now, treat them as 0.
|
||||
|
@ -523,7 +522,7 @@ constexpr T fmod(T x, T y)
|
|||
|
||||
x_bits.exponent = x_exponent;
|
||||
x_bits.mantissa = x_mantissa;
|
||||
return x_bits.d;
|
||||
return x_bits.to_float();
|
||||
# else
|
||||
if constexpr (IsSame<T, long double>)
|
||||
return __builtin_fmodl(x, y);
|
||||
|
@ -966,7 +965,7 @@ constexpr T log2(T x)
|
|||
if (x <= 0 || __builtin_isnan(x))
|
||||
return NaN<T>;
|
||||
|
||||
FloatExtractor<T> ext { .d = x };
|
||||
auto ext = FloatExtractor<T>::from_float(x);
|
||||
T exponent = ext.exponent - FloatExtractor<T>::exponent_bias;
|
||||
|
||||
// When the mantissa shows 0b00 (implicitly 1.0) we are on a power of 2
|
||||
|
@ -982,7 +981,7 @@ constexpr T log2(T x)
|
|||
};
|
||||
|
||||
// (1 <= mantissa < 2)
|
||||
T m = mantissa_ext.d;
|
||||
T m = mantissa_ext.to_float();
|
||||
|
||||
// This is a reconstruction of one of Sun's algorithms
|
||||
// They use a transformation to lower the problem space,
|
||||
|
|
|
@ -47,7 +47,7 @@ FloatingPointExponentialForm inner_convert_floating_point_to_decimal_exponential
|
|||
{
|
||||
using Extractor = FloatExtractor<FloatingPoint>;
|
||||
|
||||
Extractor bit_representation { .d = value };
|
||||
auto bit_representation = Extractor::from_float(value);
|
||||
|
||||
bool sign = bit_representation.sign;
|
||||
i32 exponent = bit_representation.exponent;
|
||||
|
|
|
@ -73,8 +73,7 @@ static FloatType internal_to_integer(FloatType x, RoundingMode rounding_mode)
|
|||
// Most component types are larger than int.
|
||||
constexpr auto zero = static_cast<Extractor::ComponentType>(0);
|
||||
constexpr auto one = static_cast<Extractor::ComponentType>(1);
|
||||
Extractor extractor;
|
||||
extractor.d = x;
|
||||
auto extractor = Extractor::from_float(x);
|
||||
|
||||
auto unbiased_exponent = extractor.exponent - Extractor::exponent_bias;
|
||||
|
||||
|
@ -132,16 +131,18 @@ static FloatType internal_to_integer(FloatType x, RoundingMode rounding_mode)
|
|||
break;
|
||||
}
|
||||
|
||||
auto result = extractor.to_float();
|
||||
|
||||
if (should_round) {
|
||||
// We could do this ourselves, but this saves us from manually
|
||||
// handling overflow.
|
||||
if (extractor.sign)
|
||||
extractor.d -= static_cast<FloatType>(1.0);
|
||||
result -= static_cast<FloatType>(1.0);
|
||||
else
|
||||
extractor.d += static_cast<FloatType>(1.0);
|
||||
result += static_cast<FloatType>(1.0);
|
||||
}
|
||||
|
||||
return extractor.d;
|
||||
return result;
|
||||
}
|
||||
|
||||
// This is much branchier than it really needs to be
|
||||
|
@ -151,22 +152,21 @@ static FloatType internal_nextafter(FloatType x, bool up)
|
|||
if (!isfinite(x))
|
||||
return x;
|
||||
using Extractor = FloatExtractor<decltype(x)>;
|
||||
Extractor extractor;
|
||||
extractor.d = x;
|
||||
auto extractor = Extractor::from_float(x);
|
||||
if (x == 0) {
|
||||
if (!extractor.sign) {
|
||||
extractor.mantissa = 1;
|
||||
extractor.sign = !up;
|
||||
return extractor.d;
|
||||
return extractor.to_float();
|
||||
}
|
||||
if (up) {
|
||||
extractor.sign = false;
|
||||
extractor.mantissa = 1;
|
||||
return extractor.d;
|
||||
return extractor.to_float();
|
||||
}
|
||||
extractor.mantissa = 1;
|
||||
extractor.sign = up != extractor.sign;
|
||||
return extractor.d;
|
||||
return extractor.to_float();
|
||||
}
|
||||
if (up != extractor.sign) {
|
||||
extractor.mantissa++;
|
||||
|
@ -179,22 +179,21 @@ static FloatType internal_nextafter(FloatType x, bool up)
|
|||
extractor.mantissa = Extractor::mantissa_max;
|
||||
}
|
||||
}
|
||||
return extractor.d;
|
||||
return extractor.to_float();
|
||||
}
|
||||
|
||||
if (!extractor.mantissa) {
|
||||
if (extractor.exponent) {
|
||||
extractor.exponent--;
|
||||
extractor.mantissa = Extractor::mantissa_max;
|
||||
} else {
|
||||
extractor.d = 0;
|
||||
return extractor.to_float();
|
||||
}
|
||||
return extractor.d;
|
||||
return 0;
|
||||
}
|
||||
|
||||
extractor.mantissa--;
|
||||
if (extractor.mantissa != Extractor::mantissa_max)
|
||||
return extractor.d;
|
||||
return extractor.to_float();
|
||||
if (extractor.exponent) {
|
||||
extractor.exponent--;
|
||||
// normalize
|
||||
|
@ -206,7 +205,7 @@ static FloatType internal_nextafter(FloatType x, bool up)
|
|||
extractor.exponent = Extractor::exponent_max;
|
||||
}
|
||||
}
|
||||
return extractor.d;
|
||||
return extractor.to_float();
|
||||
}
|
||||
|
||||
template<typename FloatT>
|
||||
|
@ -223,8 +222,7 @@ static int internal_ilogb(FloatT x) NOEXCEPT
|
|||
|
||||
using Extractor = FloatExtractor<FloatT>;
|
||||
|
||||
Extractor extractor;
|
||||
extractor.d = x;
|
||||
auto extractor = Extractor::from_float(x);
|
||||
|
||||
return (int)extractor.exponent - Extractor::exponent_bias;
|
||||
}
|
||||
|
@ -247,12 +245,11 @@ static FloatT internal_scalbn(FloatT x, int exponent) NOEXCEPT
|
|||
return x;
|
||||
|
||||
using Extractor = FloatExtractor<FloatT>;
|
||||
Extractor extractor;
|
||||
extractor.d = x;
|
||||
auto extractor = Extractor::from_float(x);
|
||||
|
||||
if (extractor.exponent != 0) {
|
||||
extractor.exponent = clamp((int)extractor.exponent + exponent, 0, (int)Extractor::exponent_max);
|
||||
return extractor.d;
|
||||
return extractor.to_float();
|
||||
}
|
||||
|
||||
unsigned leading_mantissa_zeroes = extractor.mantissa == 0 ? 32 : count_leading_zeroes(extractor.mantissa);
|
||||
|
@ -262,7 +259,7 @@ static FloatT internal_scalbn(FloatT x, int exponent) NOEXCEPT
|
|||
extractor.exponent <<= shift;
|
||||
extractor.exponent = exponent + 1;
|
||||
|
||||
return extractor.d;
|
||||
return extractor.to_float();
|
||||
}
|
||||
|
||||
template<typename FloatT>
|
||||
|
|
|
@ -49,8 +49,7 @@ UnsignedBigInteger::UnsignedBigInteger(double value)
|
|||
return;
|
||||
}
|
||||
|
||||
FloatExtractor<double> extractor;
|
||||
extractor.d = value;
|
||||
auto extractor = FloatExtractor<double>::from_float(value);
|
||||
VERIFY(!extractor.sign);
|
||||
|
||||
i32 real_exponent = extractor.exponent - extractor.exponent_bias;
|
||||
|
@ -350,7 +349,7 @@ double UnsignedBigInteger::to_double(UnsignedBigInteger::RoundingMode rounding_m
|
|||
VERIFY((mantissa & 0xfff0000000000000) == 0);
|
||||
extractor.mantissa = mantissa;
|
||||
|
||||
return extractor.d;
|
||||
return extractor.to_float();
|
||||
}
|
||||
|
||||
void UnsignedBigInteger::set_to_0()
|
||||
|
@ -638,8 +637,7 @@ UnsignedBigInteger::CompareResult UnsignedBigInteger::compare_to_double(double v
|
|||
if (is_zero())
|
||||
return CompareResult::DoubleGreaterThanBigInt;
|
||||
|
||||
FloatExtractor<double> extractor;
|
||||
extractor.d = value;
|
||||
auto extractor = FloatExtractor<double>::from_float(value);
|
||||
|
||||
// Value cannot be negative at this point.
|
||||
VERIFY(extractor.sign == 0);
|
||||
|
|
Loading…
Reference in a new issue