mirror of
https://github.com/SerenityOS/serenity.git
synced 2025-01-22 17:31:58 -05:00
AK: Make FloatExtractor use bit_cast<>() instead of a union
The motivation is to allow functions that use FloatExtractor to be constexpr. Type punning through a union will never work in constexpr. In practice, using bit_cast<>() on a type with bit-fields does not yet work in clang either, but that is just a bug and it will work eventually (it already works in gcc); see https://github.com/llvm/llvm-project/issues/54018 for the clang issue. No behavior change.
This commit is contained in:
parent
39a2356c54
commit
48a28cffd5
5 changed files with 77 additions and 79 deletions
|
@ -13,91 +13,95 @@
|
||||||
namespace AK {
|
namespace AK {
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
union FloatExtractor;
|
struct FloatExtractor;
|
||||||
|
|
||||||
#ifdef AK_HAS_FLOAT_128
|
#ifdef AK_HAS_FLOAT_128
|
||||||
template<>
|
template<>
|
||||||
union FloatExtractor<f128> {
|
struct FloatExtractor<f128> {
|
||||||
|
static constexpr FloatExtractor<f128> from_float(f128 f) { return bit_cast<FloatExtractor<f128>>(f); }
|
||||||
|
constexpr f128 to_float() const { return bit_cast<f128>(*this); }
|
||||||
|
|
||||||
using ComponentType = unsigned __int128;
|
using ComponentType = unsigned __int128;
|
||||||
static constexpr int mantissa_bits = 112;
|
static constexpr int mantissa_bits = 112;
|
||||||
static constexpr ComponentType mantissa_max = (((ComponentType)1) << 112) - 1;
|
static constexpr ComponentType mantissa_max = (((ComponentType)1) << 112) - 1;
|
||||||
static constexpr int exponent_bias = 16383;
|
static constexpr int exponent_bias = 16383;
|
||||||
static constexpr int exponent_bits = 15;
|
static constexpr int exponent_bits = 15;
|
||||||
static constexpr unsigned exponent_max = 32767;
|
static constexpr unsigned exponent_max = 32767;
|
||||||
struct [[gnu::packed]] {
|
|
||||||
ComponentType mantissa : 112;
|
ComponentType mantissa : 112;
|
||||||
ComponentType exponent : 15;
|
ComponentType exponent : 15;
|
||||||
ComponentType sign : 1;
|
ComponentType sign : 1;
|
||||||
};
|
|
||||||
f128 d;
|
|
||||||
};
|
};
|
||||||
// Validate that f128 and the FloatExtractor union are 128 bits.
|
// Validate that f128 and the FloatExtractor struct are 128 bits.
|
||||||
static_assert(AssertSize<f128, 16>());
|
static_assert(AssertSize<f128, 16>());
|
||||||
static_assert(AssertSize<FloatExtractor<f128>, sizeof(f128)>());
|
static_assert(AssertSize<FloatExtractor<f128>, sizeof(f128)>());
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef AK_HAS_FLOAT_80
|
#ifdef AK_HAS_FLOAT_80
|
||||||
template<>
|
template<>
|
||||||
union FloatExtractor<f80> {
|
struct FloatExtractor<f80> {
|
||||||
|
static constexpr FloatExtractor<f80> from_float(f80 f) { return bit_cast<FloatExtractor<f80>>(f); }
|
||||||
|
constexpr f80 to_float() const { return bit_cast<f80>(*this); }
|
||||||
|
|
||||||
using ComponentType = unsigned long long;
|
using ComponentType = unsigned long long;
|
||||||
static constexpr int mantissa_bits = 64;
|
static constexpr int mantissa_bits = 64;
|
||||||
static constexpr ComponentType mantissa_max = ~0ull;
|
static constexpr ComponentType mantissa_max = ~0ull;
|
||||||
static constexpr int exponent_bias = 16383;
|
static constexpr int exponent_bias = 16383;
|
||||||
static constexpr int exponent_bits = 15;
|
static constexpr int exponent_bits = 15;
|
||||||
static constexpr unsigned exponent_max = 32767;
|
static constexpr unsigned exponent_max = 32767;
|
||||||
struct [[gnu::packed]] {
|
|
||||||
// This is technically wrong: Extended floating point values really only have 63 bits of mantissa
|
// This is technically wrong: Extended floating point values really only have 63 bits of mantissa
|
||||||
// and an "integer bit" that behaves in various strange, unintuitive and non-IEEE-754 ways.
|
// and an "integer bit" that behaves in various strange, unintuitive and non-IEEE-754 ways.
|
||||||
// However, since all bit-fiddling float code assumes IEEE floats, it cannot handle this properly.
|
// However, since all bit-fiddling float code assumes IEEE floats, it cannot handle this properly.
|
||||||
// If we pretend that 80-bit floats are IEEE floats with 64-bit mantissas, almost everything works correctly
|
// If we pretend that 80-bit floats are IEEE floats with 64-bit mantissas, almost everything works correctly
|
||||||
// and we just need a few special cases.
|
// and we just need a few special cases.
|
||||||
ComponentType mantissa : 64;
|
ComponentType mantissa : 64;
|
||||||
ComponentType exponent : 15;
|
ComponentType exponent : 15;
|
||||||
ComponentType sign : 1;
|
ComponentType sign : 1;
|
||||||
};
|
|
||||||
f80 d;
|
|
||||||
};
|
};
|
||||||
static_assert(AssertSize<FloatExtractor<f80>, sizeof(f80)>());
|
static_assert(AssertSize<FloatExtractor<f80>, sizeof(f80)>());
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
union FloatExtractor<f64> {
|
struct FloatExtractor<f64> {
|
||||||
|
static constexpr FloatExtractor<f64> from_float(f64 f) { return bit_cast<FloatExtractor<f64>>(f); }
|
||||||
|
constexpr f64 to_float() const { return bit_cast<f64>(*this); }
|
||||||
|
|
||||||
using ComponentType = unsigned long long;
|
using ComponentType = unsigned long long;
|
||||||
static constexpr int mantissa_bits = 52;
|
static constexpr int mantissa_bits = 52;
|
||||||
static constexpr ComponentType mantissa_max = (1ull << 52) - 1;
|
static constexpr ComponentType mantissa_max = (1ull << 52) - 1;
|
||||||
static constexpr int exponent_bias = 1023;
|
static constexpr int exponent_bias = 1023;
|
||||||
static constexpr int exponent_bits = 11;
|
static constexpr int exponent_bits = 11;
|
||||||
static constexpr unsigned exponent_max = 2047;
|
static constexpr unsigned exponent_max = 2047;
|
||||||
struct [[gnu::packed]] {
|
|
||||||
// FIXME: These types have to all be the same, otherwise this struct
|
// FIXME: These types have to all be the same, otherwise this struct
|
||||||
// goes from being a bitfield describing the layout of an f64
|
// goes from being a bitfield describing the layout of an f64
|
||||||
// into being a multibyte mess on windows.
|
// into being a multibyte mess on windows.
|
||||||
// Technically, '-mno-ms-bitfields' is supposed to disable this
|
// Technically, '-mno-ms-bitfields' is supposed to disable this
|
||||||
// very intuitive and portable behaviour on windows, but it doesn't
|
// very intuitive and portable behaviour on windows, but it doesn't
|
||||||
// work with the msvc ABI.
|
// work with the msvc ABI.
|
||||||
// See <https://github.com/llvm/llvm-project/issues/24757>
|
// See <https://github.com/llvm/llvm-project/issues/24757>
|
||||||
ComponentType mantissa : 52;
|
ComponentType mantissa : 52;
|
||||||
ComponentType exponent : 11;
|
ComponentType exponent : 11;
|
||||||
ComponentType sign : 1;
|
ComponentType sign : 1;
|
||||||
};
|
|
||||||
f64 d;
|
|
||||||
};
|
};
|
||||||
static_assert(AssertSize<FloatExtractor<f64>, sizeof(f64)>());
|
static_assert(AssertSize<FloatExtractor<f64>, sizeof(f64)>());
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
union FloatExtractor<f32> {
|
struct FloatExtractor<f32> {
|
||||||
|
static constexpr FloatExtractor<f32> from_float(f32 f) { return bit_cast<FloatExtractor<f32>>(f); }
|
||||||
|
constexpr f32 to_float() const { return bit_cast<f32>(*this); }
|
||||||
|
|
||||||
using ComponentType = unsigned;
|
using ComponentType = unsigned;
|
||||||
static constexpr int mantissa_bits = 23;
|
static constexpr int mantissa_bits = 23;
|
||||||
static constexpr ComponentType mantissa_max = (1 << 23) - 1;
|
static constexpr ComponentType mantissa_max = (1 << 23) - 1;
|
||||||
static constexpr int exponent_bias = 127;
|
static constexpr int exponent_bias = 127;
|
||||||
static constexpr int exponent_bits = 8;
|
static constexpr int exponent_bits = 8;
|
||||||
static constexpr ComponentType exponent_max = 255;
|
static constexpr ComponentType exponent_max = 255;
|
||||||
struct [[gnu::packed]] {
|
|
||||||
ComponentType mantissa : 23;
|
ComponentType mantissa : 23;
|
||||||
ComponentType exponent : 8;
|
ComponentType exponent : 8;
|
||||||
ComponentType sign : 1;
|
ComponentType sign : 1;
|
||||||
};
|
|
||||||
f32 d;
|
|
||||||
};
|
};
|
||||||
static_assert(AssertSize<FloatExtractor<f32>, sizeof(f32)>());
|
static_assert(AssertSize<FloatExtractor<f32>, sizeof(f32)>());
|
||||||
|
|
||||||
|
|
17
AK/Math.h
17
AK/Math.h
|
@ -70,11 +70,10 @@ template<FloatingPoint FloatT>
|
||||||
FloatT copysign(FloatT x, FloatT y)
|
FloatT copysign(FloatT x, FloatT y)
|
||||||
{
|
{
|
||||||
using Extractor = FloatExtractor<FloatT>;
|
using Extractor = FloatExtractor<FloatT>;
|
||||||
Extractor ex, ey;
|
auto ex = Extractor::from_float(x);
|
||||||
ex.d = x;
|
auto ey = Extractor::from_float(y);
|
||||||
ey.d = y;
|
|
||||||
ex.sign = ey.sign;
|
ex.sign = ey.sign;
|
||||||
return ex.d;
|
return ex.to_float();
|
||||||
}
|
}
|
||||||
|
|
||||||
#define CONSTEXPR_STATE(function, args...) \
|
#define CONSTEXPR_STATE(function, args...) \
|
||||||
|
@ -475,10 +474,10 @@ constexpr T fmod(T x, T y)
|
||||||
// If y_exponent < x_exponent, we'll iteratively reduce x_exponent by shifting from
|
// If y_exponent < x_exponent, we'll iteratively reduce x_exponent by shifting from
|
||||||
// the exponent into the mantissa.
|
// the exponent into the mantissa.
|
||||||
|
|
||||||
FloatExtractor<T> x_bits { .d = x };
|
auto x_bits = FloatExtractor<T>::from_float(x);
|
||||||
typename FloatExtractor<T>::ComponentType x_exponent = x_bits.exponent; // - FloatExtractor<T>::exponent_bias;
|
typename FloatExtractor<T>::ComponentType x_exponent = x_bits.exponent; // - FloatExtractor<T>::exponent_bias;
|
||||||
|
|
||||||
FloatExtractor<T> y_bits { .d = y };
|
auto y_bits = FloatExtractor<T>::from_float(y);
|
||||||
typename FloatExtractor<T>::ComponentType y_exponent = y_bits.exponent; // - FloatExtractor<T>::exponent_bias;
|
typename FloatExtractor<T>::ComponentType y_exponent = y_bits.exponent; // - FloatExtractor<T>::exponent_bias;
|
||||||
|
|
||||||
// FIXME: Handle denormals. For now, treat them as 0.
|
// FIXME: Handle denormals. For now, treat them as 0.
|
||||||
|
@ -523,7 +522,7 @@ constexpr T fmod(T x, T y)
|
||||||
|
|
||||||
x_bits.exponent = x_exponent;
|
x_bits.exponent = x_exponent;
|
||||||
x_bits.mantissa = x_mantissa;
|
x_bits.mantissa = x_mantissa;
|
||||||
return x_bits.d;
|
return x_bits.to_float();
|
||||||
# else
|
# else
|
||||||
if constexpr (IsSame<T, long double>)
|
if constexpr (IsSame<T, long double>)
|
||||||
return __builtin_fmodl(x, y);
|
return __builtin_fmodl(x, y);
|
||||||
|
@ -966,7 +965,7 @@ constexpr T log2(T x)
|
||||||
if (x <= 0 || __builtin_isnan(x))
|
if (x <= 0 || __builtin_isnan(x))
|
||||||
return NaN<T>;
|
return NaN<T>;
|
||||||
|
|
||||||
FloatExtractor<T> ext { .d = x };
|
auto ext = FloatExtractor<T>::from_float(x);
|
||||||
T exponent = ext.exponent - FloatExtractor<T>::exponent_bias;
|
T exponent = ext.exponent - FloatExtractor<T>::exponent_bias;
|
||||||
|
|
||||||
// When the mantissa shows 0b00 (implicitly 1.0) we are on a power of 2
|
// When the mantissa shows 0b00 (implicitly 1.0) we are on a power of 2
|
||||||
|
@ -982,7 +981,7 @@ constexpr T log2(T x)
|
||||||
};
|
};
|
||||||
|
|
||||||
// (1 <= mantissa < 2)
|
// (1 <= mantissa < 2)
|
||||||
T m = mantissa_ext.d;
|
T m = mantissa_ext.to_float();
|
||||||
|
|
||||||
// This is a reconstruction of one of Sun's algorithms
|
// This is a reconstruction of one of Sun's algorithms
|
||||||
// They use a transformation to lower the problem space,
|
// They use a transformation to lower the problem space,
|
||||||
|
|
|
@ -47,7 +47,7 @@ FloatingPointExponentialForm inner_convert_floating_point_to_decimal_exponential
|
||||||
{
|
{
|
||||||
using Extractor = FloatExtractor<FloatingPoint>;
|
using Extractor = FloatExtractor<FloatingPoint>;
|
||||||
|
|
||||||
Extractor bit_representation { .d = value };
|
auto bit_representation = Extractor::from_float(value);
|
||||||
|
|
||||||
bool sign = bit_representation.sign;
|
bool sign = bit_representation.sign;
|
||||||
i32 exponent = bit_representation.exponent;
|
i32 exponent = bit_representation.exponent;
|
||||||
|
|
|
@ -73,8 +73,7 @@ static FloatType internal_to_integer(FloatType x, RoundingMode rounding_mode)
|
||||||
// Most component types are larger than int.
|
// Most component types are larger than int.
|
||||||
constexpr auto zero = static_cast<Extractor::ComponentType>(0);
|
constexpr auto zero = static_cast<Extractor::ComponentType>(0);
|
||||||
constexpr auto one = static_cast<Extractor::ComponentType>(1);
|
constexpr auto one = static_cast<Extractor::ComponentType>(1);
|
||||||
Extractor extractor;
|
auto extractor = Extractor::from_float(x);
|
||||||
extractor.d = x;
|
|
||||||
|
|
||||||
auto unbiased_exponent = extractor.exponent - Extractor::exponent_bias;
|
auto unbiased_exponent = extractor.exponent - Extractor::exponent_bias;
|
||||||
|
|
||||||
|
@ -132,16 +131,18 @@ static FloatType internal_to_integer(FloatType x, RoundingMode rounding_mode)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto result = extractor.to_float();
|
||||||
|
|
||||||
if (should_round) {
|
if (should_round) {
|
||||||
// We could do this ourselves, but this saves us from manually
|
// We could do this ourselves, but this saves us from manually
|
||||||
// handling overflow.
|
// handling overflow.
|
||||||
if (extractor.sign)
|
if (extractor.sign)
|
||||||
extractor.d -= static_cast<FloatType>(1.0);
|
result -= static_cast<FloatType>(1.0);
|
||||||
else
|
else
|
||||||
extractor.d += static_cast<FloatType>(1.0);
|
result += static_cast<FloatType>(1.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
return extractor.d;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// This is much branchier than it really needs to be
|
// This is much branchier than it really needs to be
|
||||||
|
@ -151,22 +152,21 @@ static FloatType internal_nextafter(FloatType x, bool up)
|
||||||
if (!isfinite(x))
|
if (!isfinite(x))
|
||||||
return x;
|
return x;
|
||||||
using Extractor = FloatExtractor<decltype(x)>;
|
using Extractor = FloatExtractor<decltype(x)>;
|
||||||
Extractor extractor;
|
auto extractor = Extractor::from_float(x);
|
||||||
extractor.d = x;
|
|
||||||
if (x == 0) {
|
if (x == 0) {
|
||||||
if (!extractor.sign) {
|
if (!extractor.sign) {
|
||||||
extractor.mantissa = 1;
|
extractor.mantissa = 1;
|
||||||
extractor.sign = !up;
|
extractor.sign = !up;
|
||||||
return extractor.d;
|
return extractor.to_float();
|
||||||
}
|
}
|
||||||
if (up) {
|
if (up) {
|
||||||
extractor.sign = false;
|
extractor.sign = false;
|
||||||
extractor.mantissa = 1;
|
extractor.mantissa = 1;
|
||||||
return extractor.d;
|
return extractor.to_float();
|
||||||
}
|
}
|
||||||
extractor.mantissa = 1;
|
extractor.mantissa = 1;
|
||||||
extractor.sign = up != extractor.sign;
|
extractor.sign = up != extractor.sign;
|
||||||
return extractor.d;
|
return extractor.to_float();
|
||||||
}
|
}
|
||||||
if (up != extractor.sign) {
|
if (up != extractor.sign) {
|
||||||
extractor.mantissa++;
|
extractor.mantissa++;
|
||||||
|
@ -179,22 +179,21 @@ static FloatType internal_nextafter(FloatType x, bool up)
|
||||||
extractor.mantissa = Extractor::mantissa_max;
|
extractor.mantissa = Extractor::mantissa_max;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return extractor.d;
|
return extractor.to_float();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!extractor.mantissa) {
|
if (!extractor.mantissa) {
|
||||||
if (extractor.exponent) {
|
if (extractor.exponent) {
|
||||||
extractor.exponent--;
|
extractor.exponent--;
|
||||||
extractor.mantissa = Extractor::mantissa_max;
|
extractor.mantissa = Extractor::mantissa_max;
|
||||||
} else {
|
return extractor.to_float();
|
||||||
extractor.d = 0;
|
|
||||||
}
|
}
|
||||||
return extractor.d;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
extractor.mantissa--;
|
extractor.mantissa--;
|
||||||
if (extractor.mantissa != Extractor::mantissa_max)
|
if (extractor.mantissa != Extractor::mantissa_max)
|
||||||
return extractor.d;
|
return extractor.to_float();
|
||||||
if (extractor.exponent) {
|
if (extractor.exponent) {
|
||||||
extractor.exponent--;
|
extractor.exponent--;
|
||||||
// normalize
|
// normalize
|
||||||
|
@ -206,7 +205,7 @@ static FloatType internal_nextafter(FloatType x, bool up)
|
||||||
extractor.exponent = Extractor::exponent_max;
|
extractor.exponent = Extractor::exponent_max;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return extractor.d;
|
return extractor.to_float();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename FloatT>
|
template<typename FloatT>
|
||||||
|
@ -223,8 +222,7 @@ static int internal_ilogb(FloatT x) NOEXCEPT
|
||||||
|
|
||||||
using Extractor = FloatExtractor<FloatT>;
|
using Extractor = FloatExtractor<FloatT>;
|
||||||
|
|
||||||
Extractor extractor;
|
auto extractor = Extractor::from_float(x);
|
||||||
extractor.d = x;
|
|
||||||
|
|
||||||
return (int)extractor.exponent - Extractor::exponent_bias;
|
return (int)extractor.exponent - Extractor::exponent_bias;
|
||||||
}
|
}
|
||||||
|
@ -247,12 +245,11 @@ static FloatT internal_scalbn(FloatT x, int exponent) NOEXCEPT
|
||||||
return x;
|
return x;
|
||||||
|
|
||||||
using Extractor = FloatExtractor<FloatT>;
|
using Extractor = FloatExtractor<FloatT>;
|
||||||
Extractor extractor;
|
auto extractor = Extractor::from_float(x);
|
||||||
extractor.d = x;
|
|
||||||
|
|
||||||
if (extractor.exponent != 0) {
|
if (extractor.exponent != 0) {
|
||||||
extractor.exponent = clamp((int)extractor.exponent + exponent, 0, (int)Extractor::exponent_max);
|
extractor.exponent = clamp((int)extractor.exponent + exponent, 0, (int)Extractor::exponent_max);
|
||||||
return extractor.d;
|
return extractor.to_float();
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned leading_mantissa_zeroes = extractor.mantissa == 0 ? 32 : count_leading_zeroes(extractor.mantissa);
|
unsigned leading_mantissa_zeroes = extractor.mantissa == 0 ? 32 : count_leading_zeroes(extractor.mantissa);
|
||||||
|
@ -262,7 +259,7 @@ static FloatT internal_scalbn(FloatT x, int exponent) NOEXCEPT
|
||||||
extractor.exponent <<= shift;
|
extractor.exponent <<= shift;
|
||||||
extractor.exponent = exponent + 1;
|
extractor.exponent = exponent + 1;
|
||||||
|
|
||||||
return extractor.d;
|
return extractor.to_float();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename FloatT>
|
template<typename FloatT>
|
||||||
|
|
|
@ -49,8 +49,7 @@ UnsignedBigInteger::UnsignedBigInteger(double value)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
FloatExtractor<double> extractor;
|
auto extractor = FloatExtractor<double>::from_float(value);
|
||||||
extractor.d = value;
|
|
||||||
VERIFY(!extractor.sign);
|
VERIFY(!extractor.sign);
|
||||||
|
|
||||||
i32 real_exponent = extractor.exponent - extractor.exponent_bias;
|
i32 real_exponent = extractor.exponent - extractor.exponent_bias;
|
||||||
|
@ -350,7 +349,7 @@ double UnsignedBigInteger::to_double(UnsignedBigInteger::RoundingMode rounding_m
|
||||||
VERIFY((mantissa & 0xfff0000000000000) == 0);
|
VERIFY((mantissa & 0xfff0000000000000) == 0);
|
||||||
extractor.mantissa = mantissa;
|
extractor.mantissa = mantissa;
|
||||||
|
|
||||||
return extractor.d;
|
return extractor.to_float();
|
||||||
}
|
}
|
||||||
|
|
||||||
void UnsignedBigInteger::set_to_0()
|
void UnsignedBigInteger::set_to_0()
|
||||||
|
@ -638,8 +637,7 @@ UnsignedBigInteger::CompareResult UnsignedBigInteger::compare_to_double(double v
|
||||||
if (is_zero())
|
if (is_zero())
|
||||||
return CompareResult::DoubleGreaterThanBigInt;
|
return CompareResult::DoubleGreaterThanBigInt;
|
||||||
|
|
||||||
FloatExtractor<double> extractor;
|
auto extractor = FloatExtractor<double>::from_float(value);
|
||||||
extractor.d = value;
|
|
||||||
|
|
||||||
// Value cannot be negative at this point.
|
// Value cannot be negative at this point.
|
||||||
VERIFY(extractor.sign == 0);
|
VERIFY(extractor.sign == 0);
|
||||||
|
|
Loading…
Reference in a new issue