AK: Make FloatExtractor use bit_cast<>() instead of a union

The motivation is to allow functions that use FloatExtractor to be
constexpr. Type punning through a union is never permitted in a constant expression.

In practice, using bit_cast<>() on a struct with bit-fields also does not yet
work during constant evaluation in clang, but that is just a compiler bug that
will be fixed eventually (it already works in gcc): https://github.com/llvm/llvm-project/issues/54018

No behavior change.
This commit is contained in:
Nico Weber 2025-01-01 14:10:26 -05:00
parent 39a2356c54
commit 48a28cffd5
5 changed files with 77 additions and 79 deletions

View file

@ -13,91 +13,95 @@
namespace AK { namespace AK {
template<typename T> template<typename T>
union FloatExtractor; struct FloatExtractor;
#ifdef AK_HAS_FLOAT_128 #ifdef AK_HAS_FLOAT_128
template<> template<>
union FloatExtractor<f128> { struct FloatExtractor<f128> {
static constexpr FloatExtractor<f128> from_float(f128 f) { return bit_cast<FloatExtractor<f128>>(f); }
constexpr f128 to_float() const { return bit_cast<f128>(*this); }
using ComponentType = unsigned __int128; using ComponentType = unsigned __int128;
static constexpr int mantissa_bits = 112; static constexpr int mantissa_bits = 112;
static constexpr ComponentType mantissa_max = (((ComponentType)1) << 112) - 1; static constexpr ComponentType mantissa_max = (((ComponentType)1) << 112) - 1;
static constexpr int exponent_bias = 16383; static constexpr int exponent_bias = 16383;
static constexpr int exponent_bits = 15; static constexpr int exponent_bits = 15;
static constexpr unsigned exponent_max = 32767; static constexpr unsigned exponent_max = 32767;
struct [[gnu::packed]] {
ComponentType mantissa : 112; ComponentType mantissa : 112;
ComponentType exponent : 15; ComponentType exponent : 15;
ComponentType sign : 1; ComponentType sign : 1;
};
f128 d;
}; };
// Validate that f128 and the FloatExtractor union are 128 bits. // Validate that f128 and the FloatExtractor struct are 128 bits.
static_assert(AssertSize<f128, 16>()); static_assert(AssertSize<f128, 16>());
static_assert(AssertSize<FloatExtractor<f128>, sizeof(f128)>()); static_assert(AssertSize<FloatExtractor<f128>, sizeof(f128)>());
#endif #endif
#ifdef AK_HAS_FLOAT_80 #ifdef AK_HAS_FLOAT_80
template<> template<>
union FloatExtractor<f80> { struct FloatExtractor<f80> {
static constexpr FloatExtractor<f80> from_float(f80 f) { return bit_cast<FloatExtractor<f80>>(f); }
constexpr f80 to_float() const { return bit_cast<f80>(*this); }
using ComponentType = unsigned long long; using ComponentType = unsigned long long;
static constexpr int mantissa_bits = 64; static constexpr int mantissa_bits = 64;
static constexpr ComponentType mantissa_max = ~0ull; static constexpr ComponentType mantissa_max = ~0ull;
static constexpr int exponent_bias = 16383; static constexpr int exponent_bias = 16383;
static constexpr int exponent_bits = 15; static constexpr int exponent_bits = 15;
static constexpr unsigned exponent_max = 32767; static constexpr unsigned exponent_max = 32767;
struct [[gnu::packed]] {
// This is technically wrong: Extended floating point values really only have 63 bits of mantissa // This is technically wrong: Extended floating point values really only have 63 bits of mantissa
// and an "integer bit" that behaves in various strange, unintuitive and non-IEEE-754 ways. // and an "integer bit" that behaves in various strange, unintuitive and non-IEEE-754 ways.
// However, since all bit-fiddling float code assumes IEEE floats, it cannot handle this properly. // However, since all bit-fiddling float code assumes IEEE floats, it cannot handle this properly.
// If we pretend that 80-bit floats are IEEE floats with 64-bit mantissas, almost everything works correctly // If we pretend that 80-bit floats are IEEE floats with 64-bit mantissas, almost everything works correctly
// and we just need a few special cases. // and we just need a few special cases.
ComponentType mantissa : 64; ComponentType mantissa : 64;
ComponentType exponent : 15; ComponentType exponent : 15;
ComponentType sign : 1; ComponentType sign : 1;
};
f80 d;
}; };
static_assert(AssertSize<FloatExtractor<f80>, sizeof(f80)>()); static_assert(AssertSize<FloatExtractor<f80>, sizeof(f80)>());
#endif #endif
template<> template<>
union FloatExtractor<f64> { struct FloatExtractor<f64> {
static constexpr FloatExtractor<f64> from_float(f64 f) { return bit_cast<FloatExtractor<f64>>(f); }
constexpr f64 to_float() const { return bit_cast<f64>(*this); }
using ComponentType = unsigned long long; using ComponentType = unsigned long long;
static constexpr int mantissa_bits = 52; static constexpr int mantissa_bits = 52;
static constexpr ComponentType mantissa_max = (1ull << 52) - 1; static constexpr ComponentType mantissa_max = (1ull << 52) - 1;
static constexpr int exponent_bias = 1023; static constexpr int exponent_bias = 1023;
static constexpr int exponent_bits = 11; static constexpr int exponent_bits = 11;
static constexpr unsigned exponent_max = 2047; static constexpr unsigned exponent_max = 2047;
struct [[gnu::packed]] {
// FIXME: These types have to all be the same, otherwise this struct // FIXME: These types have to all be the same, otherwise this struct
// goes from being a bitfield describing the layout of an f64 // goes from being a bitfield describing the layout of an f64
// into being a multibyte mess on windows. // into being a multibyte mess on windows.
// Technically, '-mno-ms-bitfields' is supposed to disable this // Technically, '-mno-ms-bitfields' is supposed to disable this
// very intuitive and portable behaviour on windows, but it doesn't // very intuitive and portable behaviour on windows, but it doesn't
// work with the msvc ABI. // work with the msvc ABI.
// See <https://github.com/llvm/llvm-project/issues/24757> // See <https://github.com/llvm/llvm-project/issues/24757>
ComponentType mantissa : 52; ComponentType mantissa : 52;
ComponentType exponent : 11; ComponentType exponent : 11;
ComponentType sign : 1; ComponentType sign : 1;
};
f64 d;
}; };
static_assert(AssertSize<FloatExtractor<f64>, sizeof(f64)>()); static_assert(AssertSize<FloatExtractor<f64>, sizeof(f64)>());
template<> template<>
union FloatExtractor<f32> { struct FloatExtractor<f32> {
static constexpr FloatExtractor<f32> from_float(f32 f) { return bit_cast<FloatExtractor<f32>>(f); }
constexpr f32 to_float() const { return bit_cast<f32>(*this); }
using ComponentType = unsigned; using ComponentType = unsigned;
static constexpr int mantissa_bits = 23; static constexpr int mantissa_bits = 23;
static constexpr ComponentType mantissa_max = (1 << 23) - 1; static constexpr ComponentType mantissa_max = (1 << 23) - 1;
static constexpr int exponent_bias = 127; static constexpr int exponent_bias = 127;
static constexpr int exponent_bits = 8; static constexpr int exponent_bits = 8;
static constexpr ComponentType exponent_max = 255; static constexpr ComponentType exponent_max = 255;
struct [[gnu::packed]] {
ComponentType mantissa : 23; ComponentType mantissa : 23;
ComponentType exponent : 8; ComponentType exponent : 8;
ComponentType sign : 1; ComponentType sign : 1;
};
f32 d;
}; };
static_assert(AssertSize<FloatExtractor<f32>, sizeof(f32)>()); static_assert(AssertSize<FloatExtractor<f32>, sizeof(f32)>());

View file

@ -70,11 +70,10 @@ template<FloatingPoint FloatT>
FloatT copysign(FloatT x, FloatT y) FloatT copysign(FloatT x, FloatT y)
{ {
using Extractor = FloatExtractor<FloatT>; using Extractor = FloatExtractor<FloatT>;
Extractor ex, ey; auto ex = Extractor::from_float(x);
ex.d = x; auto ey = Extractor::from_float(y);
ey.d = y;
ex.sign = ey.sign; ex.sign = ey.sign;
return ex.d; return ex.to_float();
} }
#define CONSTEXPR_STATE(function, args...) \ #define CONSTEXPR_STATE(function, args...) \
@ -475,10 +474,10 @@ constexpr T fmod(T x, T y)
// If y_exponent < x_exponent, we'll iteratively reduce x_exponent by shifting from // If y_exponent < x_exponent, we'll iteratively reduce x_exponent by shifting from
// the exponent into the mantissa. // the exponent into the mantissa.
FloatExtractor<T> x_bits { .d = x }; auto x_bits = FloatExtractor<T>::from_float(x);
typename FloatExtractor<T>::ComponentType x_exponent = x_bits.exponent; // - FloatExtractor<T>::exponent_bias; typename FloatExtractor<T>::ComponentType x_exponent = x_bits.exponent; // - FloatExtractor<T>::exponent_bias;
FloatExtractor<T> y_bits { .d = y }; auto y_bits = FloatExtractor<T>::from_float(y);
typename FloatExtractor<T>::ComponentType y_exponent = y_bits.exponent; // - FloatExtractor<T>::exponent_bias; typename FloatExtractor<T>::ComponentType y_exponent = y_bits.exponent; // - FloatExtractor<T>::exponent_bias;
// FIXME: Handle denormals. For now, treat them as 0. // FIXME: Handle denormals. For now, treat them as 0.
@ -523,7 +522,7 @@ constexpr T fmod(T x, T y)
x_bits.exponent = x_exponent; x_bits.exponent = x_exponent;
x_bits.mantissa = x_mantissa; x_bits.mantissa = x_mantissa;
return x_bits.d; return x_bits.to_float();
# else # else
if constexpr (IsSame<T, long double>) if constexpr (IsSame<T, long double>)
return __builtin_fmodl(x, y); return __builtin_fmodl(x, y);
@ -966,7 +965,7 @@ constexpr T log2(T x)
if (x <= 0 || __builtin_isnan(x)) if (x <= 0 || __builtin_isnan(x))
return NaN<T>; return NaN<T>;
FloatExtractor<T> ext { .d = x }; auto ext = FloatExtractor<T>::from_float(x);
T exponent = ext.exponent - FloatExtractor<T>::exponent_bias; T exponent = ext.exponent - FloatExtractor<T>::exponent_bias;
// When the mantissa shows 0b00 (implicitly 1.0) we are on a power of 2 // When the mantissa shows 0b00 (implicitly 1.0) we are on a power of 2
@ -982,7 +981,7 @@ constexpr T log2(T x)
}; };
// (1 <= mantissa < 2) // (1 <= mantissa < 2)
T m = mantissa_ext.d; T m = mantissa_ext.to_float();
// This is a reconstruction of one of Sun's algorithms // This is a reconstruction of one of Sun's algorithms
// They use a transformation to lower the problem space, // They use a transformation to lower the problem space,

View file

@ -47,7 +47,7 @@ FloatingPointExponentialForm inner_convert_floating_point_to_decimal_exponential
{ {
using Extractor = FloatExtractor<FloatingPoint>; using Extractor = FloatExtractor<FloatingPoint>;
Extractor bit_representation { .d = value }; auto bit_representation = Extractor::from_float(value);
bool sign = bit_representation.sign; bool sign = bit_representation.sign;
i32 exponent = bit_representation.exponent; i32 exponent = bit_representation.exponent;

View file

@ -73,8 +73,7 @@ static FloatType internal_to_integer(FloatType x, RoundingMode rounding_mode)
// Most component types are larger than int. // Most component types are larger than int.
constexpr auto zero = static_cast<Extractor::ComponentType>(0); constexpr auto zero = static_cast<Extractor::ComponentType>(0);
constexpr auto one = static_cast<Extractor::ComponentType>(1); constexpr auto one = static_cast<Extractor::ComponentType>(1);
Extractor extractor; auto extractor = Extractor::from_float(x);
extractor.d = x;
auto unbiased_exponent = extractor.exponent - Extractor::exponent_bias; auto unbiased_exponent = extractor.exponent - Extractor::exponent_bias;
@ -132,16 +131,18 @@ static FloatType internal_to_integer(FloatType x, RoundingMode rounding_mode)
break; break;
} }
auto result = extractor.to_float();
if (should_round) { if (should_round) {
// We could do this ourselves, but this saves us from manually // We could do this ourselves, but this saves us from manually
// handling overflow. // handling overflow.
if (extractor.sign) if (extractor.sign)
extractor.d -= static_cast<FloatType>(1.0); result -= static_cast<FloatType>(1.0);
else else
extractor.d += static_cast<FloatType>(1.0); result += static_cast<FloatType>(1.0);
} }
return extractor.d; return result;
} }
// This is much branchier than it really needs to be // This is much branchier than it really needs to be
@ -151,22 +152,21 @@ static FloatType internal_nextafter(FloatType x, bool up)
if (!isfinite(x)) if (!isfinite(x))
return x; return x;
using Extractor = FloatExtractor<decltype(x)>; using Extractor = FloatExtractor<decltype(x)>;
Extractor extractor; auto extractor = Extractor::from_float(x);
extractor.d = x;
if (x == 0) { if (x == 0) {
if (!extractor.sign) { if (!extractor.sign) {
extractor.mantissa = 1; extractor.mantissa = 1;
extractor.sign = !up; extractor.sign = !up;
return extractor.d; return extractor.to_float();
} }
if (up) { if (up) {
extractor.sign = false; extractor.sign = false;
extractor.mantissa = 1; extractor.mantissa = 1;
return extractor.d; return extractor.to_float();
} }
extractor.mantissa = 1; extractor.mantissa = 1;
extractor.sign = up != extractor.sign; extractor.sign = up != extractor.sign;
return extractor.d; return extractor.to_float();
} }
if (up != extractor.sign) { if (up != extractor.sign) {
extractor.mantissa++; extractor.mantissa++;
@ -179,22 +179,21 @@ static FloatType internal_nextafter(FloatType x, bool up)
extractor.mantissa = Extractor::mantissa_max; extractor.mantissa = Extractor::mantissa_max;
} }
} }
return extractor.d; return extractor.to_float();
} }
if (!extractor.mantissa) { if (!extractor.mantissa) {
if (extractor.exponent) { if (extractor.exponent) {
extractor.exponent--; extractor.exponent--;
extractor.mantissa = Extractor::mantissa_max; extractor.mantissa = Extractor::mantissa_max;
} else { return extractor.to_float();
extractor.d = 0;
} }
return extractor.d; return 0;
} }
extractor.mantissa--; extractor.mantissa--;
if (extractor.mantissa != Extractor::mantissa_max) if (extractor.mantissa != Extractor::mantissa_max)
return extractor.d; return extractor.to_float();
if (extractor.exponent) { if (extractor.exponent) {
extractor.exponent--; extractor.exponent--;
// normalize // normalize
@ -206,7 +205,7 @@ static FloatType internal_nextafter(FloatType x, bool up)
extractor.exponent = Extractor::exponent_max; extractor.exponent = Extractor::exponent_max;
} }
} }
return extractor.d; return extractor.to_float();
} }
template<typename FloatT> template<typename FloatT>
@ -223,8 +222,7 @@ static int internal_ilogb(FloatT x) NOEXCEPT
using Extractor = FloatExtractor<FloatT>; using Extractor = FloatExtractor<FloatT>;
Extractor extractor; auto extractor = Extractor::from_float(x);
extractor.d = x;
return (int)extractor.exponent - Extractor::exponent_bias; return (int)extractor.exponent - Extractor::exponent_bias;
} }
@ -247,12 +245,11 @@ static FloatT internal_scalbn(FloatT x, int exponent) NOEXCEPT
return x; return x;
using Extractor = FloatExtractor<FloatT>; using Extractor = FloatExtractor<FloatT>;
Extractor extractor; auto extractor = Extractor::from_float(x);
extractor.d = x;
if (extractor.exponent != 0) { if (extractor.exponent != 0) {
extractor.exponent = clamp((int)extractor.exponent + exponent, 0, (int)Extractor::exponent_max); extractor.exponent = clamp((int)extractor.exponent + exponent, 0, (int)Extractor::exponent_max);
return extractor.d; return extractor.to_float();
} }
unsigned leading_mantissa_zeroes = extractor.mantissa == 0 ? 32 : count_leading_zeroes(extractor.mantissa); unsigned leading_mantissa_zeroes = extractor.mantissa == 0 ? 32 : count_leading_zeroes(extractor.mantissa);
@ -262,7 +259,7 @@ static FloatT internal_scalbn(FloatT x, int exponent) NOEXCEPT
extractor.exponent <<= shift; extractor.exponent <<= shift;
extractor.exponent = exponent + 1; extractor.exponent = exponent + 1;
return extractor.d; return extractor.to_float();
} }
template<typename FloatT> template<typename FloatT>

View file

@ -49,8 +49,7 @@ UnsignedBigInteger::UnsignedBigInteger(double value)
return; return;
} }
FloatExtractor<double> extractor; auto extractor = FloatExtractor<double>::from_float(value);
extractor.d = value;
VERIFY(!extractor.sign); VERIFY(!extractor.sign);
i32 real_exponent = extractor.exponent - extractor.exponent_bias; i32 real_exponent = extractor.exponent - extractor.exponent_bias;
@ -350,7 +349,7 @@ double UnsignedBigInteger::to_double(UnsignedBigInteger::RoundingMode rounding_m
VERIFY((mantissa & 0xfff0000000000000) == 0); VERIFY((mantissa & 0xfff0000000000000) == 0);
extractor.mantissa = mantissa; extractor.mantissa = mantissa;
return extractor.d; return extractor.to_float();
} }
void UnsignedBigInteger::set_to_0() void UnsignedBigInteger::set_to_0()
@ -638,8 +637,7 @@ UnsignedBigInteger::CompareResult UnsignedBigInteger::compare_to_double(double v
if (is_zero()) if (is_zero())
return CompareResult::DoubleGreaterThanBigInt; return CompareResult::DoubleGreaterThanBigInt;
FloatExtractor<double> extractor; auto extractor = FloatExtractor<double>::from_float(value);
extractor.d = value;
// Value cannot be negative at this point. // Value cannot be negative at this point.
VERIFY(extractor.sign == 0); VERIFY(extractor.sign == 0);