/* * Copyright (c) 2021, Leon Albrecht . * * SPDX-License-Identifier: BSD-2-Clause */ #pragma once #include #include #include #include #include #include namespace AK { template constexpr T NaN = __builtin_nan(""); template constexpr T Infinity = __builtin_huge_vall(); template constexpr T Pi = 3.141592653589793238462643383279502884L; template constexpr T E = 2.718281828459045235360287471352662498L; template constexpr T Sqrt2 = 1.414213562373095048801688724209698079L; template constexpr T Sqrt1_2 = 0.707106781186547524400844362104849039L; template constexpr T L2_10 = 3.321928094887362347870319429489390175864L; template constexpr T L2_E = 1.442695040888963407359924681001892137L; namespace Details { template constexpr size_t product_even(); template<> constexpr size_t product_even<2>() { return 2; } template constexpr size_t product_even() { return value * product_even(); } template constexpr size_t product_odd(); template<> constexpr size_t product_odd<1>() { return 1; } template constexpr size_t product_odd() { return value * product_odd(); } } template constexpr T to_radians(T degrees) { return degrees * AK::Pi / 180; } template constexpr T to_degrees(T radians) { return radians * 180 / AK::Pi; } #define CONSTEXPR_STATE(function, args...) \ if (is_constant_evaluated()) { \ if (IsSame) \ return __builtin_##function##l(args); \ if (IsSame) \ return __builtin_##function(args); \ if (IsSame) \ return __builtin_##function##f(args); \ } #define AARCH64_INSTRUCTION(instruction, arg) \ if constexpr (IsSame) \ TODO(); \ if constexpr (IsSame) { \ double res; \ asm(#instruction " %d0, %d1" \ : "=w"(res) \ : "w"(arg)); \ return res; \ } \ if constexpr (IsSame) { \ float res; \ asm(#instruction " %s0, %s1" \ : "=w"(res) \ : "w"(arg)); \ return res; \ } template constexpr T fabs(T x) { // Both GCC and Clang inline fabs by default, so this is just a cmath like wrapper if constexpr (IsSame) return __builtin_fabsl(x); if constexpr (IsSame) return __builtin_fabs(x); if constexpr (IsSame) return __builtin_fabsf(x); } namespace Rounding { template constexpr T ceil(T num) { // FIXME: SSE4.1 rounds[sd] num, res, 0b110 if (is_constant_evaluated()) { if (num < NumericLimits::min() || num > NumericLimits::max()) return num; return (static_cast(static_cast(num)) == num) ? static_cast(num) : static_cast(num) + ((num > 0) ? 1 : 0); } #if ARCH(AARCH64) AARCH64_INSTRUCTION(frintp, num); #else if constexpr (IsSame) return __builtin_ceill(num); if constexpr (IsSame) return __builtin_ceil(num); if constexpr (IsSame) return __builtin_ceilf(num); #endif } template constexpr T floor(T num) { // FIXME: SSE4.1 rounds[sd] num, res, 0b101 if (is_constant_evaluated()) { if (num < NumericLimits::min() || num > NumericLimits::max()) return num; return (static_cast(static_cast(num)) == num) ? static_cast(num) : static_cast(num) - ((num > 0) ? 0 : 1); } #if ARCH(AARCH64) AARCH64_INSTRUCTION(frintm, num); #else if constexpr (IsSame) return __builtin_floorl(num); if constexpr (IsSame) return __builtin_floor(num); if constexpr (IsSame) return __builtin_floorf(num); #endif } template constexpr T trunc(T num) { #if ARCH(AARCH64) if (is_constant_evaluated()) { if (num < NumericLimits::min() || num > NumericLimits::max()) return num; return static_cast(static_cast(num)); } AARCH64_INSTRUCTION(frintz, num); #endif // FIXME: Use dedicated instruction in the non constexpr case // SSE4.1: rounds[sd] %num, %res, 0b111 if (num < NumericLimits::min() || num > NumericLimits::max()) return num; return static_cast(static_cast(num)); } template constexpr T rint(T x) { CONSTEXPR_STATE(rint, x); // Note: This does break tie to even // But the behavior of frndint/rounds[ds]/frintx can be configured // through the floating point control registers. // FIXME: We should decide if we rename this to allow us to get away from // the configurability "burden" rint has // this would make us use `rounds[sd] %num, %res, 0b100` // and `frintn` respectively, // no such guaranteed round exists for x87 `frndint` #if ARCH(X86_64) # ifdef __SSE4_1__ if constexpr (IsSame) { T r; asm( "roundsd %1, %0" : "=x"(r) : "x"(x)); return r; } if constexpr (IsSame) { T r; asm( "roundss %1, %0" : "=x"(r) : "x"(x)); return r; } # else asm( "frndint" : "+t"(x)); return x; # endif #elif ARCH(AARCH64) AARCH64_INSTRUCTION(frintx, x); #endif TODO(); } template constexpr T round(T x) { CONSTEXPR_STATE(round, x); // Note: This is break-tie-away-from-zero, so not the hw's understanding of // "nearest", which would be towards even. if (x == 0.) return x; if (x > 0.) return floor(x + .5); return ceil(x - .5); } template ALWAYS_INLINE I round_to(P value); #if ARCH(X86_64) template ALWAYS_INLINE I round_to(long double value) { // Note: fistps outputs into a signed integer location (i16, i32, i64), // so lets be nice and tell the compiler that. Conditional= sizeof(i16), MakeSigned, i16> ret; if constexpr (sizeof(I) == sizeof(i64)) { asm("fistpll %0" : "=m"(ret) : "t"(value) : "st"); } else if constexpr (sizeof(I) == sizeof(i32)) { asm("fistpl %0" : "=m"(ret) : "t"(value) : "st"); } else { asm("fistps %0" : "=m"(ret) : "t"(value) : "st"); } return static_cast(ret); } template ALWAYS_INLINE I round_to(float value) { // FIXME: round_to might will cause issues, aka the indefinite value being set, // if the value surpasses the i64 limit, even if the result could fit into an u64 // To solve this we would either need to detect that value or do a range check and // then do a more specialized conversion, which might include a division (which is expensive) if constexpr (sizeof(I) == sizeof(i64) || IsSame) { i64 ret; asm("cvtss2si %1, %0" : "=r"(ret) : "xm"(value)); return static_cast(ret); } i32 ret; asm("cvtss2si %1, %0" : "=r"(ret) : "xm"(value)); return static_cast(ret); } template ALWAYS_INLINE I round_to(double value) { // FIXME: round_to might will cause issues, aka the indefinite value being set, // if the value surpasses the i64 limit, even if the result could fit into an u64 // To solve this we would either need to detect that value or do a range check and // then do a more specialized conversion, which might include a division (which is expensive) if constexpr (sizeof(I) == sizeof(i64) || IsSame) { i64 ret; asm("cvtsd2si %1, %0" : "=r"(ret) : "xm"(value)); return static_cast(ret); } i32 ret; asm("cvtsd2si %1, %0" : "=r"(ret) : "xm"(value)); return static_cast(ret); } #elif ARCH(AARCH64) template ALWAYS_INLINE I round_to(float value) { if constexpr (sizeof(I) <= sizeof(u32)) { i32 res; asm("fcvtns %w0, %s1" : "=r"(res) : "w"(value)); return static_cast(res); } i64 res; asm("fcvtns %0, %s1" : "=r"(res) : "w"(value)); return static_cast(res); } template ALWAYS_INLINE I round_to(double value) { if constexpr (sizeof(I) <= sizeof(u32)) { i32 res; asm("fcvtns %w0, %d1" : "=r"(res) : "w"(value)); return static_cast(res); } i64 res; asm("fcvtns %0, %d1" : "=r"(res) : "w"(value)); return static_cast(res); } template ALWAYS_INLINE U round_to(float value) { if constexpr (sizeof(U) <= sizeof(u32)) { u32 res; asm("fcvtnu %w0, %s1" : "=r"(res) : "w"(value)); return static_cast(res); } i64 res; asm("fcvtnu %0, %s1" : "=r"(res) : "w"(value)); return static_cast(res); } template ALWAYS_INLINE U round_to(double value) { if constexpr (sizeof(U) <= sizeof(u32)) { u32 res; asm("fcvtns %w0, %d1" : "=r"(res) : "w"(value)); return static_cast(res); } i64 res; asm("fcvtns %0, %d1" : "=r"(res) : "w"(value)); return static_cast(res); } #else template ALWAYS_INLINE I round_to(P value) { if constexpr (IsSame) return static_cast(__builtin_llrintl(value)); if constexpr (IsSame) return static_cast(__builtin_llrint(value)); if constexpr (IsSame) return static_cast(__builtin_llrintf(value)); } #endif } using Rounding::ceil; using Rounding::floor; using Rounding::rint; using Rounding::round; using Rounding::round_to; using Rounding::trunc; namespace Division { template constexpr T fmod(T x, T y) { CONSTEXPR_STATE(fmod, x, y); #if ARCH(X86_64) u16 fpu_status; do { asm( "fprem\n" "fnstsw %%ax\n" : "+t"(x), "=a"(fpu_status) : "u"(y)); } while (fpu_status & 0x400); return x; // FIXME: Add a generic implementation of this // Neither // ``` // return x - (y * trunc(x/y)) // ``` // nor // ``` // double result = remainder(std::fabs(x), y = std::fabs(y)); // if (std::signbit(result)) // result += y; // return std::copysign(result, x); // ``` from (https://en.cppreference.com/w/cpp/numeric/math/fmod) // provide enough precision for all cases // other implementations seem to do this by hand with some fixed point steps in between // For `remainder` the trivial solution of // ``` // return x - (y * rint(x/y)) // ``` // might work #else # if defined(AK_OS_SERENITY) // TODO: Add implementation for this function. TODO(); # endif if constexpr (IsSame) return __builtin_fmodl(x, y); if constexpr (IsSame) return __builtin_fmod(x, y); if constexpr (IsSame) return __builtin_fmodf(x, y); #endif } template constexpr T remainder(T x, T y) { CONSTEXPR_STATE(remainder, x, y); #if ARCH(X86_64) u16 fpu_status; do { asm( "fprem1\n" "fnstsw %%ax\n" : "+t"(x), "=a"(fpu_status) : "u"(y)); } while (fpu_status & 0x400); return x; #else # if defined(AK_OS_SERENITY) // TODO: Add implementation for this function. TODO(); # endif if constexpr (IsSame) return __builtin_remainderl(x, y); if constexpr (IsSame) return __builtin_remainder(x, y); if constexpr (IsSame) return __builtin_remainderf(x, y); #endif } } using Division::fmod; using Division::remainder; template constexpr T sqrt(T x) { CONSTEXPR_STATE(sqrt, x); #if ARCH(X86_64) if constexpr (IsSame) { float res; asm("sqrtss %1, %0" : "=x"(res) : "x"(x)); return res; } if constexpr (IsSame) { double res; asm("sqrtsd %1, %0" : "=x"(res) : "x"(x)); return res; } T res; asm("fsqrt" : "=t"(res) : "0"(x)); return res; #elif ARCH(AARCH64) AARCH64_INSTRUCTION(fsqrt, x); #else return __builtin_sqrt(x); #endif } template constexpr T cbrt(T x) { CONSTEXPR_STATE(cbrt, x); if (__builtin_isinf(x) || x == 0) return x; if (x < 0) return -cbrt(-x); T r = x; T ex = 0; while (r < 0.125l) { r *= 8; ex--; } while (r > 1.0l) { r *= 0.125l; ex++; } r = (-0.46946116l * r + 1.072302l) * r + 0.3812513l; while (ex < 0) { r *= 0.5l; ex++; } while (ex > 0) { r *= 2.0l; ex--; } r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r); r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r); r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r); r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r); return r; } namespace Trigonometry { template constexpr T hypot(T x, T y) { return sqrt(x * x + y * y); } template constexpr T sin(T angle) { CONSTEXPR_STATE(sin, angle); #if ARCH(X86_64) T ret; asm( "fsin" : "=t"(ret) : "0"(angle)); return ret; #else # if defined(AK_OS_SERENITY) // FIXME: This is a very naive implementation, and is only valid for small x. // Probably a good idea to use a better algorithm in the future, such as a taylor approximation. return angle; # else return __builtin_sin(angle); # endif #endif } template constexpr T cos(T angle) { CONSTEXPR_STATE(cos, angle); #if ARCH(X86_64) T ret; asm( "fcos" : "=t"(ret) : "0"(angle)); return ret; #else # if defined(AK_OS_SERENITY) // FIXME: This is a very naive implementation, and is only valid for small x. // Probably a good idea to use a better algorithm in the future, such as a taylor approximation. return 1 - ((angle * angle) / 2); # else return __builtin_cos(angle); # endif #endif } template constexpr void sincos(T angle, T& sin_val, T& cos_val) { if (is_constant_evaluated()) { sin_val = sin(angle); cos_val = cos(angle); return; } #if ARCH(X86_64) asm( "fsincos" : "=t"(cos_val), "=u"(sin_val) : "0"(angle)); #else sin_val = sin(angle); cos_val = cos(angle); #endif } template constexpr T tan(T angle) { CONSTEXPR_STATE(tan, angle); #if ARCH(X86_64) T ret, one; asm( "fptan" : "=t"(one), "=u"(ret) : "0"(angle)); return ret; #else # if defined(AK_OS_SERENITY) // FIXME: This is a very naive implementation, and is only valid for small x. // Probably a good idea to use a better algorithm in the future, such as a taylor approximation. return angle; # else return __builtin_tan(angle); # endif #endif } template constexpr T atan(T value) { CONSTEXPR_STATE(atan, value); #if ARCH(X86_64) T ret; asm( "fld1\n" "fpatan\n" : "=t"(ret) : "0"(value)); return ret; #else # if defined(AK_OS_SERENITY) // TODO: Add implementation for this function. TODO(); # endif return __builtin_atan(value); #endif } template constexpr T asin(T x) { CONSTEXPR_STATE(asin, x); if (x > 1 || x < -1) return NaN; if (x > (T)0.5 || x < (T)-0.5) return 2 * atan(x / (1 + sqrt(1 - x * x))); T squared = x * x; T value = x; T i = x * squared; value += i * Details::product_odd<1>() / Details::product_even<2>() / 3; i *= squared; value += i * Details::product_odd<3>() / Details::product_even<4>() / 5; i *= squared; value += i * Details::product_odd<5>() / Details::product_even<6>() / 7; i *= squared; value += i * Details::product_odd<7>() / Details::product_even<8>() / 9; i *= squared; value += i * Details::product_odd<9>() / Details::product_even<10>() / 11; i *= squared; value += i * Details::product_odd<11>() / Details::product_even<12>() / 13; i *= squared; value += i * Details::product_odd<13>() / Details::product_even<14>() / 15; i *= squared; value += i * Details::product_odd<15>() / Details::product_even<16>() / 17; return value; } template constexpr T acos(T value) { CONSTEXPR_STATE(acos, value); // FIXME: I am naive return static_cast(0.5) * Pi - asin(value); } template constexpr T atan2(T y, T x) { CONSTEXPR_STATE(atan2, y, x); #if ARCH(X86_64) T ret; asm("fpatan" : "=t"(ret) : "0"(x), "u"(y) : "st(1)"); return ret; #else # if defined(AK_OS_SERENITY) // TODO: Add implementation for this function. TODO(); # endif return __builtin_atan2(y, x); #endif } } using Trigonometry::acos; using Trigonometry::asin; using Trigonometry::atan; using Trigonometry::atan2; using Trigonometry::cos; using Trigonometry::hypot; using Trigonometry::sin; using Trigonometry::sincos; using Trigonometry::tan; namespace Exponentials { template constexpr T log2(T x) { CONSTEXPR_STATE(log2, x); #if ARCH(X86_64) if constexpr (IsSame) { T ret; asm( "fld1\n" "fxch %%st(1)\n" "fyl2x\n" : "=t"(ret) : "0"(x)); return ret; } #endif // References: // Gist comparing some implementations // * https://gist.github.com/Hendiadyoin1/f58346d66637deb9156ef360aa158bf9 if (x == 0) return -Infinity; if (x <= 0 || __builtin_isnan(x)) return NaN; FloatExtractor ext { .d = x }; T exponent = ext.exponent - FloatExtractor::exponent_bias; // When the mantissa shows 0b00 (implicitly 1.0) we are on a power of 2 if (ext.mantissa == 0) return exponent; // FIXME: Handle denormalized numbers separately FloatExtractor mantissa_ext { .mantissa = ext.mantissa, .exponent = FloatExtractor::exponent_bias, .sign = ext.sign }; // (1 <= mantissa < 2) T m = mantissa_ext.d; // This is a reconstruction of one of Sun's algorithms // They use a transformation to lower the problem space, // while keeping the precision, and a 14th degree polynomial, // which is mirrored at sqrt(2) // TODO: Sun has some more algorithms for this and other math functions, // leveraging LUTs, investigate those, if they are better in performance and/or precision. // These seem to be related to crLibM's implementations, for which papers and references // are available. // FIXME: Dynamically adjust the amount of precision between floats and doubles // AKA floats might need less accuracy here, in comparison to doubles bool inverted = false; // m > sqrt(2) if (m > Sqrt2) { inverted = true; m = 2 / m; } T s = (m - 1) / (m + 1); // ln((1 + s) / (1 - s)) == ln(m) T s2 = s * s; // clang-format off T high_approx = s2 * (static_cast(0.6666666666666735130) + s2 * (static_cast(0.3999999999940941908) + s2 * (static_cast(0.2857142874366239149) + s2 * (static_cast(0.2222219843214978396) + s2 * (static_cast(0.1818357216161805012) + s2 * (static_cast(0.1531383769920937332) + s2 * static_cast(0.1479819860511658591))))))); // clang-format on // ln(m) == 2 * s + s * high_approx // log2(m) == log2(e) * ln(m) T log2_mantissa = L2_E * (2 * s + s * high_approx); if (inverted) log2_mantissa = 1 - log2_mantissa; return exponent + log2_mantissa; } template constexpr T log(T x) { CONSTEXPR_STATE(log, x); #if ARCH(X86_64) T ret; asm( "fldln2\n" "fxch %%st(1)\n" "fyl2x\n" : "=t"(ret) : "0"(x)); return ret; #elif defined(AK_OS_SERENITY) // FIXME: Adjust the polynomial and formula in log2 to fit this return log2(x) / L2_E; #else return __builtin_log(x); #endif } template constexpr T log10(T x) { CONSTEXPR_STATE(log10, x); #if ARCH(X86_64) T ret; asm( "fldlg2\n" "fxch %%st(1)\n" "fyl2x\n" : "=t"(ret) : "0"(x)); return ret; #elif defined(AK_OS_SERENITY) // FIXME: Adjust the polynomial and formula in log2 to fit this return log2(x) / L2_10; #else return __builtin_log10(x); #endif } template constexpr T exp(T exponent) { CONSTEXPR_STATE(exp, exponent); #if ARCH(X86_64) T res; asm("fldl2e\n" "fmulp\n" "fld1\n" "fld %%st(1)\n" "fprem\n" "f2xm1\n" "faddp\n" "fscale\n" "fstp %%st(1)" : "=t"(res) : "0"(exponent)); return res; #else # if defined(AK_OS_SERENITY) // TODO: Add implementation for this function. TODO(); # endif return __builtin_exp(exponent); #endif } template constexpr T exp2(T exponent) { CONSTEXPR_STATE(exp2, exponent); #if ARCH(X86_64) T res; asm("fld1\n" "fld %%st(1)\n" "fprem\n" "f2xm1\n" "faddp\n" "fscale\n" "fstp %%st(1)" : "=t"(res) : "0"(exponent)); return res; #else # if defined(AK_OS_SERENITY) // TODO: Add implementation for this function. TODO(); # endif return __builtin_exp2(exponent); #endif } } using Exponentials::exp; using Exponentials::exp2; using Exponentials::log; using Exponentials::log10; using Exponentials::log2; namespace Hyperbolic { template constexpr T sinh(T x) { T exponentiated = exp(x); if (x > 0) return (exponentiated * exponentiated - 1) / 2 / exponentiated; return (exponentiated - 1 / exponentiated) / 2; } template constexpr T cosh(T x) { CONSTEXPR_STATE(cosh, x); T exponentiated = exp(-x); if (x < 0) return (1 + exponentiated * exponentiated) / 2 / exponentiated; return (1 / exponentiated + exponentiated) / 2; } template constexpr T tanh(T x) { if (x > 0) { T exponentiated = exp(2 * x); return (exponentiated - 1) / (exponentiated + 1); } T plusX = exp(x); T minusX = 1 / plusX; return (plusX - minusX) / (plusX + minusX); } template constexpr T asinh(T x) { return log(x + sqrt(x * x + 1)); } template constexpr T acosh(T x) { return log(x + sqrt(x * x - 1)); } template constexpr T atanh(T x) { return log((1 + x) / (1 - x)) / (T)2.0l; } } using Hyperbolic::acosh; using Hyperbolic::asinh; using Hyperbolic::atanh; using Hyperbolic::cosh; using Hyperbolic::sinh; using Hyperbolic::tanh; template constexpr T pow(T x, T y) { CONSTEXPR_STATE(pow, x, y); // FIXME: I am naive if (__builtin_isnan(y)) return y; if (y == 0) return 1; if (x == 0) return 0; if (y == 1) return x; int y_as_int = (int)y; if (y == (T)y_as_int) { T result = x; for (int i = 0; i < fabs(y) - 1; ++i) result *= x; if (y < 0) result = 1.0l / result; return result; } return exp2(y * log2(x)); } template constexpr I clamp_to(T value) { if (value >= static_cast(NumericLimits::max())) return NumericLimits::max(); if (value <= static_cast(NumericLimits::min())) return NumericLimits::min(); if constexpr (IsFloatingPoint) return round_to(value); return value; } #undef CONSTEXPR_STATE #undef AARCH64_INSTRUCTION } #if USING_AK_GLOBALLY using AK::round_to; #endif