Mirror of https://github.com/SerenityOS/serenity.git
AK: Add simd_cast<T> and replace to_TxN with it
parent e0242abf93
commit 832b5ff603
7 changed files with 36 additions and 70 deletions
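In short, the commit replaces the per-type to_TxN() conversion helpers with one generic simd_cast<T>() template. A minimal before/after sketch of a call site (illustrative only; widen_to_float is a hypothetical helper, and the include assumes simd_cast lands in AK/SIMD.h as the first hunk below suggests):

    #include <AK/SIMD.h>

    using namespace AK::SIMD;

    // Hypothetical helper: convert four signed 32-bit lanes to floats, lane by lane.
    static f32x4 widen_to_float(i32x4 v)
    {
        // Before this commit: return to_f32x4(v);
        return simd_cast<f32x4>(v);
    }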
@@ -80,6 +80,13 @@ constexpr static size_t vector_length = sizeof(V) / sizeof(ElementOf<V>);
 static_assert(vector_length<i8x4> == 4);
 static_assert(vector_length<f32x4> == 4);
 
+template<SIMDVector T, SIMDVector U>
+requires(vector_length<T> == vector_length<U>)
+ALWAYS_INLINE constexpr T simd_cast(U v)
+{
+    return __builtin_convertvector(v, T);
+}
+
 namespace Detail {
 template<typename T>
 struct IndexVectorFor;
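The requires-clause above only admits conversions between vectors with the same number of lanes; __builtin_convertvector (a GCC/Clang builtin) then converts each lane using the usual scalar conversion rules. A small usage sketch under that assumption:

    // Assumes the AK::SIMD vector types and the simd_cast template from the hunk above are in scope.
    u32x4 lanes = { 1, 2, 3, 4 };
    f32x4 as_float = simd_cast<f32x4>(lanes); // OK: both types have four lanes
    // simd_cast<f32x8>(lanes);               // would be rejected: lane counts differ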
@@ -29,44 +29,6 @@ ALWAYS_INLINE static constexpr u32x4 expand4(u32 u)
     return u32x4 { u, u, u, u };
 }
 
-// Casting
-
-template<typename TSrc>
-ALWAYS_INLINE static u8x4 to_u8x4(TSrc v)
-{
-    return __builtin_convertvector(v, u8x4);
-}
-
-template<typename TSrc>
-ALWAYS_INLINE static u16x4 to_u16x4(TSrc v)
-{
-    return __builtin_convertvector(v, u16x4);
-}
-
-template<typename TSrc>
-ALWAYS_INLINE static u32x4 to_u32x4(TSrc v)
-{
-    return __builtin_convertvector(v, u32x4);
-}
-
-template<typename TSrc>
-ALWAYS_INLINE static i8x4 to_i8x4(TSrc v)
-{
-    return __builtin_convertvector(v, i8x4);
-}
-
-template<typename TSrc>
-ALWAYS_INLINE static i32x4 to_i32x4(TSrc v)
-{
-    return __builtin_convertvector(v, i32x4);
-}
-
-template<typename TSrc>
-ALWAYS_INLINE static f32x4 to_f32x4(TSrc v)
-{
-    return __builtin_convertvector(v, f32x4);
-}
-
 // Masking
 
 ALWAYS_INLINE static i32 maskbits(i32x4 mask)
@@ -18,7 +18,7 @@ namespace AK::SIMD {
 
 ALWAYS_INLINE static f32x4 truncate_int_range(f32x4 v)
 {
-    return to_f32x4(to_i32x4(v));
+    return simd_cast<f32x4>(simd_cast<i32x4>(v));
 }
 
 ALWAYS_INLINE static f32x4 floor_int_range(f32x4 v)
@@ -192,7 +192,7 @@ ErrorOr<void> PNGWriter::add_IDAT_chunk(Gfx::Bitmap const& bitmap, Compress::Zli
         TRY(buffer.try_append(simd[2]));
         if constexpr (include_alpha)
             TRY(buffer.try_append(simd[3]));
-        sum += AK::SIMD::to_i32x4(AK::SIMD::to_i8x4(simd));
+        sum += AK::SIMD::simd_cast<AK::SIMD::i32x4>(AK::SIMD::simd_cast<AK::SIMD::i8x4>(simd));
         return {};
     }
 
@@ -234,8 +234,8 @@ ErrorOr<void> PNGWriter::add_IDAT_chunk(Gfx::Bitmap const& bitmap, Compress::Zli
         TRY(up_filter.append(pixel - pixel_y_minus_1));
 
         // The sum Orig(a) + Orig(b) shall be performed without overflow (using at least nine-bit arithmetic).
-        auto sum = AK::SIMD::to_u16x4(pixel_x_minus_1) + AK::SIMD::to_u16x4(pixel_y_minus_1);
-        auto average = AK::SIMD::to_u8x4(sum / 2);
+        auto sum = AK::SIMD::simd_cast<AK::SIMD::u16x4>(pixel_x_minus_1) + AK::SIMD::simd_cast<AK::SIMD::u16x4>(pixel_y_minus_1);
+        auto average = AK::SIMD::simd_cast<AK::SIMD::u8x4>(sum / 2);
         TRY(average_filter.append(pixel - average));
 
         TRY(paeth_filter.append(pixel - PNG::paeth_predictor(pixel_x_minus_1, pixel_y_minus_1, pixel_xy_minus_1)));
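The widening casts are what satisfy the spec note quoted in the comment: each input lane is an 8-bit channel, so Orig(a) + Orig(b) can reach 510 and must be summed in at least nine bits before halving. A scalar sketch of the same idea (illustrative only, not code from PNGWriter; assumes AK/Types.h for u8/u16):

    #include <AK/Types.h>

    // PNG average filter per channel: floor((a + b) / 2) without 8-bit overflow.
    static u8 average_channel(u8 a, u8 b)
    {
        u16 sum = u16(a) + u16(b); // up to 510, needs more than 8 bits
        return u8(sum / 2);        // the halved value fits back into 8 bits
    }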
@@ -46,9 +46,8 @@ using AK::SIMD::i32x4;
 using AK::SIMD::load4_masked;
 using AK::SIMD::maskbits;
 using AK::SIMD::maskcount;
+using AK::SIMD::simd_cast;
 using AK::SIMD::store4_masked;
-using AK::SIMD::to_f32x4;
-using AK::SIMD::to_u32x4;
 using AK::SIMD::u32x4;
 
 static constexpr int subpixel_factor = 1 << SUBPIXEL_BITS;
@@ -84,10 +83,10 @@ static GPU::ColorType to_argb32(FloatVector4 const& color)
 ALWAYS_INLINE static u32x4 to_argb32(Vector4<f32x4> const& color)
 {
     auto clamped = color.clamped(expand4(0.0f), expand4(1.0f));
-    auto r = to_u32x4(clamped.x() * 255);
-    auto g = to_u32x4(clamped.y() * 255);
-    auto b = to_u32x4(clamped.z() * 255);
-    auto a = to_u32x4(clamped.w() * 255);
+    auto r = simd_cast<u32x4>(clamped.x() * 255);
+    auto g = simd_cast<u32x4>(clamped.y() * 255);
+    auto b = simd_cast<u32x4>(clamped.z() * 255);
+    auto a = simd_cast<u32x4>(clamped.w() * 255);
 
     return a << 24 | r << 16 | g << 8 | b;
 }
@@ -96,10 +95,10 @@ static Vector4<f32x4> to_vec4(u32x4 bgra)
 {
     auto constexpr one_over_255 = expand4(1.0f / 255);
     return {
-        to_f32x4((bgra >> 16) & 0xff) * one_over_255,
-        to_f32x4((bgra >> 8) & 0xff) * one_over_255,
-        to_f32x4(bgra & 0xff) * one_over_255,
-        to_f32x4((bgra >> 24) & 0xff) * one_over_255,
+        simd_cast<f32x4>((bgra >> 16) & 0xff) * one_over_255,
+        simd_cast<f32x4>((bgra >> 8) & 0xff) * one_over_255,
+        simd_cast<f32x4>(bgra & 0xff) * one_over_255,
+        simd_cast<f32x4>((bgra >> 24) & 0xff) * one_over_255,
     };
 }
 
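Taken together, to_argb32() packs four clamped [0, 1] color values into the 8-bit channels of one 32-bit word, and to_vec4() reverses that by masking, shifting, and scaling by 1/255. A scalar sketch of the round trip (illustrative names; the real code does this for four pixels at once, and assumes AK/Types.h for u32):

    #include <AK/Types.h>

    static u32 pack_argb(float r, float g, float b, float a)
    {
        auto channel = [](float c) { return u32(c * 255); }; // assumes c is already clamped to [0, 1]
        return channel(a) << 24 | channel(r) << 16 | channel(g) << 8 | channel(b);
    }

    static float unpack_channel(u32 argb, int shift)
    {
        return float((argb >> shift) & 0xff) * (1.0f / 255);
    }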
@@ -779,9 +778,9 @@ void Device::rasterize_triangle(Triangle& triangle)
             quad.mask = test_point4(edge_values);
 
             quad.barycentrics = {
-                to_f32x4(edge_values.x()),
-                to_f32x4(edge_values.y()),
-                to_f32x4(edge_values.z()),
+                simd_cast<f32x4>(edge_values.x()),
+                simd_cast<f32x4>(edge_values.y()),
+                simd_cast<f32x4>(edge_values.z()),
             };
         },
         [&](auto& quad) {
@@ -120,7 +120,7 @@ ALWAYS_INLINE static AK::SIMD::f32x4 log2_approximate(AK::SIMD::f32x4 v)
     } u { v };
 
     // Extract just the exponent minus 1, giving a lower integral bound for log2.
-    auto log = AK::SIMD::to_f32x4(((u.int_val >> 23) & 255) - 128);
+    auto log = AK::SIMD::simd_cast<AK::SIMD::f32x4>(((u.int_val >> 23) & 255) - 128);
 
     // Replace the exponent with 0, giving a value between 1 and 2.
     u.int_val &= ~(255 << 23);
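The cast only turns the per-lane integers into floats; the value itself comes from the IEEE-754 binary32 layout, where bits 23-30 hold the biased exponent e + 127, so ((bits >> 23) & 255) - 128 is e - 1, an integral lower bound for log2(v). A scalar sketch of the same extraction (illustrative; mirrors the union trick used above, and assumes AK/Types.h for u32):

    #include <AK/Types.h>

    static float exponent_minus_one(float v)
    {
        union {
            float float_val;
            u32 int_val;
        } u { v };
        int biased = (u.int_val >> 23) & 255; // e + 127 for a normalized float
        return float(biased - 128);           // e - 1; e.g. 1.0f for v = 6.0f (= 1.5 * 2^2)
    }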
@@ -134,8 +134,8 @@ ALWAYS_INLINE static AK::SIMD::f32x4 log2_approximate(AK::SIMD::f32x4 v)
 ALWAYS_INLINE static Vector2<AK::SIMD::f32x4> to_vec2_f32x4(Vector2<AK::SIMD::i32x4> const& v)
 {
     return {
-        AK::SIMD::to_f32x4(v.x()),
-        AK::SIMD::to_f32x4(v.y()),
+        AK::SIMD::simd_cast<AK::SIMD::f32x4>(v.x()),
+        AK::SIMD::simd_cast<AK::SIMD::f32x4>(v.y()),
     };
 }
 
@@ -23,9 +23,7 @@ using AK::SIMD::expand4;
 using AK::SIMD::floor_int_range;
 using AK::SIMD::frac_int_range;
 using AK::SIMD::maskbits;
-using AK::SIMD::to_f32x4;
-using AK::SIMD::to_i32x4;
-using AK::SIMD::to_u32x4;
+using AK::SIMD::simd_cast;
 
 static f32x4 wrap_repeat(f32x4 value)
 {
@@ -47,7 +45,7 @@ static f32x4 wrap_mirrored_repeat(f32x4 value, f32x4 num_texels)
 {
     f32x4 integer = floor_int_range(value);
     f32x4 frac = value - integer;
-    auto is_odd = to_i32x4(integer) & 1;
+    auto is_odd = simd_cast<i32x4>(integer) & 1;
     return wrap_clamp_to_edge(is_odd ? 1 - frac : frac, num_texels);
 }
 
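The parity test is what produces the mirroring: even periods keep the fractional coordinate, odd periods reflect it. A scalar sketch of that step (illustrative; the real code additionally clamps the result to the edge using num_texels):

    #include <math.h>

    static float mirror_frac(float value)
    {
        float integer = floorf(value);
        float frac = value - integer;
        bool is_odd_period = (int(integer) & 1) != 0;
        return is_odd_period ? 1.0f - frac : frac; // reflect every other period
    }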
@@ -141,12 +139,12 @@ Vector4<AK::SIMD::f32x4> Sampler::sample_2d(Vector2<AK::SIMD::f32x4> const& uv)
     auto lambda_xy = log2_approximate(scale_factor) * .5f + texture_lod_bias;
     auto level = clamp(lambda_xy, min_level, max_level);
 
-    auto lower_level_texel = sample_2d_lod(uv, to_u32x4(level), m_config.texture_min_filter);
+    auto lower_level_texel = sample_2d_lod(uv, simd_cast<u32x4>(level), m_config.texture_min_filter);
 
     if (m_config.mipmap_filter == GPU::MipMapFilter::Nearest)
         return lower_level_texel;
 
-    auto higher_level_texel = sample_2d_lod(uv, to_u32x4(min(level + 1.f, max_level)), m_config.texture_min_filter);
+    auto higher_level_texel = sample_2d_lod(uv, simd_cast<u32x4>(min(level + 1.f, max_level)), m_config.texture_min_filter);
 
     return mix(lower_level_texel, higher_level_texel, frac_int_range(level));
 }
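Mip selection here is driven by one fractional LOD value per lane: the truncating simd_cast<u32x4> picks the lower level, and mix() blends toward the next level by the fractional part. A scalar sketch of that blend (illustrative names, a single sample instead of a texel vector):

    #include <math.h>

    static float blend_mip_levels(float lower_sample, float higher_sample, float level)
    {
        float frac = level - floorf(level); // what frac_int_range() computes per lane
        return lower_sample * (1.0f - frac) + higher_sample * frac;
    }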
@@ -168,8 +166,8 @@ Vector4<AK::SIMD::f32x4> Sampler::sample_2d_lod(Vector2<AK::SIMD::f32x4> const&
         image.height_at_level(level[3]),
     };
 
-    auto f_width = to_f32x4(width);
-    auto f_height = to_f32x4(height);
+    auto f_width = simd_cast<f32x4>(width);
+    auto f_height = simd_cast<f32x4>(height);
 
     u32x4 width_mask = width - 1;
     u32x4 height_mask = height - 1;
@@ -178,8 +176,8 @@ Vector4<AK::SIMD::f32x4> Sampler::sample_2d_lod(Vector2<AK::SIMD::f32x4> const&
     f32x4 v = wrap(uv.y(), m_config.texture_wrap_v, f_height) * f_height;
 
     if (filter == GPU::TextureFilter::Nearest) {
-        u32x4 i = to_u32x4(u);
-        u32x4 j = to_u32x4(v);
+        u32x4 i = simd_cast<u32x4>(u);
+        u32x4 j = simd_cast<u32x4>(v);
 
         i = image.width_is_power_of_two() ? i & width_mask : i % width;
         j = image.height_is_power_of_two() ? j & height_mask : j % height;
@@ -193,9 +191,9 @@ Vector4<AK::SIMD::f32x4> Sampler::sample_2d_lod(Vector2<AK::SIMD::f32x4> const&
     f32x4 const floored_u = floor_int_range(u);
     f32x4 const floored_v = floor_int_range(v);
 
-    u32x4 i0 = to_u32x4(floored_u);
+    u32x4 i0 = simd_cast<u32x4>(floored_u);
     u32x4 i1 = i0 + 1;
-    u32x4 j0 = to_u32x4(floored_v);
+    u32x4 j0 = simd_cast<u32x4>(floored_v);
     u32x4 j1 = j0 + 1;
 
     if (m_config.texture_wrap_u == GPU::TextureWrapMode::Repeat) {