mirror of
https://github.com/SerenityOS/serenity.git
synced 2025-01-23 09:51:57 -05:00
LibGfx/PNGWriter: Use SIMD for PNG score calculation
Produces exactly the same output, but a bit faster. The relative speedup is larger at lower compression levels:

    image -o sunset_retro.png sunset_retro.bmp --png-compression-level 0
        56.8 ms ± 1.5 ms -> 34.8 ms ± 0.9 ms (38.7% faster)
    image -o sunset_retro.png sunset_retro.bmp --png-compression-level 1
        84.6 ms ± 1.7 ms -> 64.2 ms ± 4.9 ms (24.1% faster)
    image -o sunset_retro.png sunset_retro.bmp --png-compression-level 2
        212.1 ms ± 2.5 ms -> 190.3 ms ± 1.6 ms (10.3% faster)
    image -o sunset_retro.png sunset_retro.bmp --png-compression-level 3
        671.4 ms ± 12.3 ms -> 646.5 ms ± 4.7 ms (3.7% faster)

Compression level 2 is the default, so this is about a 10% speedup in practice.

For comparison, `sips` needs 49.9 ms ± 3.0 ms to convert sunset_retro.bmp to sunset_retro.png, and judging from the output file size, it uses something similar to our compression level 1. At that level we used to take 1.7x as long as sips; now we take 1.29x as long.
This commit is contained in:
parent
34a4d16776
commit
781a39e613
1 changed files with 14 additions and 6 deletions
|
@ -183,12 +183,11 @@ ErrorOr<void> PNGWriter::add_IDAT_chunk(Gfx::Bitmap const& bitmap, Compress::Zli
|
|||
struct Filter {
|
||||
PNG::FilterType type;
|
||||
ByteBuffer buffer {};
|
||||
int sum = 0;
|
||||
AK::SIMD::i32x4 sum { 0, 0, 0, 0 };
|
||||
|
||||
ErrorOr<void> append(u8 byte)
|
||||
{
|
||||
TRY(buffer.try_append(byte));
|
||||
sum += static_cast<i8>(byte);
|
||||
return {};
|
||||
}
|
||||
|
||||
|
@ -199,8 +198,17 @@ ErrorOr<void> PNGWriter::add_IDAT_chunk(Gfx::Bitmap const& bitmap, Compress::Zli
|
|||
TRY(append(simd[2]));
|
||||
if constexpr (include_alpha)
|
||||
TRY(append(simd[3]));
|
||||
sum += AK::SIMD::to_i32x4(AK::SIMD::to_i8x4(simd));
|
||||
return {};
|
||||
}
|
||||
|
||||
// Collapses the four-lane accumulator `sum` into a single scalar.
// Lanes 0-2 are always added; lane 3 is added only when include_alpha is set.
i32 sum_of_signed_values() const
|
||||
{
|
||||
i32 result = sum[0] + sum[1] + sum[2];
|
||||
if constexpr (include_alpha)
|
||||
result += sum[3];
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
Filter none_filter { .type = PNG::FilterType::None };
|
||||
|
@ -251,13 +259,13 @@ ErrorOr<void> PNGWriter::add_IDAT_chunk(Gfx::Bitmap const& bitmap, Compress::Zli
|
|||
// compute the output scanline using all five filters, and select the filter that gives the smallest sum of absolute values of outputs.
|
||||
// (Consider the output bytes as signed differences for this test.)
|
||||
Filter& best_filter = none_filter;
|
||||
if (abs(best_filter.sum) > abs(sub_filter.sum))
|
||||
if (abs(best_filter.sum_of_signed_values()) > abs(sub_filter.sum_of_signed_values()))
|
||||
best_filter = sub_filter;
|
||||
if (abs(best_filter.sum) > abs(up_filter.sum))
|
||||
if (abs(best_filter.sum_of_signed_values()) > abs(up_filter.sum_of_signed_values()))
|
||||
best_filter = up_filter;
|
||||
if (abs(best_filter.sum) > abs(average_filter.sum))
|
||||
if (abs(best_filter.sum_of_signed_values()) > abs(average_filter.sum_of_signed_values()))
|
||||
best_filter = average_filter;
|
||||
if (abs(best_filter.sum) > abs(paeth_filter.sum))
|
||||
if (abs(best_filter.sum_of_signed_values()) > abs(paeth_filter.sum_of_signed_values()))
|
||||
best_filter = paeth_filter;
|
||||
|
||||
TRY(uncompressed_block_data.try_append(to_underlying(best_filter.type)));
|
||||
|
|
Loading…
Reference in a new issue