AK: Add a method to compute UTF-16 length from a UTF-8 string

(cherry picked from commit 7a17c654d293c4afaf3086dc94e8cd4bceac48b1;
amended to resolve a minor conflict in TestUtf16.cpp because we do not
(yet?) have `TEST_CASE(null_view)`. Also amended so that the new method
does not call simdutf -- it is now inefficient as well, but at least the
inefficient code lives in one place instead of in several.)
This commit is contained in:
Timothy Flynn 2024-07-30 06:19:56 -04:00 committed by Nico Weber
parent 14e2f8c065
commit 59040d9bb8
3 changed files with 17 additions and 0 deletions

View file

@@ -63,6 +63,13 @@ ErrorOr<void> code_point_to_utf16(Utf16Data& string, u32 code_point)
return {};
}
// Reports how many UTF-16 code units the UTF-8 encoded `string` occupies after
// conversion to UTF-16.
//
// The answer is obtained by actually performing the conversion through
// AK::utf8_to_utf16() and measuring the result; the conversion is wrapped in
// MUST(), so it is expected to succeed.
size_t utf16_code_unit_length_from_utf8(StringView string)
{
    // FIXME: This is inefficient! A complete temporary UTF-16 copy of the input
    //        is built only so that its code units can be counted.
    auto converted = MUST(AK::utf8_to_utf16(string));
    Utf16View const converted_view { converted };
    return converted_view.length_in_code_units();
}
bool Utf16View::is_high_surrogate(u16 code_unit)
{
return (code_unit >= high_surrogate_min) && (code_unit <= high_surrogate_max);

View file

@@ -25,6 +25,8 @@ ErrorOr<Utf16Data> utf8_to_utf16(Utf8View const&);
ErrorOr<Utf16Data> utf32_to_utf16(Utf32View const&);
ErrorOr<void> code_point_to_utf16(Utf16Data&, u32);
// Returns the number of UTF-16 code units that the given UTF-8 encoded string
// occupies once converted to UTF-16 (e.g. 3 for "abc", 2 for a single emoji
// such as U+1F600).
size_t utf16_code_unit_length_from_utf8(StringView);
class Utf16View;
class Utf16CodePointIterator {

View file

@@ -89,6 +89,14 @@ TEST_CASE(decode_utf16)
EXPECT_EQ(i, expected.size());
}
// Checks AK::utf16_code_unit_length_from_utf8() against hand-counted UTF-16
// code unit lengths for empty, ASCII, non-BMP and mixed-script input.
TEST_CASE(utf16_code_unit_length_from_utf8)
{
// An empty string contains no code units.
EXPECT_EQ(AK::utf16_code_unit_length_from_utf8(""sv), 0uz);
// ASCII: every character is a single UTF-16 code unit.
EXPECT_EQ(AK::utf16_code_unit_length_from_utf8("abc"sv), 3uz);
// U+1F600 lies above U+FFFF, so UTF-16 encodes it as a surrogate pair (2 code units).
EXPECT_EQ(AK::utf16_code_unit_length_from_utf8("😀"sv), 2uz);
// Mixed Cyrillic, Greek and Japanese text with punctuation and spaces:
// 37 characters of one code unit each, plus one emoji of two code units.
EXPECT_EQ(AK::utf16_code_unit_length_from_utf8("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv), 39uz);
}
TEST_CASE(utf16_literal)
{
{