serenity/AK/Tests/TestUtf8.cpp
Sergey Bugaev 5d3696174b AK: Add a Utf8View type for iterating over UTF-8 codepoints
Utf8View wraps a StringView and implements begin() and end() that
return a Utf8CodepointIterator, which parses UTF-8-encoded Unicode
codepoints and returns them as 32-bit integers.

This is the first step towards supporting emojis in Serenity :^)
https://github.com/SerenityOS/serenity/issues/490
2019-08-28 13:46:02 +02:00

58 lines
1.7 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <AK/TestSuite.h>
#include <AK/Utf8View.h>
// Pure-ASCII input: the view must validate, and iterating it must produce one
// codepoint per character, each equal to that character's ASCII value.
TEST_CASE(decode_ascii)
{
    Utf8View view { "Hello World!11" };
    EXPECT(view.validate());

    // Codepoints of "Hello World!11", in iteration order.
    const u32 wanted[] = { 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 49, 49 };
    const size_t wanted_count = sizeof(wanted) / sizeof(wanted[0]);

    size_t seen = 0;
    for (u32 codepoint : view) {
        // Bounds check ahead of the table lookup below.
        ASSERT(seen < wanted_count);
        EXPECT_EQ(codepoint, wanted[seen]);
        ++seen;
    }

    // Iteration must cover the whole table, not stop early.
    EXPECT_EQ(seen, wanted_count);
}
// Multi-byte input (Cyrillic, an emoji, Greek, Japanese): the view must
// validate, and iteration must yield exactly the listed codepoints in order.
TEST_CASE(decode_utf8)
{
    Utf8View view { "Привет, мир! 😀 γειά σου κόσμος こんにちは世界" };
    EXPECT(view.validate());

    // One entry per codepoint of the literal above, as decimal values.
    const u32 wanted[] = { 1055, 1088, 1080, 1074, 1077, 1090, 44, 32, 1084, 1080, 1088, 33, 32, 128512, 32, 947, 949, 953, 940, 32, 963, 959, 965, 32, 954, 972, 963, 956, 959, 962, 32, 12371, 12435, 12395, 12385, 12399, 19990, 30028 };
    const size_t wanted_count = sizeof(wanted) / sizeof(wanted[0]);

    size_t seen = 0;
    for (u32 codepoint : view) {
        // Bounds check ahead of the table lookup below.
        ASSERT(seen < wanted_count);
        EXPECT_EQ(codepoint, wanted[seen]);
        ++seen;
    }

    // Iteration must cover the whole table, not stop early.
    EXPECT_EQ(seen, wanted_count);
}
// Byte sequences that are not well-formed UTF-8 must be rejected by validate().
// (The "ut8" spelling in the test name is left untouched so the test keeps its
// existing name.)
TEST_CASE(validate_invalid_ut8)
{
    // 0xB6 has the continuation-byte bit pattern (10xxxxxx) but no lead byte precedes it.
    const char stray_continuation[] = { '*', '#', static_cast<char>(0xB6), '\t', '\0' };
    Utf8View stray_continuation_view { stray_continuation };
    EXPECT(!stray_continuation_view.validate());

    // 0xD0 opens a two-byte sequence, but the next byte is another lead byte.
    const char lead_after_lead[] = { '*', '#', static_cast<char>(0xD0), static_cast<char>(0xD0), '\0' };
    Utf8View lead_after_lead_view { lead_after_lead };
    EXPECT(!lead_after_lead_view.validate());

    // A lone 0xD0 lead byte: the string ends before its continuation byte.
    const char truncated_sequence[] = { static_cast<char>(0xD0), '\0' };
    Utf8View truncated_sequence_view { truncated_sequence };
    EXPECT(!truncated_sequence_view.validate());

    // 0xD0 lead byte followed by a plain ASCII byte instead of a continuation byte.
    const char ascii_after_lead[] = { static_cast<char>(0xD0), '#', '\0' };
    Utf8View ascii_after_lead_view { ascii_after_lead };
    EXPECT(!ascii_after_lead_view.validate());
}
// Suite-level macro invocation, passing UTF8 as the suite name. Presumably (its
// definition in AK/TestSuite.h is not visible here) this emits the program entry
// point that runs the TEST_CASEs above — confirm against that header.
TEST_MAIN(UTF8)