/*
 * Copyright (c) 2019-2020, Sergey Bugaev <bugaevc@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
|
|
|
|
|
|
2021-04-24 23:53:23 -06:00
|
|
|
|
#include <LibTest/TestCase.h>
|
2019-08-28 00:57:15 +03:00
|
|
|
|
|
|
|
|
|
#include <AK/Utf8View.h>
|
|
|
|
|
|
|
|
|
|
// Iterating a pure-ASCII Utf8View must produce exactly one code point per
// character of the literal, in order.
TEST_CASE(decode_ascii)
{
    // Code points of "Hello World!11", in order.
    const u32 expected[] = { 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 49, 49 };
    const size_t expected_size = sizeof(expected) / sizeof(*expected);

    Utf8View utf8("Hello World!11");
    EXPECT(utf8.validate());

    size_t i = 0;
    for (u32 code_point : utf8) {
        // Guard the table index before it is used on the next line.
        VERIFY(i < expected_size);
        EXPECT_EQ(code_point, expected[i]);
        ++i;
    }

    // Every entry of the table must have been visited, no fewer.
    EXPECT_EQ(i, expected_size);
}
|
|
|
|
|
|
|
|
|
|
// Iterating a multi-script string (Cyrillic, an emoji, Greek, Japanese) must
// produce the expected sequence of code points, and validation must accept
// the whole byte range.
TEST_CASE(decode_utf8)
{
    // Expected code points of the literal below, in order.
    const u32 expected[] = { 1055, 1088, 1080, 1074, 1077, 1090, 44, 32, 1084, 1080, 1088, 33, 32, 128512, 32, 947, 949, 953, 940, 32, 963, 959, 965, 32, 954, 972, 963, 956, 959, 962, 32, 12371, 12435, 12395, 12385, 12399, 19990, 30028 };
    const size_t expected_size = sizeof(expected) / sizeof(*expected);

    Utf8View utf8("Привет, мир! 😀 γειά σου κόσμος こんにちは世界");

    // For a fully valid string the reported valid prefix is the whole view.
    size_t valid_bytes;
    EXPECT(utf8.validate(valid_bytes));
    EXPECT(valid_bytes == (size_t)utf8.byte_length());

    size_t i = 0;
    for (u32 code_point : utf8) {
        // Guard the table index before it is used on the next line.
        VERIFY(i < expected_size);
        EXPECT_EQ(code_point, expected[i]);
        ++i;
    }

    // Every entry of the table must have been visited, no fewer.
    EXPECT_EQ(i, expected_size);
}
|
|
|
|
|
|
|
|
|
|
// validate() must reject malformed input and report how many leading bytes
// were still valid. Each input below is a NUL-terminated byte string.
TEST_CASE(validate_invalid_ut8)
{
    // Shared across all four cases; passed to validate() and then compared.
    size_t valid_bytes;

    // 182 (0xB6) is a continuation byte with no lead byte before it;
    // the two ASCII bytes in front of it are fine.
    char stray_continuation[] = { 42, 35, static_cast<char>(182), 9, 0 };
    Utf8View utf8_1(stray_continuation);
    EXPECT(!utf8_1.validate(valid_bytes));
    EXPECT(valid_bytes == 2);

    // 208 (0xD0) opens a two-byte sequence but is followed by another
    // lead byte instead of a continuation byte.
    char lead_followed_by_lead[] = { 42, 35, static_cast<char>(208), static_cast<char>(208), 0 };
    Utf8View utf8_2(lead_followed_by_lead);
    EXPECT(!utf8_2.validate(valid_bytes));
    EXPECT(valid_bytes == 2);

    // A lead byte as the only byte: the sequence is cut off by the end
    // of the string.
    char lone_lead[] = { static_cast<char>(208), 0 };
    Utf8View utf8_3(lone_lead);
    EXPECT(!utf8_3.validate(valid_bytes));
    EXPECT(valid_bytes == 0);

    // A lead byte followed by a plain ASCII byte rather than a
    // continuation byte.
    char lead_followed_by_ascii[] = { static_cast<char>(208), 35, 0 };
    Utf8View utf8_4(lead_followed_by_ascii);
    EXPECT(!utf8_4.validate(valid_bytes));
    EXPECT(valid_bytes == 0);
}
|
2021-05-24 00:29:16 +02:00
|
|
|
|
|
|
|
|
|
// Walks a Utf8CodePointIterator through a string and checks dereferencing,
// peek() at various offsets, done(), and the crash on dereferencing a
// finished iterator.
TEST_CASE(iterate_utf8)
{
    // "Some weird characters " is 22 ASCII code points, so the three escaped
    // code points sit at offsets 22, 23 and 24 from the start.
    Utf8View view { "Some weird characters \u00A9\u266A\uA755" };
    auto iterator = view.begin();

    // At the start: peek() and peek(0) both report the current code point.
    EXPECT(*iterator == 'S');
    EXPECT(iterator.peek().has_value() && iterator.peek().value() == 'S');
    EXPECT(iterator.peek(0).has_value() && iterator.peek(0).value() == 'S');
    EXPECT(iterator.peek(1).has_value() && iterator.peek(1).value() == 'o');
    EXPECT(iterator.peek(22).has_value() && iterator.peek(22).value() == 0x00A9);
    EXPECT(iterator.peek(24).has_value() && iterator.peek(24).value() == 0xA755);
    // Offset 25 is one past the last code point.
    EXPECT(!iterator.peek(25).has_value());

    ++iterator;

    // One step in, the last code point is 23 positions ahead instead of 24.
    EXPECT(*iterator == 'o');
    EXPECT(iterator.peek(23).has_value() && iterator.peek(23).value() == 0xA755);

    // Advance the remaining 23 positions to land on the last code point.
    size_t steps_left = 23;
    while (steps_left-- > 0)
        ++iterator;

    EXPECT(!iterator.done());
    EXPECT(*iterator == 0xA755);
    EXPECT(iterator.peek().has_value() && iterator.peek().value() == 0xA755);
    EXPECT(!iterator.peek(1).has_value());

    ++iterator;

    // Past the end: the iterator is done and there is nothing left to peek.
    EXPECT(iterator.done());
    EXPECT(!iterator.peek(0).has_value());

    // Dereferencing now is expected to crash; reaching the return statement
    // reports that it did not.
    EXPECT_CRASH("Dereferencing Utf8CodePointIterator which is already done.", [&iterator] {
        *iterator;
        return Test::Crash::Failure::DidNotCrash;
    });
}
|