mirror of
https://github.com/SerenityOS/serenity.git
synced 2025-01-23 09:51:57 -05:00
AK: Implement Utf8CodepointIterator::peek(size_t)
This adds a peek method for Utf8CodepointIterator, which enables it to be used in some parsing cases where peeking is necessary. peek(0) is equivalent to operator*, except that peek() does not contain any assertions and will just return an empty Optional<u32> when there is no code point at the requested position. This also implements a test case for iterating UTF-8.
This commit is contained in:
parent
31f6ba0952
commit
14506e8f5e
3 changed files with 55 additions and 0 deletions
|
@ -240,4 +240,21 @@ u32 Utf8CodepointIterator::operator*() const
|
|||
return code_point_value_so_far;
|
||||
}
|
||||
|
||||
// Returns the code point located `offset` code points ahead of the current
// position without advancing this iterator; peek(0) is the current code point.
// Returns an empty Optional when the requested position is at or past the end
// of the input, including when this iterator is already done.
Optional<u32> Utf8CodepointIterator::peek(size_t offset) const
{
    // Walk a copy so that this iterator is left untouched.
    auto lookahead = *this;

    for (size_t step = 0; step < offset; ++step) {
        // Check for the end *before* advancing, so an iterator that is already
        // done is never incremented.
        // NOTE(review): operator++ is defined outside this view; presumably it
        // requires remaining input - confirm.
        if (lookahead.done())
            return {};
        ++lookahead;
    }

    // Handles offset == 0 as well as landing exactly on the end after the walk.
    if (lookahead.done())
        return {};
    return *lookahead;
}
|
||||
|
||||
}
|
||||
|
|
|
@ -25,6 +25,8 @@ public:
|
|||
bool operator!=(const Utf8CodepointIterator&) const;
|
||||
Utf8CodepointIterator& operator++();
|
||||
u32 operator*() const;
|
||||
// Looks ahead by `offset` code points without advancing this iterator;
// peek(0) (the default) looks at the current code point.
// NOTE: This returns {} if the peek is at or past EOF.
|
||||
Optional<u32> peek(size_t offset = 0) const;
|
||||
|
||||
ssize_t operator-(const Utf8CodepointIterator& other) const
|
||||
{
|
||||
|
|
|
@ -67,3 +67,39 @@ TEST_CASE(validate_invalid_ut8)
|
|||
EXPECT(!utf8_4.validate(valid_bytes));
|
||||
EXPECT(valid_bytes == 0);
|
||||
}
|
||||
|
||||
// Walks a 25-code-point string (22 ASCII characters followed by U+00A9,
// U+266A and U+A755) and checks that peek() agrees with operator* and with
// the position reached by operator++ at each stage.
TEST_CASE(iterate_utf8)
|
||||
{
|
||||
Utf8View view("Some weird characters \u00A9\u266A\uA755");
|
||||
Utf8CodepointIterator iterator = view.begin();
|
||||
|
||||
// At index 0: peek() with the default offset and peek(0) both match operator*.
EXPECT(*iterator == 'S');
|
||||
EXPECT(iterator.peek().has_value() && iterator.peek().value() == 'S');
|
||||
EXPECT(iterator.peek(0).has_value() && iterator.peek(0).value() == 'S');
|
||||
EXPECT(iterator.peek(1).has_value() && iterator.peek(1).value() == 'o');
|
||||
// Offsets 22 and 24 land on the first and last non-ASCII code points.
EXPECT(iterator.peek(22).has_value() && iterator.peek(22).value() == 0x00A9);
|
||||
EXPECT(iterator.peek(24).has_value() && iterator.peek(24).value() == 0xA755);
|
||||
// Offset 25 is one past the last code point, so there is nothing to return.
EXPECT(!iterator.peek(25).has_value());
|
||||
|
||||
++iterator;
|
||||
|
||||
// At index 1: the last code point is now 23 positions ahead.
EXPECT(*iterator == 'o');
|
||||
EXPECT(iterator.peek(23).has_value() && iterator.peek(23).value() == 0xA755);
|
||||
|
||||
// Advance 23 more times, from index 1 to index 24 (the last code point).
for (size_t i = 0; i < 23; ++i)
|
||||
++iterator;
|
||||
|
||||
EXPECT(!iterator.done());
|
||||
EXPECT(*iterator == 0xA755);
|
||||
EXPECT(iterator.peek().has_value() && iterator.peek().value() == 0xA755);
|
||||
EXPECT(!iterator.peek(1).has_value());
|
||||
|
||||
++iterator;
|
||||
|
||||
// Past the last code point: peek(0) yields an empty Optional, whereas
// dereferencing with operator* is expected to crash.
EXPECT(iterator.done());
|
||||
EXPECT(!iterator.peek(0).has_value());
|
||||
EXPECT_CRASH("Dereferencing Utf8CodepointIterator which is already done.", [&iterator] {
|
||||
*iterator;
|
||||
// Only reached if the dereference above did not crash.
return Test::Crash::Failure::DidNotCrash;
|
||||
});
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue