UTF-8: Add Utf8CodepointIterator::codepoint_length_in_bytes()

This allows you to retrieve the length (in bytes) of the codepoint the
iterator is currently pointing at.
This commit is contained in:
Andreas Kling 2019-10-18 22:49:23 +02:00
parent ab9e6166e8
commit f4e6dae6fe
Notes: sideshowbarker 2024-07-19 11:38:56 +09:00
2 changed files with 12 additions and 0 deletions

View file

@ -139,6 +139,16 @@ Utf8CodepointIterator& Utf8CodepointIterator::operator++()
return *this; return *this;
} }
int Utf8CodepointIterator::codepoint_length_in_bytes() const
{
ASSERT(m_length > 0);
int codepoint_length_in_bytes;
u32 value;
bool first_byte_makes_sense = decode_first_byte(*m_ptr, codepoint_length_in_bytes, value);
ASSERT(first_byte_makes_sense);
return codepoint_length_in_bytes;
}
u32 Utf8CodepointIterator::operator*() const u32 Utf8CodepointIterator::operator*() const
{ {
ASSERT(m_length > 0); ASSERT(m_length > 0);

View file

@ -18,6 +18,8 @@ public:
Utf8CodepointIterator& operator++(); Utf8CodepointIterator& operator++();
u32 operator*() const; u32 operator*() const;
// Length (in bytes) of the codepoint the iterator is currently pointing at.
int codepoint_length_in_bytes() const;
private: private:
Utf8CodepointIterator(const unsigned char*, int); Utf8CodepointIterator(const unsigned char*, int);
const unsigned char* m_ptr { nullptr }; const unsigned char* m_ptr { nullptr };