AK: Cache all the line positions in LineTrackingLexer

Also updates a LibWeb text test that used to report the wrong line
number.

(cherry picked from commit 02b50d463b174e5d525c7ab8ce8dd173d550de28;
amended to exclude LineTrackingLexer from KERNEL, since it now uses
make<>)
This commit is contained in:
Ali Mohammad Pur 2024-10-10 09:58:31 +02:00 committed by Nico Weber
parent 2502b5713d
commit bc9e03ea38
4 changed files with 48 additions and 31 deletions

View file

@ -176,30 +176,37 @@ ErrorOr<T> GenericLexer::consume_decimal_integer()
}
}
#if !defined(KERNEL)
LineTrackingLexer::Position LineTrackingLexer::position_for(size_t index) const
{
auto& [cached_index, cached_line, cached_column] = m_cached_position;
if (cached_index <= index) {
for (size_t i = cached_index; i < index; ++i) {
if (m_input[i] == '\n')
++cached_line, cached_column = 0;
else
++cached_column;
}
} else {
auto lines_backtracked = m_input.substring_view(index, cached_index - index).count('\n');
cached_line -= lines_backtracked;
if (lines_backtracked == 0) {
cached_column -= cached_index - index;
} else {
auto current_line_start = m_input.substring_view(0, index).find_last('\n').value_or(0);
cached_column = index - current_line_start;
// Sad case: we have no idea where the nearest newline is, so we have to
// scan ahead a bit.
while (index > m_largest_known_line_start_position) {
auto next_newline = m_input.find('\n', m_largest_known_line_start_position);
if (!next_newline.has_value()) {
// No more newlines, add the end of the input as a line start to avoid searching again.
m_line_start_positions->insert(m_input.length(), m_line_start_positions->size());
m_largest_known_line_start_position = m_input.length();
break;
}
m_line_start_positions->insert(next_newline.value() + 1, m_line_start_positions->size());
m_largest_known_line_start_position = next_newline.value() + 1;
}
cached_index = index;
return m_cached_position;
// We should always have at least the first line start position.
auto previous_line_it = m_line_start_positions->find_largest_not_above_iterator(index);
auto previous_line_index = previous_line_it.key();
auto line = *previous_line_it;
auto column = index - previous_line_index;
if (line == 0) {
// First line, take into account the start position.
column += m_first_line_start_position.column;
}
line += m_first_line_start_position.line;
return { index, line, column };
}
#endif
template ErrorOr<u8> GenericLexer::consume_decimal_integer<u8>();
template ErrorOr<i8> GenericLexer::consume_decimal_integer<i8>();

View file

@ -6,6 +6,8 @@
#pragma once
#include <AK/NonnullOwnPtr.h>
#include <AK/RedBlackTree.h>
#include <AK/Result.h>
#include <AK/String.h>
#include <AK/StringView.h>
@ -233,10 +235,9 @@ protected:
size_t m_index { 0 };
};
#if !defined(KERNEL)
class LineTrackingLexer : public GenericLexer {
public:
using GenericLexer::GenericLexer;
struct Position {
size_t offset { 0 };
size_t line { 0 };
@ -245,21 +246,29 @@ public:
LineTrackingLexer(StringView input, Position start_position)
: GenericLexer(input)
, m_cached_position {
.line = start_position.line,
.column = start_position.column,
}
, m_first_line_start_position(start_position)
, m_line_start_positions(make<RedBlackTree<size_t, size_t>>())
{
m_line_start_positions->insert(0, 0);
auto first_newline = input.find('\n').map([](auto x) { return x + 1; }).value_or(input.length());
m_line_start_positions->insert(first_newline, 1);
m_largest_known_line_start_position = first_newline;
}
// Convenience constructor: delegates to the (input, start_position) constructor
// with a default start position of { 0, 1, 1 } -- offset 0, line 1, and a third
// field of 1 (presumably the column; the rest of Position is elided from this view).
LineTrackingLexer(StringView input)
: LineTrackingLexer(input, { 0, 1, 1 })
{
}
Position cached_position() const { return m_cached_position; }
void restore_cached_offset(Position cached_position) { m_cached_position = cached_position; }
Position position_for(size_t) const;
Position current_position() const { return position_for(m_index); }
protected:
mutable Position m_cached_position;
Position m_first_line_start_position;
mutable NonnullOwnPtr<RedBlackTree<size_t, size_t>> m_line_start_positions; // offset -> line index
mutable size_t m_largest_known_line_start_position { 0 };
};
#endif
constexpr auto is_any_of(StringView values)
{
@ -281,5 +290,7 @@ using AK::GenericLexer;
using AK::is_any_of;
using AK::is_path_separator;
using AK::is_quote;
# if !defined(KERNEL)
using AK::LineTrackingLexer;
# endif
#endif

View file

@ -1,3 +1,3 @@
Got load event
[object HTMLDocument]
Failed to parse XML document: Expected '>' at line: 1, col: 20 (offset 59)
Failed to parse XML document: Expected '>' at line: 2, col: 20 (offset 59)

View file

@ -147,9 +147,8 @@ private:
[[nodiscard]] auto rollback_point(SourceLocation location = SourceLocation::current())
{
return ArmedScopeGuard {
[this, position = m_lexer.tell(), cached_position = m_lexer.cached_position(), location] {
[this, position = m_lexer.tell(), location] {
m_lexer.retreat(m_lexer.tell() - position);
m_lexer.restore_cached_offset(cached_position);
(void)location;
dbgln_if(XML_PARSER_DEBUG, "{:->{}}FAIL @ {} -- \x1b[31m{}\x1b[0m", " ", s_debug_indent_level * 2, location, m_lexer.remaining().substring_view(0, min(16, m_lexer.tell_remaining())).replace("\n"sv, "\\n"sv, ReplaceMode::All));
}