mirror of
https://github.com/SerenityOS/serenity.git
synced 2025-01-22 09:21:57 -05:00
AK: Cache all the line positions in LineTrackingLexer
Also updates a LibWeb text test that used to report the wrong line number. (cherry picked from commit 02b50d463b174e5d525c7ab8ce8dd173d550de28; amended to exclude LineTrackingLexer from KERNEL, since it now uses make<>)
This commit is contained in:
parent
2502b5713d
commit
bc9e03ea38
4 changed files with 48 additions and 31 deletions
|
@ -176,30 +176,37 @@ ErrorOr<T> GenericLexer::consume_decimal_integer()
|
|||
}
|
||||
}
|
||||
|
||||
#if !defined(KERNEL)
|
||||
LineTrackingLexer::Position LineTrackingLexer::position_for(size_t index) const
|
||||
{
|
||||
auto& [cached_index, cached_line, cached_column] = m_cached_position;
|
||||
|
||||
if (cached_index <= index) {
|
||||
for (size_t i = cached_index; i < index; ++i) {
|
||||
if (m_input[i] == '\n')
|
||||
++cached_line, cached_column = 0;
|
||||
else
|
||||
++cached_column;
|
||||
}
|
||||
} else {
|
||||
auto lines_backtracked = m_input.substring_view(index, cached_index - index).count('\n');
|
||||
cached_line -= lines_backtracked;
|
||||
if (lines_backtracked == 0) {
|
||||
cached_column -= cached_index - index;
|
||||
} else {
|
||||
auto current_line_start = m_input.substring_view(0, index).find_last('\n').value_or(0);
|
||||
cached_column = index - current_line_start;
|
||||
// Sad case: we have no idea where the nearest newline is, so we have to
|
||||
// scan ahead a bit.
|
||||
while (index > m_largest_known_line_start_position) {
|
||||
auto next_newline = m_input.find('\n', m_largest_known_line_start_position);
|
||||
if (!next_newline.has_value()) {
|
||||
// No more newlines, add the end of the input as a line start to avoid searching again.
|
||||
m_line_start_positions->insert(m_input.length(), m_line_start_positions->size());
|
||||
m_largest_known_line_start_position = m_input.length();
|
||||
break;
|
||||
}
|
||||
m_line_start_positions->insert(next_newline.value() + 1, m_line_start_positions->size());
|
||||
m_largest_known_line_start_position = next_newline.value() + 1;
|
||||
}
|
||||
cached_index = index;
|
||||
return m_cached_position;
|
||||
// We should always have at least the first line start position.
|
||||
auto previous_line_it = m_line_start_positions->find_largest_not_above_iterator(index);
|
||||
auto previous_line_index = previous_line_it.key();
|
||||
|
||||
auto line = *previous_line_it;
|
||||
auto column = index - previous_line_index;
|
||||
if (line == 0) {
|
||||
// First line, take into account the start position.
|
||||
column += m_first_line_start_position.column;
|
||||
}
|
||||
|
||||
line += m_first_line_start_position.line;
|
||||
return { index, line, column };
|
||||
}
|
||||
#endif
|
||||
|
||||
template ErrorOr<u8> GenericLexer::consume_decimal_integer<u8>();
|
||||
template ErrorOr<i8> GenericLexer::consume_decimal_integer<i8>();
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <AK/NonnullOwnPtr.h>
|
||||
#include <AK/RedBlackTree.h>
|
||||
#include <AK/Result.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/StringView.h>
|
||||
|
@ -233,10 +235,9 @@ protected:
|
|||
size_t m_index { 0 };
|
||||
};
|
||||
|
||||
#if !defined(KERNEL)
|
||||
class LineTrackingLexer : public GenericLexer {
|
||||
public:
|
||||
using GenericLexer::GenericLexer;
|
||||
|
||||
struct Position {
|
||||
size_t offset { 0 };
|
||||
size_t line { 0 };
|
||||
|
@ -245,21 +246,29 @@ public:
|
|||
|
||||
LineTrackingLexer(StringView input, Position start_position)
|
||||
: GenericLexer(input)
|
||||
, m_cached_position {
|
||||
.line = start_position.line,
|
||||
.column = start_position.column,
|
||||
}
|
||||
, m_first_line_start_position(start_position)
|
||||
, m_line_start_positions(make<RedBlackTree<size_t, size_t>>())
|
||||
{
|
||||
m_line_start_positions->insert(0, 0);
|
||||
auto first_newline = input.find('\n').map([](auto x) { return x + 1; }).value_or(input.length());
|
||||
m_line_start_positions->insert(first_newline, 1);
|
||||
m_largest_known_line_start_position = first_newline;
|
||||
}
|
||||
|
||||
LineTrackingLexer(StringView input)
|
||||
: LineTrackingLexer(input, { 0, 1, 1 })
|
||||
{
|
||||
}
|
||||
|
||||
Position cached_position() const { return m_cached_position; }
|
||||
void restore_cached_offset(Position cached_position) { m_cached_position = cached_position; }
|
||||
Position position_for(size_t) const;
|
||||
Position current_position() const { return position_for(m_index); }
|
||||
|
||||
protected:
|
||||
mutable Position m_cached_position;
|
||||
Position m_first_line_start_position;
|
||||
mutable NonnullOwnPtr<RedBlackTree<size_t, size_t>> m_line_start_positions; // offset -> line index
|
||||
mutable size_t m_largest_known_line_start_position { 0 };
|
||||
};
|
||||
#endif
|
||||
|
||||
constexpr auto is_any_of(StringView values)
|
||||
{
|
||||
|
@ -281,5 +290,7 @@ using AK::GenericLexer;
|
|||
using AK::is_any_of;
|
||||
using AK::is_path_separator;
|
||||
using AK::is_quote;
|
||||
# if !defined(KERNEL)
|
||||
using AK::LineTrackingLexer;
|
||||
# endif
|
||||
#endif
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
Got load event
|
||||
[object HTMLDocument]
|
||||
Failed to parse XML document: Expected '>' at line: 1, col: 20 (offset 59)
|
||||
Failed to parse XML document: Expected '>' at line: 2, col: 20 (offset 59)
|
||||
|
|
|
@ -147,9 +147,8 @@ private:
|
|||
[[nodiscard]] auto rollback_point(SourceLocation location = SourceLocation::current())
|
||||
{
|
||||
return ArmedScopeGuard {
|
||||
[this, position = m_lexer.tell(), cached_position = m_lexer.cached_position(), location] {
|
||||
[this, position = m_lexer.tell(), location] {
|
||||
m_lexer.retreat(m_lexer.tell() - position);
|
||||
m_lexer.restore_cached_offset(cached_position);
|
||||
(void)location;
|
||||
dbgln_if(XML_PARSER_DEBUG, "{:->{}}FAIL @ {} -- \x1b[31m{}\x1b[0m", " ", s_debug_indent_level * 2, location, m_lexer.remaining().substring_view(0, min(16, m_lexer.tell_remaining())).replace("\n"sv, "\\n"sv, ReplaceMode::All));
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue