LibCpp: Store the text of a token as a StringView member

2025-01-26 19:32:06 -05:00 · 2021-03-12 13:11:41 +02:00 · 2021-03-12 13:11:41 +02:00 · 26d9485562
commit 26d9485562
parent 5cd1c69b96
3 changed files with 25 additions and 23 deletions
--- a/Userland/Libraries/LibCpp/Lexer.cpp
+++ b/Userland/Libraries/LibCpp/Lexer.cpp
@@ -231,8 +231,8 @@ Vector<Token> Lexer::lex()
    size_t token_start_index = 0;
    Position token_start_position;

-    auto emit_token = [&](auto type) {
-        tokens.empend(type, m_position, m_position);
+    auto emit_single_char_token = [&](auto type) {
+        tokens.empend(type, m_position, m_position, m_input.substring_view(m_index, 1));
        consume();
    };

@@ -241,7 +241,7 @@ Vector<Token> Lexer::lex()
        token_start_position = m_position;
    };
    auto commit_token = [&](auto type) {
-        tokens.empend(type, token_start_position, m_previous_position);
+        tokens.empend(type, token_start_position, m_previous_position, m_input.substring_view(token_start_index, m_index - token_start_index));
    };

    auto emit_token_equals = [&](auto type, auto equals_type) {
@@ -252,7 +252,7 @@ Vector<Token> Lexer::lex()
            commit_token(equals_type);
            return;
        }
-        emit_token(type);
+        emit_single_char_token(type);
    };

    auto match_escape_sequence = [&]() -> size_t {
@@ -335,27 +335,27 @@ Vector<Token> Lexer::lex()
            continue;
        }
        if (ch == '(') {
-            emit_token(Token::Type::LeftParen);
+            emit_single_char_token(Token::Type::LeftParen);
            continue;
        }
        if (ch == ')') {
-            emit_token(Token::Type::RightParen);
+            emit_single_char_token(Token::Type::RightParen);
            continue;
        }
        if (ch == '{') {
-            emit_token(Token::Type::LeftCurly);
+            emit_single_char_token(Token::Type::LeftCurly);
            continue;
        }
        if (ch == '}') {
-            emit_token(Token::Type::RightCurly);
+            emit_single_char_token(Token::Type::RightCurly);
            continue;
        }
        if (ch == '[') {
-            emit_token(Token::Type::LeftBracket);
+            emit_single_char_token(Token::Type::LeftBracket);
            continue;
        }
        if (ch == ']') {
-            emit_token(Token::Type::RightBracket);
+            emit_single_char_token(Token::Type::RightBracket);
            continue;
        }
        if (ch == '<') {
@@ -406,7 +406,7 @@ Vector<Token> Lexer::lex()
            continue;
        }
        if (ch == ',') {
-            emit_token(Token::Type::Comma);
+            emit_single_char_token(Token::Type::Comma);
            continue;
        }
        if (ch == '+') {
@@ -504,11 +504,11 @@ Vector<Token> Lexer::lex()
            continue;
        }
        if (ch == '~') {
-            emit_token(Token::Type::Tilde);
+            emit_single_char_token(Token::Type::Tilde);
            continue;
        }
        if (ch == '?') {
-            emit_token(Token::Type::QuestionMark);
+            emit_single_char_token(Token::Type::QuestionMark);
            continue;
        }
        if (ch == ':') {
@@ -528,7 +528,7 @@ Vector<Token> Lexer::lex()
            continue;
        }
        if (ch == ';') {
-            emit_token(Token::Type::Semicolon);
+            emit_single_char_token(Token::Type::Semicolon);
            continue;
        }
        if (ch == '.') {
@@ -778,7 +778,7 @@ Vector<Token> Lexer::lex()
            continue;
        }
        dbgln("Unimplemented token character: {}", ch);
-        emit_token(Token::Type::Unknown);
+        emit_single_char_token(Token::Type::Unknown);
    }
    return tokens;
 }
--- a/Userland/Libraries/LibCpp/Parser.cpp
+++ b/Userland/Libraries/LibCpp/Parser.cpp
@@ -657,7 +657,7 @@ Token Parser::consume()
 {
    if (eof()) {
        error("C++ Parser: out of tokens");
-        return { Token::Type::EOF_TOKEN, position(), position() };
+        return { Token::Type::EOF_TOKEN, position(), position(), {} };
    }
    return m_tokens[m_state.token_index++];
 }
@@ -665,7 +665,7 @@ Token Parser::consume()
 Token Parser::peek(size_t offset) const
 {
    if (m_state.token_index + offset >= m_tokens.size())
-        return { Token::Type::EOF_TOKEN, position(), position() };
+        return { Token::Type::EOF_TOKEN, position(), position(), {} };
    return m_tokens[m_state.token_index + offset];
 }

@@ -699,9 +699,7 @@ bool Parser::done()

 StringView Parser::text_of_token(const Cpp::Token& token) const
 {
-    VERIFY(token.start().line == token.end().line);
-    VERIFY(token.start().column <= token.end().column);
-    return m_lines[token.start().line].substring_view(token.start().column, token.end().column - token.start().column + 1);
+     return token.text();
 }

 StringView Parser::text_of_node(const ASTNode& node) const
--- a/Userland/Libraries/LibCpp/Token.h
+++ b/Userland/Libraries/LibCpp/Token.h
@@ -26,6 +26,7 @@

 #pragma once

+#include <AK/StringView.h>
 #include <AK/Types.h>

 namespace Cpp {
@@ -114,10 +115,11 @@ struct Token {
 #undef __TOKEN
    };

-    Token(Type type, const Position& start, const Position& end)
+    Token(Type type, const Position& start, const Position& end, const StringView& text)
        : m_type(type)
        , m_start(start)
        , m_end(end)
+        , m_text(text)
    {
    }

@@ -140,14 +142,16 @@ struct Token {
    const Position& start() const { return m_start; }
    const Position& end() const { return m_end; }

-    void set_start(const Position& other) {m_start = other;}
-    void set_end(const Position& other) {m_end = other;}
+    void set_start(const Position& other) { m_start = other; }
+    void set_end(const Position& other) { m_end = other; }
    Type type() const { return m_type; }
+    const StringView& text() const { return m_text; }

 private:
    Type m_type { Type::Unknown };
    Position m_start;
    Position m_end;
+    StringView m_text;
 };

 }