From b8498dc55e1c62d2ad34816f4b897454da4b23f2 Mon Sep 17 00:00:00 2001
From: Andreas Kling
Date: Sat, 16 May 2020 19:47:49 +0200
Subject: [PATCH] LibVT: Add incremental UTF-8 parsing to the terminal input
 handler

Instead of relying on the GUI code to handle UTF-8, we now process
and parse the incoming data into 32-bit codepoints ourselves.

This means that you can now show emojis in the terminal and they will
only take up one character cell each. :^)
---
 Libraries/LibVT/Terminal.cpp | 74 ++++++++++++++++++++++++++++++------
 Libraries/LibVT/Terminal.h   | 10 ++++-
 2 files changed, 70 insertions(+), 14 deletions(-)

diff --git a/Libraries/LibVT/Terminal.cpp b/Libraries/LibVT/Terminal.cpp
index 3e495c84168..625f56827a3 100644
--- a/Libraries/LibVT/Terminal.cpp
+++ b/Libraries/LibVT/Terminal.cpp
@@ -809,6 +809,23 @@ void Terminal::on_input(u8 ch)
 #ifdef TERMINAL_DEBUG
     dbgprintf("Terminal::on_char: %b (%c), fg=%u, bg=%u\n", ch, ch, m_current_attribute.foreground_color, m_current_attribute.background_color);
 #endif
+
+    auto fail_utf8_parse = [this] {
+        m_parser_state = Normal;
+        on_codepoint('%');
+    };
+
+    auto advance_utf8_parse = [this, ch] {
+        m_parser_codepoint <<= 6;
+        m_parser_codepoint |= ch & 0x3f;
+        if (m_parser_state == UTF8Needs1Byte) {
+            on_codepoint(m_parser_codepoint);
+            m_parser_state = Normal;
+        } else {
+            m_parser_state = (ParserState)(m_parser_state + 1);
+        }
+    };
+
     switch (m_parser_state) {
     case GotEscape:
         if (ch == '[') {
@@ -888,8 +905,36 @@ void Terminal::on_input(u8 ch)
         m_parser_state = Normal;
         m_swallow_current = false;
         return;
+    case UTF8Needs1Byte:
+    case UTF8Needs2Bytes:
+    case UTF8Needs3Bytes:
+        if ((ch & 0xc0) != 0x80) {
+            fail_utf8_parse();
+        } else {
+            advance_utf8_parse();
+        }
+        return;
+
     case Normal:
-        break;
+        if (!(ch & 0x80))
+            break;
+        if ((ch & 0xe0) == 0xc0) {
+            m_parser_state = UTF8Needs1Byte;
+            m_parser_codepoint = ch & 0x1f;
+            return;
+        }
+        if ((ch & 0xf0) == 0xe0) {
+            m_parser_state = UTF8Needs2Bytes;
+            m_parser_codepoint = ch & 0x0f;
+            return;
+        }
+        if ((ch & 0xf8) == 0xf0) {
+            m_parser_state = UTF8Needs3Bytes;
+            m_parser_codepoint = ch & 0x07;
+            return;
+        }
+        fail_utf8_parse();
+        return;
     }
 
     switch (ch) {
@@ -925,21 +970,26 @@ void Terminal::on_input(u8 ch)
         return;
     }
 
+    on_codepoint(ch);
+}
+
+void Terminal::on_codepoint(u32 codepoint)
+{
     auto new_column = m_cursor_column + 1;
     if (new_column < columns()) {
-        put_character_at(m_cursor_row, m_cursor_column, ch);
+        put_character_at(m_cursor_row, m_cursor_column, codepoint);
         set_cursor(m_cursor_row, new_column);
+        return;
+    }
+    if (m_stomp) {
+        m_stomp = false;
+        newline();
+        put_character_at(m_cursor_row, m_cursor_column, codepoint);
+        set_cursor(m_cursor_row, 1);
     } else {
-        if (m_stomp) {
-            m_stomp = false;
-            newline();
-            put_character_at(m_cursor_row, m_cursor_column, ch);
-            set_cursor(m_cursor_row, 1);
-        } else {
-            // Curious: We wait once on the right-hand side
-            m_stomp = true;
-            put_character_at(m_cursor_row, m_cursor_column, ch);
-        }
+        // Curious: We wait once on the right-hand side
+        m_stomp = true;
+        put_character_at(m_cursor_row, m_cursor_column, codepoint);
     }
 }
 
diff --git a/Libraries/LibVT/Terminal.h b/Libraries/LibVT/Terminal.h
index 0ac38ec5a0a..d4061856dfc 100644
--- a/Libraries/LibVT/Terminal.h
+++ b/Libraries/LibVT/Terminal.h
@@ -101,6 +101,8 @@ public:
 private:
     typedef Vector ParamVector;
 
+    void on_codepoint(u32);
+
     void scroll_up();
     void scroll_down();
     void newline();
@@ -171,7 +173,7 @@ private:
     void execute_xterm_command();
     void execute_hashtag(u8);
 
-    enum EscapeState {
+    enum ParserState {
         Normal,
         GotEscape,
         ExpectParameter,
@@ -180,9 +182,13 @@ private:
         ExpectHashtagDigit,
         ExpectXtermParameter,
         ExpectStringTerminator,
+        UTF8Needs3Bytes,
+        UTF8Needs2Bytes,
+        UTF8Needs1Byte,
     };
 
-    EscapeState m_parser_state { Normal };
+    ParserState m_parser_state { Normal };
+    u32 m_parser_codepoint { 0 };
     Vector m_parameters;
     Vector m_intermediates;
     Vector m_xterm_parameters;