ladybird/Userland/Libraries/LibWebView/SourceHighlighter.cpp
Timothy Flynn 1e1c3e5a34 LibWebView: Create a method to syntax-highlight page source with HTML
This tokenizes a page's source to produce HTML with syntax highlighting.
The first implementation here is rather simple; we do not yet implement
code folding, line numbers, etc.

The goal is for this to be used as the View Source implementation for
all Ladybird chromes.
2023-08-29 08:11:11 -04:00

138 lines
3.8 KiB
C++

/*
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/StringBuilder.h>
#include <AK/URL.h>
#include <LibWeb/HTML/Parser/HTMLTokenizer.h>
#include <LibWebView/SourceHighlighter.h>
namespace WebView {
String highlight_source(URL const& url, StringView source)
{
Web::HTML::HTMLTokenizer tokenizer { source, "utf-8"sv };
StringBuilder builder;
builder.append(R"~~~(
<!DOCTYPE html>
<html>
<head>
<meta name="color-scheme" content="dark light">)~~~"sv);
builder.appendff("<title>View Source - {}</title>", url);
builder.append(R"~~~(
<style type="text/css">
html {
font-size: 10pt;
}
@media (prefers-color-scheme: dark) {
/* FIXME: We should be able to remove the HTML style when "color-scheme" is supported */
html {
background-color: rgb(30, 30, 30);
color: white;
}
.comment {
color: lightgreen;
}
.tag {
color: orangered;
}
.attribute-name {
color: orange;
}
.attribute-value {
color: deepskyblue;
}
}
@media (prefers-color-scheme: light) {
.comment {
color: green;
}
.tag {
color: red;
}
.attribute-name {
color: darkorange;
}
.attribute-value {
color: blue;
}
}
</style>
</head>
<body>
<pre>
)~~~"sv);
size_t previous_position = 0;
auto append_source = [&](auto end_position, Optional<StringView> const& class_name = {}) {
if (end_position <= previous_position)
return;
auto segment = source.substring_view(previous_position, end_position - previous_position);
if (class_name.has_value())
builder.appendff("<span class=\"{}\">"sv, *class_name);
for (auto code_point : Utf8View { segment }) {
if (code_point == '&')
builder.append("&amp;"sv);
else if (code_point == 0xA0)
builder.append("&nbsp;"sv);
else if (code_point == '<')
builder.append("&lt;"sv);
else if (code_point == '>')
builder.append("&gt;"sv);
else
builder.append_code_point(code_point);
}
if (class_name.has_value())
builder.append("</span>"sv);
previous_position = end_position;
};
for (auto token = tokenizer.next_token(); token.has_value(); token = tokenizer.next_token()) {
if (token->is_comment()) {
append_source(token->start_position().byte_offset);
append_source(token->end_position().byte_offset, "comment"sv);
} else if (token->is_start_tag() || token->is_end_tag()) {
auto tag_name_start = token->start_position().byte_offset;
append_source(tag_name_start);
append_source(tag_name_start + token->tag_name().length(), "tag"sv);
token->for_each_attribute([&](auto const& attribute) {
append_source(attribute.name_start_position.byte_offset);
append_source(attribute.name_end_position.byte_offset, "attribute-name"sv);
append_source(attribute.value_start_position.byte_offset);
append_source(attribute.value_end_position.byte_offset, "attribute-value"sv);
return IterationDecision::Continue;
});
append_source(token->end_position().byte_offset);
} else {
append_source(token->end_position().byte_offset);
}
}
builder.append(R"~~~(
</pre>
</body>
</html>
)~~~"sv);
return MUST(builder.to_string());
}
}