mirror of
https://github.com/SerenityOS/serenity.git
synced 2025-01-22 17:31:58 -05:00
LibWeb: Add rules for distinguishing if a resource is text or binary
Resolves a FIXME in MimeSniff::Resource, allowing us to determine the computed MIME type when the supplied type is one of those used by older versions of Apache and therefore needs special handling.
This commit is contained in:
parent
0b7148e2a6
commit
5d14691149
3 changed files with 102 additions and 13 deletions
|
@ -31,11 +31,13 @@ TEST_CASE(determine_computed_mime_type_given_no_sniff_is_unset)
|
|||
auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration { .supplied_type = supplied_type }));
|
||||
|
||||
EXPECT_EQ("application/octet-stream"sv, MUST(computed_mime_type.serialized()));
|
||||
}
|
||||
|
||||
// Make sure we cover the XML code path in the mime type sniffing algorithm.
|
||||
// Checks that sniffing a resource whose supplied type is an XML MIME type yields that same type.
TEST_CASE(determine_computed_mime_type_given_xml_mime_type_as_supplied_type)
|
||||
{
|
||||
auto xml_mime_type = "application/rss+xml"sv;
|
||||
// NOTE(review): this row and the next assign to names that this test case only declares in the
// `auto` rows further down; they look like the pre-change side of the rendered diff — confirm
// against the commit before treating them as part of the test body.
supplied_type = MUST(Web::MimeSniff::MimeType::parse(xml_mime_type)).release_value();
|
||||
computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration { .supplied_type = supplied_type }));
|
||||
// Parse the XML MIME type and sniff a single 0x00 byte with it as the supplied type.
auto supplied_type = MUST(Web::MimeSniff::MimeType::parse(xml_mime_type)).release_value();
|
||||
auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration { .supplied_type = supplied_type }));
|
||||
|
||||
// The computed type must serialize back to the exact string that was supplied.
EXPECT_EQ(xml_mime_type, MUST(computed_mime_type.serialized()));
|
||||
}
|
||||
|
@ -60,6 +62,53 @@ static void set_audio_or_video_type_mappings(HashMap<StringView, Vector<StringVi
|
|||
mime_type_to_headers_map.set("audio/wave"sv, { "RIFF\x00\x00\x00\x00WAVE"sv });
|
||||
}
|
||||
|
||||
// Test helper: stores under the key "text/plain" the list of resource headers that the callers
// expect to be sniffed as "text/plain". The map is keyed by expected MIME type; each value is the
// list of headers to feed to the sniffer.
static void set_text_plain_type_mappings(HashMap<StringView, Vector<StringView>>& mime_type_to_headers_map)
|
||||
{
|
||||
mime_type_to_headers_map.set("text/plain"sv, {
|
||||
// 0xFE 0xFF (UTF-16BE BOM) followed by two 0x00 bytes.
"\xFE\xFF\x00\x00"sv,
|
||||
// 0xFF 0xFE (UTF-16LE BOM) followed by two 0x00 bytes.
"\xFF\xFE\x00\x00"sv,
|
||||
// 0xEF 0xBB 0xBF (UTF-8 BOM) followed by one 0x00 byte.
"\xEF\xBB\xBF\x00"sv,
|
||||
// No BOM; presumably meant to exercise the "no binary data bytes" path — confirm.
"Hello world!"sv,
|
||||
});
|
||||
}
|
||||
|
||||
// Exercises sniffing over the "http" scheme when the supplied type is one of the four "Apache bug"
// text/plain variants. The first half checks every variant against a plain-text body; the second
// half checks, for a supplied type of text/plain, which headers compute to "text/plain" and which
// to "application/octet-stream".
TEST_CASE(determine_computed_mime_type_given_supplied_type_that_is_an_apache_bug_mime_type)
|
||||
{
|
||||
// Supplied-type strings under test; each is parsed with MimeType::parse() in the loop below.
Vector<StringView> apache_bug_mime_types = {
|
||||
"text/plain"sv,
|
||||
"text/plain; charset=ISO-8859-1"sv,
|
||||
"text/plain; charset=iso-8859-1"sv,
|
||||
"text/plain; charset=UTF-8"sv
|
||||
};
|
||||
|
||||
// Cover all Apache bug MIME types.
|
||||
for (auto const& apache_bug_mime_type : apache_bug_mime_types) {
|
||||
auto supplied_type = MUST(Web::MimeSniff::MimeType::parse(apache_bug_mime_type)).release_value();
|
||||
auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("Hello world!"sv.bytes(),
|
||||
Web::MimeSniff::SniffingConfiguration { .scheme = "http"sv, .supplied_type = supplied_type }));
|
||||
|
||||
// Regardless of the charset parameter on the supplied type, the expected serialization is bare "text/plain".
EXPECT_EQ("text/plain"sv, MUST(computed_mime_type.serialized()));
|
||||
}
|
||||
|
||||
// Cover all code paths in "rules for distinguishing if a resource is text or binary".
|
||||
// Maps each expected computed MIME type to the headers that should produce it.
HashMap<StringView, Vector<StringView>> mime_type_to_headers_map;
|
||||
// A lone 0x00 byte is expected to fall through to "application/octet-stream".
mime_type_to_headers_map.set("application/octet-stream"sv, { "\x00"sv });
|
||||
|
||||
// Adds the "text/plain" entry (BOM-prefixed headers and a plain string).
set_text_plain_type_mappings(mime_type_to_headers_map);
|
||||
|
||||
// Same supplied type for every header below: text/plain with no parameters.
auto supplied_type = MUST(Web::MimeSniff::MimeType::create("text"_string, "plain"_string));
|
||||
for (auto const& mime_type_to_headers : mime_type_to_headers_map) {
|
||||
auto mime_type = mime_type_to_headers.key;
|
||||
|
||||
for (auto const& header : mime_type_to_headers.value) {
|
||||
auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(header.bytes(),
|
||||
Web::MimeSniff::SniffingConfiguration { .scheme = "http"sv, .supplied_type = supplied_type }));
|
||||
|
||||
// The map key is the expected serialization for every header in its value list.
EXPECT_EQ(mime_type, MUST(computed_mime_type.serialized()));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(determine_computed_mime_type_in_both_none_and_browsing_sniffing_context)
|
||||
{
|
||||
HashMap<StringView, Vector<StringView>> mime_type_to_headers_map;
|
||||
|
@ -87,13 +136,8 @@ TEST_CASE(determine_computed_mime_type_in_both_none_and_browsing_sniffing_contex
|
|||
mime_type_to_headers_map.set("text/xml"sv, { "<?xml"sv });
|
||||
mime_type_to_headers_map.set("application/pdf"sv, { "%PDF-"sv });
|
||||
mime_type_to_headers_map.set("application/postscript"sv, { "%!PS-Adobe-"sv });
|
||||
mime_type_to_headers_map.set("text/plain"sv, {
|
||||
"\xFE\xFF\x00\x00"sv,
|
||||
"\xFF\xFE\x00\x00"sv,
|
||||
"\xEF\xBB\xBF\x00"sv,
|
||||
"Hello world!"sv,
|
||||
});
|
||||
|
||||
set_text_plain_type_mappings(mime_type_to_headers_map);
|
||||
set_image_type_mappings(mime_type_to_headers_map);
|
||||
set_audio_or_video_type_mappings(mime_type_to_headers_map);
|
||||
|
||||
|
|
|
@ -446,11 +446,13 @@ ErrorOr<void> Resource::supplied_mime_type_detection_algorithm(StringView scheme
|
|||
// NOTE: Non-standard but this algorithm expects the caller to handle step 2.1.1.
|
||||
if (supplied_type.has_value()) {
|
||||
if (Fetch::Infrastructure::is_http_or_https_scheme(scheme)) {
|
||||
// NOTE: The spec expects a space between the semicolon and the start of the charset parameter. However, we will lose this
|
||||
// space because MimeType::parse() ignores any spaces found there.
|
||||
static Array<StringView, 4> constexpr apache_bug_mime_types = {
|
||||
"text/plain"sv,
|
||||
"text/plain; charset=ISO-8859-1"sv,
|
||||
"text/plain; charset=iso-8859-1"sv,
|
||||
"text/plain; charset=UTF-8"sv
|
||||
"text/plain;charset=ISO-8859-1"sv,
|
||||
"text/plain;charset=iso-8859-1"sv,
|
||||
"text/plain;charset=UTF-8"sv
|
||||
};
|
||||
|
||||
auto serialized_supplied_type = TRY(supplied_type->serialized());
|
||||
|
@ -517,7 +519,7 @@ ErrorOr<void> Resource::mime_type_sniffing_algorithm()
|
|||
// 3. If the check-for-apache-bug flag is set, execute the rules for distinguishing
|
||||
// if a resource is text or binary and abort these steps.
|
||||
if (m_check_for_apache_bug_flag) {
|
||||
// FIXME: Execute the rules for distinguishing if a resource is text or binary and abort these steps.
|
||||
TRY(rules_for_distinguishing_if_a_resource_is_text_or_binary());
|
||||
return {};
|
||||
}
|
||||
|
||||
|
@ -564,6 +566,46 @@ ErrorOr<void> Resource::mime_type_sniffing_algorithm()
|
|||
return {};
|
||||
}
|
||||
|
||||
// https://mimesniff.spec.whatwg.org/#sniffing-a-mislabeled-binary-resource
|
||||
// Decides whether the resource header looks like text and records the outcome in m_computed_mime_type.
// The computed type is set to "text/plain" when the header starts with a UTF-16BE, UTF-16LE or UTF-8
// BOM, or when no byte in it satisfies is_binary_data_byte; otherwise m_computed_mime_type is left
// untouched. The only error this function can return is one propagated from MimeType::create() via TRY.
ErrorOr<void> Resource::rules_for_distinguishing_if_a_resource_is_text_or_binary()
|
||||
{
|
||||
// 1. Let length be the number of bytes in the resource header.
|
||||
auto length = m_resource_header.size();
|
||||
|
||||
// 2. If length is greater than or equal to 2 and the first 2 bytes of the
|
||||
// resource header are equal to 0xFE 0xFF (UTF-16BE BOM) or 0xFF 0xFE (UTF-16LE BOM), the computed MIME type is "text/plain".
|
||||
// Abort these steps.
|
||||
// The span is reused by the UTF-8 BOM check in step 3 as well.
auto resource_header_span = m_resource_header.span();
|
||||
auto utf_16_be_bom = "\xFE\xFF"sv.bytes();
|
||||
auto utf_16_le_bom = "\xFF\xFE"sv.bytes();
|
||||
if (length >= 2
|
||||
&& (resource_header_span.starts_with(utf_16_be_bom)
|
||||
|| resource_header_span.starts_with(utf_16_le_bom))) {
|
||||
m_computed_mime_type = TRY(MimeType::create("text"_string, "plain"_string));
|
||||
return {};
|
||||
}
|
||||
|
||||
// 3. If length is greater than or equal to 3 and the first 3 bytes of the resource header are equal to 0xEF 0xBB 0xBF (UTF-8 BOM),
|
||||
// the computed MIME type is "text/plain".
|
||||
// Abort these steps.
|
||||
auto utf_8_bom = "\xEF\xBB\xBF"sv.bytes();
|
||||
if (length >= 3 && resource_header_span.starts_with(utf_8_bom)) {
|
||||
m_computed_mime_type = TRY(MimeType::create("text"_string, "plain"_string));
|
||||
return {};
|
||||
}
|
||||
|
||||
// 4. If the resource header contains no binary data bytes, the computed MIME type is "text/plain".
|
||||
// Abort these steps.
|
||||
// NOTE(review): this step reads the header through resource_header() while the steps above use
// m_resource_header directly; presumably both refer to the same bytes — confirm.
if (!any_of(resource_header(), is_binary_data_byte)) {
|
||||
m_computed_mime_type = TRY(MimeType::create("text"_string, "plain"_string));
|
||||
return {};
|
||||
}
|
||||
|
||||
// 5. The computed MIME type is "application/octet-stream".
|
||||
// NOTE: This is the default MIME type of the computed MIME type.
|
||||
// Nothing is assigned on this path; it relies on m_computed_mime_type already holding that default.
return {};
|
||||
}
|
||||
|
||||
// https://mimesniff.spec.whatwg.org/#context-specific-sniffing-algorithm
|
||||
ErrorOr<void> Resource::context_specific_sniffing_algorithm(SniffingContext sniffing_context)
|
||||
{
|
||||
|
|
|
@ -42,6 +42,9 @@ private:
|
|||
void read_the_resource_header(ReadonlyBytes data);
|
||||
ErrorOr<void> supplied_mime_type_detection_algorithm(StringView scheme, Optional<MimeType> supplied_type);
|
||||
ErrorOr<void> mime_type_sniffing_algorithm();
|
||||
|
||||
ErrorOr<void> rules_for_distinguishing_if_a_resource_is_text_or_binary();
|
||||
|
||||
ErrorOr<void> context_specific_sniffing_algorithm(SniffingContext sniffing_context);
|
||||
ErrorOr<void> rules_for_sniffing_images_specifically();
|
||||
ErrorOr<void> rules_for_sniffing_audio_or_video_specifically();
|
||||
|
|
Loading…
Reference in a new issue