From 5bed8f405570af91102f7470d6e8cc20091876b1 Mon Sep 17 00:00:00 2001 From: Shannon Booth Date: Fri, 10 Jan 2025 04:50:34 +1300 Subject: [PATCH] LibURL+LibWeb: Make URL::basic_parse return an Optional URL::basic_parse has a subtle bug where the resulting URL is not set to valid when StateOveride is provided and the URL parser early returns a valid URL. This has not surfaced as a problem so far, as the only users of the state override API provide an already valid URL buffer and also ignore the result of basic parsing with a state override. However, this bug surfaces implementing the URL pattern spec, which as part of URL canonicalization: * Provides a dummy URL record * Basic URL parses that URL with state override * Checks the result of the URL parser to validate the URL While we could set URL validity on every early return of the URL parser during state override, it has been a long standing FIXME around the code to try and remove the awkward validity state of the URL class. So this commit makes the first stage of this change by migrating the basic parser API to return Optional, which also happens to make this subtle issue not a problem any more. --- Libraries/LibURL/Parser.cpp | 2 +- Libraries/LibURL/Parser.h | 2 +- Libraries/LibURL/URL.cpp | 14 ++++--- Libraries/LibWeb/CSS/Fetch.cpp | 4 +- Libraries/LibWeb/DOMURL/DOMURL.cpp | 30 +++++++-------- Libraries/LibWeb/HTML/Location.cpp | 2 +- Libraries/LibWeb/HTML/NavigatorBeacon.cpp | 6 +-- Tests/LibURL/TestURL.cpp | 46 +++++++++++------------ Utilities/xml.cpp | 6 +-- 9 files changed, 56 insertions(+), 56 deletions(-) diff --git a/Libraries/LibURL/Parser.cpp b/Libraries/LibURL/Parser.cpp index 1e4794e5601..21fd9bd7a98 100644 --- a/Libraries/LibURL/Parser.cpp +++ b/Libraries/LibURL/Parser.cpp @@ -708,7 +708,7 @@ String Parser::percent_encode_after_encoding(TextCodec::Encoder& encoder, String } // https://url.spec.whatwg.org/#concept-basic-url-parser -URL Parser::basic_parse(StringView raw_input, Optional base_url, URL* url, Optional state_override, Optional encoding) +Optional Parser::basic_parse(StringView raw_input, Optional base_url, URL* url, Optional state_override, Optional encoding) { dbgln_if(URL_PARSER_DEBUG, "URL::Parser::basic_parse: Parsing '{}'", raw_input); diff --git a/Libraries/LibURL/Parser.h b/Libraries/LibURL/Parser.h index 6cb98bf45d7..a0a903093c3 100644 --- a/Libraries/LibURL/Parser.h +++ b/Libraries/LibURL/Parser.h @@ -58,7 +58,7 @@ public: } // https://url.spec.whatwg.org/#concept-basic-url-parser - static URL basic_parse(StringView input, Optional base_url = {}, URL* url = nullptr, Optional state_override = {}, Optional encoding = {}); + static Optional basic_parse(StringView input, Optional base_url = {}, URL* url = nullptr, Optional state_override = {}, Optional encoding = {}); // https://url.spec.whatwg.org/#string-percent-encode-after-encoding static String percent_encode_after_encoding(TextCodec::Encoder&, StringView input, PercentEncodeSet percent_encode_set, bool space_as_plus = false); diff --git a/Libraries/LibURL/URL.cpp b/Libraries/LibURL/URL.cpp index a3f825d170f..ca06c31464b 100644 --- a/Libraries/LibURL/URL.cpp +++ b/Libraries/LibURL/URL.cpp @@ -23,7 +23,7 @@ namespace URL { // FIXME: It could make sense to force users of URL to use URL::Parser::basic_parse() explicitly instead of using a constructor. URL::URL(StringView string) - : URL(Parser::basic_parse(string)) + : URL(Parser::basic_parse(string).value_or(URL {})) { if constexpr (URL_PARSER_DEBUG) { if (m_data->valid) @@ -38,7 +38,11 @@ URL URL::complete_url(StringView relative_url) const if (!is_valid()) return {}; - return Parser::basic_parse(relative_url, *this); + auto result = Parser::basic_parse(relative_url, *this); + if (!result.has_value()) + return {}; + + return result.release_value(); } ByteString URL::path_segment_at_index(size_t index) const @@ -367,12 +371,12 @@ Origin URL::origin() const auto path_url = Parser::basic_parse(serialize_path()); // 3. If pathURL is failure, then return a new opaque origin. - if (!path_url.is_valid()) + if (!path_url.has_value()) return Origin {}; // 4. If pathURL’s scheme is "http", "https", or "file", then return pathURL’s origin. - if (path_url.scheme().is_one_of("http"sv, "https"sv, "file"sv)) - return path_url.origin(); + if (path_url->scheme().is_one_of("http"sv, "https"sv, "file"sv)) + return path_url->origin(); // 5. Return a new opaque origin. return Origin {}; diff --git a/Libraries/LibWeb/CSS/Fetch.cpp b/Libraries/LibWeb/CSS/Fetch.cpp index 1b3857391d8..53a3fa6089a 100644 --- a/Libraries/LibWeb/CSS/Fetch.cpp +++ b/Libraries/LibWeb/CSS/Fetch.cpp @@ -24,14 +24,14 @@ void fetch_a_style_resource(String const& url_value, CSSStyleSheet const& sheet, // 3. Let parsedUrl be the result of the URL parser steps with urlValue’s url and base. If the algorithm returns an error, return. auto parsed_url = URL::Parser::basic_parse(url_value, base); - if (!parsed_url.is_valid()) + if (!parsed_url.has_value()) return; // 4. Let req be a new request whose url is parsedUrl, whose destination is destination, mode is corsMode, // origin is environmentSettings’s origin, credentials mode is "same-origin", use-url-credentials flag is set, // client is environmentSettings, and whose referrer is environmentSettings’s API base URL. auto request = Fetch::Infrastructure::Request::create(vm); - request->set_url(parsed_url); + request->set_url(parsed_url.release_value()); request->set_destination(destination); request->set_mode(cors_mode == CorsMode::Cors ? Fetch::Infrastructure::Request::Mode::CORS : Fetch::Infrastructure::Request::Mode::NoCORS); request->set_origin(environment_settings.origin()); diff --git a/Libraries/LibWeb/DOMURL/DOMURL.cpp b/Libraries/LibWeb/DOMURL/DOMURL.cpp index c230e063e76..3601f9acec9 100644 --- a/Libraries/LibWeb/DOMURL/DOMURL.cpp +++ b/Libraries/LibWeb/DOMURL/DOMURL.cpp @@ -28,9 +28,6 @@ GC::Ref DOMURL::create(JS::Realm& realm, URL::URL url, GC::Ref parse_api_url(String const& url, Optional const& base) { - // FIXME: We somewhat awkwardly have two failure states encapsulated in the return type (and convert between them in the steps), - // ideally we'd get rid of URL's valid flag - // 1. Let parsedBase be null. Optional parsed_base; @@ -40,15 +37,14 @@ static Optional parse_api_url(String const& url, Optional cons auto parsed_base_url = URL::Parser::basic_parse(*base); // 2. If parsedBase is failure, then return failure. - if (!parsed_base_url.is_valid()) + if (!parsed_base_url.has_value()) return {}; parsed_base = parsed_base_url; } // 3. Return the result of running the basic URL parser on url with parsedBase. - auto parsed = URL::Parser::basic_parse(url, parsed_base); - return parsed.is_valid() ? parsed : Optional {}; + return URL::Parser::basic_parse(url, parsed_base); } // https://url.spec.whatwg.org/#url-initialize @@ -183,17 +179,17 @@ String DOMURL::to_json() const } // https://url.spec.whatwg.org/#ref-for-dom-url-href② -WebIDL::ExceptionOr DOMURL::set_href(String const& href) +WebIDL::ExceptionOr DOMURL::set_href(String const& value) { // 1. Let parsedURL be the result of running the basic URL parser on the given value. - URL::URL parsed_url = href; + auto parsed_url = URL::Parser::basic_parse(value); // 2. If parsedURL is failure, then throw a TypeError. - if (!parsed_url.is_valid()) + if (!parsed_url.has_value()) return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, "Invalid URL"sv }; // 3. Set this’s URL to parsedURL. - m_url = move(parsed_url); + m_url = parsed_url.release_value(); // 4. Empty this’s query object’s list. m_query->m_list.clear(); @@ -509,17 +505,17 @@ URL::URL parse(StringView input, Optional base_url, Optionalscheme() != "blob") + return url.release_value(); // 4. Set url’s blob URL entry to the result of resolving the blob URL url, if that did not return failure, and null otherwise. - auto blob_url_entry = FileAPI::resolve_a_blob_url(url); + auto blob_url_entry = FileAPI::resolve_a_blob_url(*url); if (blob_url_entry.has_value()) { - url.set_blob_url_entry(URL::BlobURLEntry { + url->set_blob_url_entry(URL::BlobURLEntry { .type = blob_url_entry->object->type(), .byte_buffer = MUST(ByteBuffer::copy(blob_url_entry->object->raw_bytes())), .environment_origin = blob_url_entry->environment->origin(), @@ -527,7 +523,7 @@ URL::URL parse(StringView input, Optional base_url, Optional Location::set_protocol(String const& value) auto possible_failure = URL::Parser::basic_parse(value, {}, ©_url, URL::Parser::State::SchemeStart); // 5. If possibleFailure is failure, then throw a "SyntaxError" DOMException. - if (!possible_failure.is_valid()) + if (!possible_failure.has_value()) return WebIDL::SyntaxError::create(realm(), MUST(String::formatted("Failed to set protocol. '{}' is an invalid protocol", value))); // 6. if copyURL's scheme is not an HTTP(S) scheme, then terminate these steps. diff --git a/Libraries/LibWeb/HTML/NavigatorBeacon.cpp b/Libraries/LibWeb/HTML/NavigatorBeacon.cpp index e753748b78c..44442f993b5 100644 --- a/Libraries/LibWeb/HTML/NavigatorBeacon.cpp +++ b/Libraries/LibWeb/HTML/NavigatorBeacon.cpp @@ -32,9 +32,9 @@ WebIDL::ExceptionOr NavigatorBeaconMixin::send_beacon(String const& url, O // 3. Set parsedUrl to the result of the URL parser steps with url and base. If the algorithm returns an error, or if parsedUrl's scheme is not "http" or "https", throw a "TypeError" exception and terminate these steps. auto parsed_url = URL::Parser::basic_parse(url, base_url); - if (!parsed_url.is_valid()) + if (!parsed_url.has_value()) return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, MUST(String::formatted("Beacon URL {} is invalid.", url)) }; - if (parsed_url.scheme() != "http" && parsed_url.scheme() != "https") + if (parsed_url->scheme() != "http" && parsed_url->scheme() != "https") return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, MUST(String::formatted("Beacon URL {} must be either http:// or https://.", url)) }; // 4. Let headerList be an empty list. @@ -76,7 +76,7 @@ WebIDL::ExceptionOr NavigatorBeaconMixin::send_beacon(String const& url, O auto req = Fetch::Infrastructure::Request::create(vm); req->set_method(MUST(ByteBuffer::copy("POST"sv.bytes()))); // method: POST req->set_client(&relevant_settings_object); // client: this's relevant settings object - req->set_url_list({ parsed_url }); // url: parsedUrl + req->set_url_list({ parsed_url.release_value() }); // url: parsedUrl req->set_header_list(header_list); // header list: headerList req->set_origin(origin); // origin: origin req->set_keepalive(true); // keepalive: true diff --git a/Tests/LibURL/TestURL.cpp b/Tests/LibURL/TestURL.cpp index e5ea48a1065..18858d53a42 100644 --- a/Tests/LibURL/TestURL.cpp +++ b/Tests/LibURL/TestURL.cpp @@ -342,36 +342,36 @@ TEST_CASE(unicode) TEST_CASE(query_with_non_ascii) { { - URL::URL url = URL::Parser::basic_parse("http://example.com/?utf8=✓"sv); - EXPECT(url.is_valid()); - EXPECT_EQ(url.serialize_path(), "/"sv); - EXPECT_EQ(url.query(), "utf8=%E2%9C%93"); - EXPECT(!url.fragment().has_value()); + Optional url = URL::Parser::basic_parse("http://example.com/?utf8=✓"sv); + EXPECT(url.has_value()); + EXPECT_EQ(url->serialize_path(), "/"sv); + EXPECT_EQ(url->query(), "utf8=%E2%9C%93"); + EXPECT(!url->fragment().has_value()); } { - URL::URL url = URL::Parser::basic_parse("http://example.com/?shift_jis=✓"sv, {}, nullptr, {}, "shift_jis"sv); - EXPECT(url.is_valid()); - EXPECT_EQ(url.serialize_path(), "/"sv); - EXPECT_EQ(url.query(), "shift_jis=%26%2310003%3B"); - EXPECT(!url.fragment().has_value()); + Optional url = URL::Parser::basic_parse("http://example.com/?shift_jis=✓"sv, {}, nullptr, {}, "shift_jis"sv); + EXPECT(url.has_value()); + EXPECT_EQ(url->serialize_path(), "/"sv); + EXPECT_EQ(url->query(), "shift_jis=%26%2310003%3B"); + EXPECT(!url->fragment().has_value()); } } TEST_CASE(fragment_with_non_ascii) { { - URL::URL url = URL::Parser::basic_parse("http://example.com/#✓"sv); - EXPECT(url.is_valid()); - EXPECT_EQ(url.serialize_path(), "/"sv); - EXPECT(!url.query().has_value()); - EXPECT_EQ(url.fragment(), "%E2%9C%93"); + Optional url = URL::Parser::basic_parse("http://example.com/#✓"sv); + EXPECT(url.has_value()); + EXPECT_EQ(url->serialize_path(), "/"sv); + EXPECT(!url->query().has_value()); + EXPECT_EQ(url->fragment(), "%E2%9C%93"); } { - URL::URL url = URL::Parser::basic_parse("http://example.com/#✓"sv, {}, nullptr, {}, "shift_jis"sv); - EXPECT(url.is_valid()); - EXPECT_EQ(url.serialize_path(), "/"sv); - EXPECT(!url.query().has_value()); - EXPECT_EQ(url.fragment(), "%E2%9C%93"); + Optional url = URL::Parser::basic_parse("http://example.com/#✓"sv, {}, nullptr, {}, "shift_jis"sv); + EXPECT(url.has_value()); + EXPECT_EQ(url->serialize_path(), "/"sv); + EXPECT(!url->query().has_value()); + EXPECT_EQ(url->fragment(), "%E2%9C%93"); } } @@ -392,9 +392,9 @@ TEST_CASE(complete_file_url_with_base) TEST_CASE(empty_url_with_base_url) { URL::URL base_url { "https://foo.com/"sv }; - URL::URL parsed_url = URL::Parser::basic_parse(""sv, base_url); - EXPECT_EQ(parsed_url.is_valid(), true); - EXPECT(base_url.equals(parsed_url)); + Optional parsed_url = URL::Parser::basic_parse(""sv, base_url); + EXPECT_EQ(parsed_url.has_value(), true); + EXPECT(base_url.equals(*parsed_url)); } TEST_CASE(google_street_view) diff --git a/Utilities/xml.cpp b/Utilities/xml.cpp index a3b6acb3d67..8b992993f92 100644 --- a/Utilities/xml.cpp +++ b/Utilities/xml.cpp @@ -365,13 +365,13 @@ static auto parse(StringView contents) .resolve_external_resource = [&](XML::SystemID const& system_id, Optional const&) -> ErrorOr>> { auto base = URL::create_with_file_scheme(s_path); auto url = URL::Parser::basic_parse(system_id.system_literal, base); - if (!url.is_valid()) + if (!url.has_value()) return Error::from_string_literal("Invalid URL"); - if (url.scheme() != "file") + if (url->scheme() != "file") return Error::from_string_literal("NYI: Nonlocal entity"); - auto file = TRY(Core::File::open(URL::percent_decode(url.serialize_path()), Core::File::OpenMode::Read)); + auto file = TRY(Core::File::open(URL::percent_decode(url->serialize_path()), Core::File::OpenMode::Read)); return ByteString::copy(TRY(file->read_until_eof())); }, },