LibURL+LibWeb: Make URL::basic_parse return an Optional<URL>

URL::basic_parse has a subtle bug where the resulting URL is not set
to valid when StateOveride is provided and the URL parser early returns
a valid URL.

This has not surfaced as a problem so far, as the only users of the
state override API provide an already valid URL buffer and also ignore
the result of basic parsing with a state override.

However, this bug surfaces implementing the URL pattern spec, which as
part of URL canonicalization:
 * Provides a dummy URL record
 * Basic URL parses that URL with state override
 * Checks the result of the URL parser to validate the URL

While we could set URL validity on every early return of the URL parser
during state override, it has been a long standing FIXME around the code
to try and remove the awkward validity state of the URL class. So this
commit makes the first stage of this change by migrating the basic
parser API to return Optional, which also happens to make this subtle
issue not a problem any more.
This commit is contained in:
Shannon Booth 2025-01-10 04:50:34 +13:00 committed by Tim Flynn
parent b6ec055bf9
commit 5bed8f4055
Notes: github-actions[bot] 2025-01-11 15:09:28 +00:00
9 changed files with 56 additions and 56 deletions

View file

@ -708,7 +708,7 @@ String Parser::percent_encode_after_encoding(TextCodec::Encoder& encoder, String
}
// https://url.spec.whatwg.org/#concept-basic-url-parser
URL Parser::basic_parse(StringView raw_input, Optional<URL const&> base_url, URL* url, Optional<State> state_override, Optional<StringView> encoding)
Optional<URL> Parser::basic_parse(StringView raw_input, Optional<URL const&> base_url, URL* url, Optional<State> state_override, Optional<StringView> encoding)
{
dbgln_if(URL_PARSER_DEBUG, "URL::Parser::basic_parse: Parsing '{}'", raw_input);

View file

@ -58,7 +58,7 @@ public:
}
// https://url.spec.whatwg.org/#concept-basic-url-parser
static URL basic_parse(StringView input, Optional<URL const&> base_url = {}, URL* url = nullptr, Optional<State> state_override = {}, Optional<StringView> encoding = {});
static Optional<URL> basic_parse(StringView input, Optional<URL const&> base_url = {}, URL* url = nullptr, Optional<State> state_override = {}, Optional<StringView> encoding = {});
// https://url.spec.whatwg.org/#string-percent-encode-after-encoding
static String percent_encode_after_encoding(TextCodec::Encoder&, StringView input, PercentEncodeSet percent_encode_set, bool space_as_plus = false);

View file

@ -23,7 +23,7 @@ namespace URL {
// FIXME: It could make sense to force users of URL to use URL::Parser::basic_parse() explicitly instead of using a constructor.
URL::URL(StringView string)
: URL(Parser::basic_parse(string))
: URL(Parser::basic_parse(string).value_or(URL {}))
{
if constexpr (URL_PARSER_DEBUG) {
if (m_data->valid)
@ -38,7 +38,11 @@ URL URL::complete_url(StringView relative_url) const
if (!is_valid())
return {};
return Parser::basic_parse(relative_url, *this);
auto result = Parser::basic_parse(relative_url, *this);
if (!result.has_value())
return {};
return result.release_value();
}
ByteString URL::path_segment_at_index(size_t index) const
@ -367,12 +371,12 @@ Origin URL::origin() const
auto path_url = Parser::basic_parse(serialize_path());
// 3. If pathURL is failure, then return a new opaque origin.
if (!path_url.is_valid())
if (!path_url.has_value())
return Origin {};
// 4. If pathURLs scheme is "http", "https", or "file", then return pathURLs origin.
if (path_url.scheme().is_one_of("http"sv, "https"sv, "file"sv))
return path_url.origin();
if (path_url->scheme().is_one_of("http"sv, "https"sv, "file"sv))
return path_url->origin();
// 5. Return a new opaque origin.
return Origin {};

View file

@ -24,14 +24,14 @@ void fetch_a_style_resource(String const& url_value, CSSStyleSheet const& sheet,
// 3. Let parsedUrl be the result of the URL parser steps with urlValues url and base. If the algorithm returns an error, return.
auto parsed_url = URL::Parser::basic_parse(url_value, base);
if (!parsed_url.is_valid())
if (!parsed_url.has_value())
return;
// 4. Let req be a new request whose url is parsedUrl, whose destination is destination, mode is corsMode,
// origin is environmentSettingss origin, credentials mode is "same-origin", use-url-credentials flag is set,
// client is environmentSettings, and whose referrer is environmentSettingss API base URL.
auto request = Fetch::Infrastructure::Request::create(vm);
request->set_url(parsed_url);
request->set_url(parsed_url.release_value());
request->set_destination(destination);
request->set_mode(cors_mode == CorsMode::Cors ? Fetch::Infrastructure::Request::Mode::CORS : Fetch::Infrastructure::Request::Mode::NoCORS);
request->set_origin(environment_settings.origin());

View file

@ -28,9 +28,6 @@ GC::Ref<DOMURL> DOMURL::create(JS::Realm& realm, URL::URL url, GC::Ref<URLSearch
// https://url.spec.whatwg.org/#api-url-parser
static Optional<URL::URL> parse_api_url(String const& url, Optional<String> const& base)
{
// FIXME: We somewhat awkwardly have two failure states encapsulated in the return type (and convert between them in the steps),
// ideally we'd get rid of URL's valid flag
// 1. Let parsedBase be null.
Optional<URL::URL> parsed_base;
@ -40,15 +37,14 @@ static Optional<URL::URL> parse_api_url(String const& url, Optional<String> cons
auto parsed_base_url = URL::Parser::basic_parse(*base);
// 2. If parsedBase is failure, then return failure.
if (!parsed_base_url.is_valid())
if (!parsed_base_url.has_value())
return {};
parsed_base = parsed_base_url;
}
// 3. Return the result of running the basic URL parser on url with parsedBase.
auto parsed = URL::Parser::basic_parse(url, parsed_base);
return parsed.is_valid() ? parsed : Optional<URL::URL> {};
return URL::Parser::basic_parse(url, parsed_base);
}
// https://url.spec.whatwg.org/#url-initialize
@ -183,17 +179,17 @@ String DOMURL::to_json() const
}
// https://url.spec.whatwg.org/#ref-for-dom-url-href②
WebIDL::ExceptionOr<void> DOMURL::set_href(String const& href)
WebIDL::ExceptionOr<void> DOMURL::set_href(String const& value)
{
// 1. Let parsedURL be the result of running the basic URL parser on the given value.
URL::URL parsed_url = href;
auto parsed_url = URL::Parser::basic_parse(value);
// 2. If parsedURL is failure, then throw a TypeError.
if (!parsed_url.is_valid())
if (!parsed_url.has_value())
return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, "Invalid URL"sv };
// 3. Set thiss URL to parsedURL.
m_url = move(parsed_url);
m_url = parsed_url.release_value();
// 4. Empty thiss query objects list.
m_query->m_list.clear();
@ -509,17 +505,17 @@ URL::URL parse(StringView input, Optional<URL::URL const&> base_url, Optional<St
auto url = URL::Parser::basic_parse(input, base_url, {}, {}, encoding);
// 2. If url is failure, return failure.
if (!url.is_valid())
return {};
if (!url.has_value())
return {}; // FIXME: Migrate this API to return an OptionalNone on failure.
// 3. If urls scheme is not "blob", return url.
if (url.scheme() != "blob")
return url;
if (url->scheme() != "blob")
return url.release_value();
// 4. Set urls blob URL entry to the result of resolving the blob URL url, if that did not return failure, and null otherwise.
auto blob_url_entry = FileAPI::resolve_a_blob_url(url);
auto blob_url_entry = FileAPI::resolve_a_blob_url(*url);
if (blob_url_entry.has_value()) {
url.set_blob_url_entry(URL::BlobURLEntry {
url->set_blob_url_entry(URL::BlobURLEntry {
.type = blob_url_entry->object->type(),
.byte_buffer = MUST(ByteBuffer::copy(blob_url_entry->object->raw_bytes())),
.environment_origin = blob_url_entry->environment->origin(),
@ -527,7 +523,7 @@ URL::URL parse(StringView input, Optional<URL::URL const&> base_url, Optional<St
}
// 5. Return url
return url;
return url.release_value();
}
}

View file

@ -196,7 +196,7 @@ WebIDL::ExceptionOr<void> Location::set_protocol(String const& value)
auto possible_failure = URL::Parser::basic_parse(value, {}, &copy_url, URL::Parser::State::SchemeStart);
// 5. If possibleFailure is failure, then throw a "SyntaxError" DOMException.
if (!possible_failure.is_valid())
if (!possible_failure.has_value())
return WebIDL::SyntaxError::create(realm(), MUST(String::formatted("Failed to set protocol. '{}' is an invalid protocol", value)));
// 6. if copyURL's scheme is not an HTTP(S) scheme, then terminate these steps.

View file

@ -32,9 +32,9 @@ WebIDL::ExceptionOr<bool> NavigatorBeaconMixin::send_beacon(String const& url, O
// 3. Set parsedUrl to the result of the URL parser steps with url and base. If the algorithm returns an error, or if parsedUrl's scheme is not "http" or "https", throw a "TypeError" exception and terminate these steps.
auto parsed_url = URL::Parser::basic_parse(url, base_url);
if (!parsed_url.is_valid())
if (!parsed_url.has_value())
return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, MUST(String::formatted("Beacon URL {} is invalid.", url)) };
if (parsed_url.scheme() != "http" && parsed_url.scheme() != "https")
if (parsed_url->scheme() != "http" && parsed_url->scheme() != "https")
return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, MUST(String::formatted("Beacon URL {} must be either http:// or https://.", url)) };
// 4. Let headerList be an empty list.
@ -76,7 +76,7 @@ WebIDL::ExceptionOr<bool> NavigatorBeaconMixin::send_beacon(String const& url, O
auto req = Fetch::Infrastructure::Request::create(vm);
req->set_method(MUST(ByteBuffer::copy("POST"sv.bytes()))); // method: POST
req->set_client(&relevant_settings_object); // client: this's relevant settings object
req->set_url_list({ parsed_url }); // url: parsedUrl
req->set_url_list({ parsed_url.release_value() }); // url: parsedUrl
req->set_header_list(header_list); // header list: headerList
req->set_origin(origin); // origin: origin
req->set_keepalive(true); // keepalive: true

View file

@ -342,36 +342,36 @@ TEST_CASE(unicode)
TEST_CASE(query_with_non_ascii)
{
{
URL::URL url = URL::Parser::basic_parse("http://example.com/?utf8=✓"sv);
EXPECT(url.is_valid());
EXPECT_EQ(url.serialize_path(), "/"sv);
EXPECT_EQ(url.query(), "utf8=%E2%9C%93");
EXPECT(!url.fragment().has_value());
Optional<URL::URL> url = URL::Parser::basic_parse("http://example.com/?utf8=✓"sv);
EXPECT(url.has_value());
EXPECT_EQ(url->serialize_path(), "/"sv);
EXPECT_EQ(url->query(), "utf8=%E2%9C%93");
EXPECT(!url->fragment().has_value());
}
{
URL::URL url = URL::Parser::basic_parse("http://example.com/?shift_jis=✓"sv, {}, nullptr, {}, "shift_jis"sv);
EXPECT(url.is_valid());
EXPECT_EQ(url.serialize_path(), "/"sv);
EXPECT_EQ(url.query(), "shift_jis=%26%2310003%3B");
EXPECT(!url.fragment().has_value());
Optional<URL::URL> url = URL::Parser::basic_parse("http://example.com/?shift_jis=✓"sv, {}, nullptr, {}, "shift_jis"sv);
EXPECT(url.has_value());
EXPECT_EQ(url->serialize_path(), "/"sv);
EXPECT_EQ(url->query(), "shift_jis=%26%2310003%3B");
EXPECT(!url->fragment().has_value());
}
}
TEST_CASE(fragment_with_non_ascii)
{
{
URL::URL url = URL::Parser::basic_parse("http://example.com/#✓"sv);
EXPECT(url.is_valid());
EXPECT_EQ(url.serialize_path(), "/"sv);
EXPECT(!url.query().has_value());
EXPECT_EQ(url.fragment(), "%E2%9C%93");
Optional<URL::URL> url = URL::Parser::basic_parse("http://example.com/#✓"sv);
EXPECT(url.has_value());
EXPECT_EQ(url->serialize_path(), "/"sv);
EXPECT(!url->query().has_value());
EXPECT_EQ(url->fragment(), "%E2%9C%93");
}
{
URL::URL url = URL::Parser::basic_parse("http://example.com/#✓"sv, {}, nullptr, {}, "shift_jis"sv);
EXPECT(url.is_valid());
EXPECT_EQ(url.serialize_path(), "/"sv);
EXPECT(!url.query().has_value());
EXPECT_EQ(url.fragment(), "%E2%9C%93");
Optional<URL::URL> url = URL::Parser::basic_parse("http://example.com/#✓"sv, {}, nullptr, {}, "shift_jis"sv);
EXPECT(url.has_value());
EXPECT_EQ(url->serialize_path(), "/"sv);
EXPECT(!url->query().has_value());
EXPECT_EQ(url->fragment(), "%E2%9C%93");
}
}
@ -392,9 +392,9 @@ TEST_CASE(complete_file_url_with_base)
TEST_CASE(empty_url_with_base_url)
{
URL::URL base_url { "https://foo.com/"sv };
URL::URL parsed_url = URL::Parser::basic_parse(""sv, base_url);
EXPECT_EQ(parsed_url.is_valid(), true);
EXPECT(base_url.equals(parsed_url));
Optional<URL::URL> parsed_url = URL::Parser::basic_parse(""sv, base_url);
EXPECT_EQ(parsed_url.has_value(), true);
EXPECT(base_url.equals(*parsed_url));
}
TEST_CASE(google_street_view)

View file

@ -365,13 +365,13 @@ static auto parse(StringView contents)
.resolve_external_resource = [&](XML::SystemID const& system_id, Optional<XML::PublicID> const&) -> ErrorOr<Variant<ByteString, Vector<XML::MarkupDeclaration>>> {
auto base = URL::create_with_file_scheme(s_path);
auto url = URL::Parser::basic_parse(system_id.system_literal, base);
if (!url.is_valid())
if (!url.has_value())
return Error::from_string_literal("Invalid URL");
if (url.scheme() != "file")
if (url->scheme() != "file")
return Error::from_string_literal("NYI: Nonlocal entity");
auto file = TRY(Core::File::open(URL::percent_decode(url.serialize_path()), Core::File::OpenMode::Read));
auto file = TRY(Core::File::open(URL::percent_decode(url->serialize_path()), Core::File::OpenMode::Read));
return ByteString::copy(TRY(file->read_until_eof()));
},
},