LibWeb: Use correct URL parsing methods throughout LibWeb

There are essentially 3 URL parsing abstract operations (AOs) defined by the spec:
1. Parse a URL
2. Encoding parse a URL
3. Encoding parse a URL and serialize the result

Further, each of these is defined for both the Document and the environment
settings object (ESO).

This patch defines these methods in accordance with the spec and updates
existing users to invoke the correct method. In places where the correct
method is ambiguous, we use the encoding parser to preserve existing ad-
hoc behavior.
This commit is contained in:
Timothy Flynn 2024-12-06 16:24:08 -05:00 committed by Tim Flynn
parent 0b2fe008a3
commit fe891727dc
Notes: github-actions[bot] 2024-12-10 18:38:02 +00:00
25 changed files with 119 additions and 57 deletions

View file

@ -464,7 +464,7 @@ static inline bool matches_pseudo_class(CSS::Selector::SimpleSelector::PseudoCla
if (!matches_link_pseudo_class(element))
return false;
auto document_url = element.document().url();
URL::URL target_url = element.document().parse_url(element.attribute(HTML::AttributeNames::href).value_or({}));
URL::URL target_url = element.document().encoding_parse_url(element.attribute(HTML::AttributeNames::href).value_or({}));
if (target_url.fragment().has_value())
return document_url.equals(target_url, URL::ExcludeFragment::No);
return document_url.equals(target_url, URL::ExcludeFragment::Yes);

View file

@ -2825,7 +2825,7 @@ Optional<FontLoader&> StyleComputer::load_font_face(ParsedFontFace const& font_f
for (auto const& source : font_face.sources()) {
// FIXME: These should be loaded relative to the stylesheet URL instead of the document URL.
if (source.local_or_url.has<URL::URL>())
urls.append(m_document->parse_url(source.local_or_url.get<URL::URL>().to_string()));
urls.append(m_document->encoding_parse_url(source.local_or_url.get<URL::URL>().to_string()));
// FIXME: Handle local()
}

View file

@ -1081,7 +1081,38 @@ URL::URL Document::parse_url(StringView url) const
auto base_url = this->base_url();
// 2. Return the result of applying the URL parser to url, with baseURL.
return DOMURL::parse(url, base_url, Optional<StringView> { m_encoding });
return DOMURL::parse(url, base_url);
}
// https://html.spec.whatwg.org/multipage/urls-and-fetching.html#encoding-parsing-a-url
URL::URL Document::encoding_parse_url(StringView url) const
{
    // Parses `url` with this document acting as the spec's "environment".

    // 1. Let encoding be UTF-8.
    // 2. If environment is a Document object, then set encoding to environment's character encoding.
    // 3. Otherwise, if environment's relevant global object is a Window object, set encoding to environment's relevant
    //    global object's associated Document's character encoding.
    // NOTE: The environment is always a Document here, so steps 1-3 reduce to a single lookup on this document.
    auto document_encoding = encoding_or_default();

    // 4. Let baseURL be environment's base URL, if environment is a Document object; otherwise environment's API base URL.
    auto document_base_url = this->base_url();

    // 5. Return the result of applying the URL parser to url, with baseURL and encoding.
    return DOMURL::parse(url, document_base_url, document_encoding);
}
// https://html.spec.whatwg.org/multipage/urls-and-fetching.html#encoding-parsing-and-serializing-a-url
Optional<String> Document::encoding_parse_and_serialize_url(StringView url) const
{
    // Returns the serialized form of `url` after encoding-parsing it relative to this document,
    // or an empty Optional when parsing fails.

    // 1. Let url be the result of encoding-parsing a URL given url, relative to environment.
    auto const url_record = encoding_parse_url(url);

    // 3. Return the result of applying the URL serializer to url.
    if (url_record.is_valid())
        return url_record.serialize();

    // 2. If url is failure, then return failure.
    return {};
}
void Document::set_needs_layout()

View file

@ -157,6 +157,8 @@ public:
void set_opener_policy(HTML::OpenerPolicy policy) { m_opener_policy = move(policy); }
URL::URL parse_url(StringView) const;
URL::URL encoding_parse_url(StringView) const;
Optional<String> encoding_parse_and_serialize_url(StringView) const;
CSS::StyleComputer& style_computer() { return *m_style_computer; }
const CSS::StyleComputer& style_computer() const { return *m_style_computer; }

View file

@ -43,7 +43,7 @@ WebIDL::ExceptionOr<GC::Ref<EventSource>> EventSource::construct_impl(JS::Realm&
auto& settings = relevant_settings_object(event_source);
// 3. Let urlRecord be the result of encoding-parsing a URL given url, relative to settings.
auto url_record = settings.parse_url(url);
auto url_record = settings.encoding_parse_url(url);
// 4. If urlRecord is failure, then throw a "SyntaxError" DOMException.
if (!url_record.is_valid())

View file

@ -113,7 +113,8 @@ void HTMLBodyElement::attribute_changed(FlyString const& name, Optional<String>
if (color.has_value())
document().set_visited_link_color(color.value());
} else if (name.equals_ignoring_ascii_case("background"sv)) {
m_background_style_value = CSS::ImageStyleValue::create(document().parse_url(value.value_or(String {})));
// https://html.spec.whatwg.org/multipage/rendering.html#the-page:attr-background
m_background_style_value = CSS::ImageStyleValue::create(document().encoding_parse_url(value.value_or(String {})));
m_background_style_value->on_animate = [this] {
if (paintable()) {
paintable()->set_needs_display();

View file

@ -222,7 +222,7 @@ WebIDL::ExceptionOr<void> HTMLFormElement::submit_form(GC::Ref<HTMLElement> subm
// 14. Parse a URL given action, relative to the submitter element's node document. If this fails, return.
// 15. Let parsed action be the resulting URL record.
auto parsed_action = document().parse_url(action);
auto parsed_action = submitter->document().parse_url(action);
if (!parsed_action.is_valid()) {
dbgln("Failed to submit form: Invalid URL: {}", action);
return {};

View file

@ -486,22 +486,15 @@ void HTMLHyperlinkElementUtils::follow_the_hyperlink(Optional<String> hyperlink_
// 8. Let urlString be the result of encoding-parsing-and-serializing a URL given subject's href attribute value,
// relative to subject's node document.
auto url = hyperlink_element_utils_document().parse_url(href());
auto url_string = hyperlink_element_utils_document().encoding_parse_and_serialize_url(href());
// 9. If urlString is failure, then return.
if (!url.is_valid())
if (!url_string.has_value())
return;
auto url_string = url.to_string();
// 10. If hyperlinkSuffix is non-null, then append it to urlString.
if (hyperlink_suffix.has_value()) {
StringBuilder url_builder;
url_builder.append(url_string);
url_builder.append(*hyperlink_suffix);
url_string = MUST(url_builder.to_string());
}
if (hyperlink_suffix.has_value())
url_string = MUST(String::formatted("{}{}", *url_string, *hyperlink_suffix));
// 11. Let referrerPolicy be the current state of subject's referrerpolicy content attribute.
auto referrer_policy = ReferrerPolicy::from_string(hyperlink_element_utils_referrerpolicy().value_or({})).value_or(ReferrerPolicy::ReferrerPolicy::EmptyString);
@ -509,7 +502,7 @@ void HTMLHyperlinkElementUtils::follow_the_hyperlink(Optional<String> hyperlink_
// FIXME: 12. If subject's link types includes the noreferrer keyword, then set referrerPolicy to "no-referrer".
// 13. Navigate targetNavigable to urlString using subject's node document, with referrerPolicy set to referrerPolicy and userInvolvement set to userInvolvement.
MUST(target_navigable->navigate({ .url = url_string, .source_document = hyperlink_element_utils_document(), .referrer_policy = referrer_policy, .user_involvement = user_involvement }));
MUST(target_navigable->navigate({ .url = *url_string, .source_document = hyperlink_element_utils_document(), .referrer_policy = referrer_policy, .user_involvement = user_involvement }));
}
}

View file

@ -1319,7 +1319,7 @@ WebIDL::ExceptionOr<void> HTMLInputElement::handle_src_attribute(String const& v
// 1. Let url be the result of encoding-parsing a URL given the src attribute's value, relative to the element's
// node document.
auto url = document().parse_url(value);
auto url = document().encoding_parse_url(value);
// 2. If url is failure, then return.
if (!url.is_valid())

View file

@ -78,14 +78,14 @@ void HTMLLinkElement::inserted()
if (m_relationship & Relationship::Preload) {
// FIXME: Respect the "as" attribute.
LoadRequest request;
request.set_url(document().parse_url(get_attribute_value(HTML::AttributeNames::href)));
request.set_url(document().encoding_parse_url(get_attribute_value(HTML::AttributeNames::href)));
set_resource(ResourceLoader::the().load_resource(Resource::Type::Generic, request));
} else if (m_relationship & Relationship::DNSPrefetch) {
ResourceLoader::the().prefetch_dns(document().parse_url(get_attribute_value(HTML::AttributeNames::href)));
ResourceLoader::the().prefetch_dns(document().encoding_parse_url(get_attribute_value(HTML::AttributeNames::href)));
} else if (m_relationship & Relationship::Preconnect) {
ResourceLoader::the().preconnect(document().parse_url(get_attribute_value(HTML::AttributeNames::href)));
ResourceLoader::the().preconnect(document().encoding_parse_url(get_attribute_value(HTML::AttributeNames::href)));
} else if (m_relationship & Relationship::Icon) {
auto favicon_url = document().parse_url(href());
auto favicon_url = document().encoding_parse_url(href());
auto favicon_request = LoadRequest::create_for_url_on_page(favicon_url, &document().page());
set_resource(ResourceLoader::the().load_resource(Resource::Type::Generic, favicon_request));
}
@ -261,6 +261,8 @@ GC::Ptr<Fetch::Infrastructure::Request> HTMLLinkElement::create_link_request(HTM
// FIXME: 2. If options's destination is null, then return null.
// 3. Let url be the result of encoding-parsing a URL given options's href, relative to options's base URL.
// FIXME: Spec issue: We should be parsing this URL relative to a document or environment settings object.
// https://github.com/whatwg/html/issues/9715
auto url = options.base_url.complete_url(options.href);
// 4. If url is failure, then return null.

View file

@ -137,7 +137,7 @@ String HTMLObjectElement::data() const
if (!data.has_value())
return {};
return document().parse_url(*data).to_string();
return document().encoding_parse_url(*data).to_string();
}
GC::Ptr<Layout::Node> HTMLObjectElement::create_layout_node(CSS::StyleProperties style)
@ -401,7 +401,7 @@ void HTMLObjectElement::load_image()
{
// NOTE: This currently reloads the image instead of reusing the resource we've already downloaded.
auto data = get_attribute_value(HTML::AttributeNames::data);
auto url = document().parse_url(data);
auto url = document().encoding_parse_url(data);
m_resource_request = HTML::SharedResourceRequest::get_or_create(realm(), document().page(), url);
m_resource_request->add_callbacks(
[this] {

View file

@ -74,7 +74,8 @@ void HTMLTableCellElement::apply_presentational_hints(CSS::StyleProperties& styl
style.set_property(CSS::PropertyID::Height, parsed_value.release_nonnull());
return;
} else if (name == HTML::AttributeNames::background) {
if (auto parsed_value = document().parse_url(value); parsed_value.is_valid())
// https://html.spec.whatwg.org/multipage/rendering.html#tables-2:encoding-parsing-and-serializing-a-url
if (auto parsed_value = document().encoding_parse_url(value); parsed_value.is_valid())
style.set_property(CSS::PropertyID::BackgroundImage, CSS::ImageStyleValue::create(parsed_value));
return;
}

View file

@ -74,7 +74,8 @@ void HTMLTableElement::apply_presentational_hints(CSS::StyleProperties& style) c
return;
}
if (name == HTML::AttributeNames::background) {
if (auto parsed_value = document().parse_url(value); parsed_value.is_valid())
// https://html.spec.whatwg.org/multipage/rendering.html#tables-2:encoding-parsing-and-serializing-a-url
if (auto parsed_value = document().encoding_parse_url(value); parsed_value.is_valid())
style.set_property(CSS::PropertyID::BackgroundImage, CSS::ImageStyleValue::create(parsed_value));
return;
}

View file

@ -49,7 +49,8 @@ void HTMLTableRowElement::apply_presentational_hints(CSS::StyleProperties& style
if (color.has_value())
style.set_property(CSS::PropertyID::BackgroundColor, CSS::CSSColorValue::create_from_color(color.value()));
} else if (name == HTML::AttributeNames::background) {
if (auto parsed_value = document().parse_url(value); parsed_value.is_valid())
// https://html.spec.whatwg.org/multipage/rendering.html#tables-2:encoding-parsing-and-serializing-a-url
if (auto parsed_value = document().encoding_parse_url(value); parsed_value.is_valid())
style.set_property(CSS::PropertyID::BackgroundImage, CSS::ImageStyleValue::create(parsed_value));
} else if (name == HTML::AttributeNames::height) {
if (auto parsed_value = parse_dimension_value(value))

View file

@ -105,7 +105,7 @@ void HTMLTableSectionElement::apply_presentational_hints(CSS::StyleProperties& s
for_each_attribute([&](auto& name, auto& value) {
// https://html.spec.whatwg.org/multipage/rendering.html#tables-2:encoding-parsing-and-serializing-a-url
if (name == HTML::AttributeNames::background) {
if (auto parsed_value = document().parse_url(value); parsed_value.is_valid())
if (auto parsed_value = document().encoding_parse_url(value); parsed_value.is_valid())
style.set_property(CSS::PropertyID::BackgroundImage, CSS::ImageStyleValue::create(parsed_value));
}
// https://html.spec.whatwg.org/multipage/rendering.html#tables-2:rules-for-parsing-a-legacy-colour-value

View file

@ -108,22 +108,21 @@ WebIDL::ExceptionOr<String> Location::href() const
WebIDL::ExceptionOr<void> Location::set_href(String const& new_href)
{
auto& realm = this->realm();
auto& window = verify_cast<HTML::Window>(HTML::current_principal_global_object());
// 1. If this's relevant Document is null, then return.
auto const relevant_document = this->relevant_document();
if (!relevant_document)
return {};
// FIXME: 2. Let url be the result of encoding-parsing a URL given the given value, relative to the entry settings object.
auto href_url = window.associated_document().parse_url(new_href.to_byte_string());
// 2. Let url be the result of encoding-parsing a URL given the given value, relative to the entry settings object.
auto url = entry_settings_object().encoding_parse_url(new_href.to_byte_string());
// 3. If url is failure, then throw a "SyntaxError" DOMException.
if (!href_url.is_valid())
if (!url.is_valid())
return WebIDL::SyntaxError::create(realm, MUST(String::formatted("Invalid URL '{}'", new_href)));
// 4. Location-object navigate this to url.
TRY(navigate(href_url));
TRY(navigate(url));
return {};
}

View file

@ -11,6 +11,7 @@
#include <LibWeb/Bindings/PrincipalHostDefined.h>
#include <LibWeb/Bindings/SyntheticHostDefined.h>
#include <LibWeb/DOM/Document.h>
#include <LibWeb/DOMURL/DOMURL.h>
#include <LibWeb/Fetch/Infrastructure/FetchRecord.h>
#include <LibWeb/HTML/Scripting/Environments.h>
#include <LibWeb/HTML/Scripting/ExceptionReporter.h>
@ -201,17 +202,45 @@ void prepare_to_run_callback(JS::Realm& realm)
// https://html.spec.whatwg.org/multipage/urls-and-fetching.html#parse-a-url
URL::URL EnvironmentSettingsObject::parse_url(StringView url)
{
// 1. Let encoding be document's character encoding, if document was given, and environment settings object's API URL character encoding otherwise.
// FIXME: Pass in environment settings object's API URL character encoding.
// 2. Let baseURL be document's base URL, if document was given, and environment settings object's API base URL otherwise.
// 1. Let baseURL be environment's base URL, if environment is a Document object; otherwise environment's API base URL.
auto base_url = api_base_url();
// 3. Let urlRecord be the result of applying the URL parser to url, with baseURL and encoding.
// 4. If urlRecord is failure, then return failure.
// 5. Let urlString be the result of applying the URL serializer to urlRecord.
// 6. Return urlString as the resulting URL string and urlRecord as the resulting URL record.
return base_url.complete_url(url);
// 2. Return the result of applying the URL parser to url, with baseURL.
return DOMURL::parse(url, base_url);
}
// https://html.spec.whatwg.org/multipage/urls-and-fetching.html#encoding-parsing-a-url
URL::URL EnvironmentSettingsObject::encoding_parse_url(StringView url)
{
    // Parses `url` with this environment settings object acting as the spec's "environment".

    // 1. Let encoding be UTF-8.
    auto character_encoding = "UTF-8"_string;

    // 2. If environment is a Document object, then set encoding to environment's character encoding.
    // NOTE: Step 2 has no code here; this overload handles the settings-object case only.

    // 3. Otherwise, if environment's relevant global object is a Window object, set encoding to environment's relevant
    //    global object's associated Document's character encoding.
    if (auto& global = global_object(); is<HTML::Window>(global))
        character_encoding = static_cast<HTML::Window const&>(global).associated_document().encoding_or_default();

    // 4. Let baseURL be environment's base URL, if environment is a Document object; otherwise environment's API base URL.
    auto settings_base_url = api_base_url();

    // 5. Return the result of applying the URL parser to url, with baseURL and encoding.
    return DOMURL::parse(url, settings_base_url, character_encoding);
}
// https://html.spec.whatwg.org/multipage/urls-and-fetching.html#encoding-parsing-and-serializing-a-url
Optional<String> EnvironmentSettingsObject::encoding_parse_and_serialize_url(StringView url)
{
    // Returns the serialized form of `url` after encoding-parsing it relative to this settings object,
    // or an empty Optional when parsing fails.

    // 1. Let url be the result of encoding-parsing a URL given url, relative to environment.
    auto const url_record = encoding_parse_url(url);

    // 3. Return the result of applying the URL serializer to url.
    if (url_record.is_valid())
        return url_record.serialize();

    // 2. If url is failure, then return failure.
    return {};
}
// https://html.spec.whatwg.org/multipage/webappapis.html#clean-up-after-running-a-callback

View file

@ -88,6 +88,8 @@ public:
virtual CanUseCrossOriginIsolatedAPIs cross_origin_isolated_capability() const = 0;
URL::URL parse_url(StringView);
URL::URL encoding_parse_url(StringView);
Optional<String> encoding_parse_and_serialize_url(StringView);
JS::Realm& realm();
JS::Object& global_object();

View file

@ -197,8 +197,8 @@ WebIDL::ExceptionOr<Window::OpenedWindow> Window::window_open_steps_internal(Str
// 4. If url is not the empty string, then:
if (!url.is_empty()) {
// FIXME: 1. Set urlRecord to the result of encoding-parsing a URL given url, relative to sourceDocument.
url_record = entry_settings_object().parse_url(url);
// 1. Set urlRecord to the result of encoding-parsing a URL given url, relative to sourceDocument.
url_record = source_document.encoding_parse_url(url);
// 2. If urlRecord is failure, then throw a "SyntaxError" DOMException.
if (!url_record->is_valid())

View file

@ -60,7 +60,7 @@ WebIDL::ExceptionOr<GC::Ref<Worker>> Worker::create(String const& script_url, Wo
auto& outside_settings = current_principal_settings_object();
// 3. Parse the scriptURL argument relative to outside settings.
auto url = document.parse_url(script_url);
auto url = outside_settings.parse_url(script_url);
// 4. If this fails, throw a "SyntaxError" DOMException.
if (!url.is_valid()) {

View file

@ -103,7 +103,7 @@ WebIDL::ExceptionOr<void> WorkerGlobalScope::import_scripts(Vector<String> const
// 5. For each url of urls:
for (auto const& url : urls) {
// 1. Let urlRecord be the result of encoding-parsing a URL given url, relative to settings object.
auto url_record = settings_object.parse_url(url);
auto url_record = settings_object.encoding_parse_url(url);
// 2. If urlRecord is failure, then throw a "SyntaxError" DOMException.
if (!url_record.is_valid())

View file

@ -331,7 +331,7 @@ EventResult EventHandler::handle_mouseup(CSSPixelPoint viewport_position, CSSPix
if (GC::Ptr<HTML::HTMLAnchorElement const> link = node->enclosing_link_element()) {
GC::Ref<DOM::Document> document = *m_navigable->active_document();
auto href = link->href();
auto url = document->parse_url(href);
auto url = document->encoding_parse_url(href);
if (button == UIEvents::MouseButton::Primary && (modifiers & UIEvents::Mod_PlatformCtrl) != 0) {
m_navigable->page().client().page_did_click_link(url, link->target().to_byte_string(), modifiers);
@ -343,13 +343,13 @@ EventResult EventHandler::handle_mouseup(CSSPixelPoint viewport_position, CSSPix
} else if (button == UIEvents::MouseButton::Secondary) {
if (is<HTML::HTMLImageElement>(*node)) {
auto& image_element = verify_cast<HTML::HTMLImageElement>(*node);
auto image_url = image_element.document().parse_url(image_element.src());
auto image_url = image_element.document().encoding_parse_url(image_element.src());
m_navigable->page().client().page_did_request_image_context_menu(viewport_position, image_url, "", modifiers, image_element.immutable_bitmap()->bitmap());
} else if (is<HTML::HTMLMediaElement>(*node)) {
auto& media_element = verify_cast<HTML::HTMLMediaElement>(*node);
Page::MediaContextMenu menu {
.media_url = media_element.document().parse_url(media_element.current_src()),
.media_url = media_element.document().encoding_parse_url(media_element.current_src()),
.is_video = is<HTML::HTMLVideoElement>(*node),
.is_playing = media_element.potentially_playing(),
.is_muted = media_element.muted(),
@ -636,7 +636,7 @@ EventResult EventHandler::handle_mousemove(CSSPixelPoint viewport_position, CSSP
if (is_hovering_link) {
page.set_is_hovering_link(true);
page.client().page_did_hover_link(document.parse_url(hovered_link_element->href()));
page.client().page_did_hover_link(document.encoding_parse_url(hovered_link_element->href()));
} else if (page.is_hovering_link()) {
page.set_is_hovering_link(false);
page.client().page_did_unhover_link();

View file

@ -128,7 +128,7 @@ GC::Ptr<SVGGradientElement const> SVGGradientElement::linked_gradient(HashTable<
auto link = has_attribute(AttributeNames::href) ? get_attribute(AttributeNames::href) : get_attribute("xlink:href"_fly_string);
if (auto href = link; href.has_value() && !link->is_empty()) {
auto url = document().parse_url(*href);
auto url = document().encoding_parse_url(*href);
auto id = url.fragment();
if (!id.has_value() || id->is_empty())
return {};

View file

@ -3626,7 +3626,7 @@ JS_DEFINE_NATIVE_FUNCTION(@class_name@::@attribute.getter_callback@)
}
}
if (!has_keyword && !did_set_to_missing_value)
if (!has_keyword && !did_set_to_missing_value)
retval = "@invalid_enum_default_value@"_string;
)~~~");
@ -3782,9 +3782,9 @@ JS_DEFINE_NATIVE_FUNCTION(@class_name@::@attribute.getter_callback@)
if (!content_attribute_value.has_value())
return JS::PrimitiveString::create(vm, String {});
auto url_string = impl->document().parse_url(*content_attribute_value);
if (url_string.is_valid())
return JS::PrimitiveString::create(vm, url_string.to_string());
auto url_string = impl->document().encoding_parse_and_serialize_url(*content_attribute_value);
if (url_string.has_value())
return JS::PrimitiveString::create(vm, url_string.release_value());
)~~~");
}

View file

@ -432,7 +432,7 @@ void ConnectionFromClient::debug_request(u64 page_id, ByteString const& request,
load_html(page_id, "<h1>Failed to find &lt;link rel=&quot;match&quot; /&gt; or &lt;link rel=&quot;mismatch&quot; /&gt; in ref test page!</h1> Make sure you added it.");
} else {
auto link = maybe_link.release_value();
auto url = document->parse_url(link->get_attribute_value(Web::HTML::AttributeNames::href));
auto url = document->encoding_parse_url(link->get_attribute_value(Web::HTML::AttributeNames::href));
if (url.query().has_value() && !url.query()->is_empty()) {
load_html(page_id, "<h1>Invalid ref test link - query string must be empty</h1>");
return;