LibWeb: Cache attribute names in lowercase to speed up selector matching

When matching a CSS attribute selector against an HTML element, the
attribute name is case-insensitive. Before this change, that meant we
had to call equals_ignoring_ascii_case() on all the attribute names.

We now cache the attribute name lowercased on each Attr node, which
allows us to do FlyString-to-FlyString comparison (simple pointer
comparison).

This brings attribute selector matching from 6% to <1% when loading our
GitHub repo at https://github.com/SerenityOS/serenity
This commit is contained in:
Andreas Kling 2024-03-15 18:19:59 +01:00
parent a3b4c2a30f
commit 43ef3dc0ab
5 changed files with 35 additions and 8 deletions

View file

@ -9,8 +9,10 @@
#include <LibWeb/CSS/SelectorEngine.h>
#include <LibWeb/CSS/StyleProperties.h>
#include <LibWeb/CSS/ValueID.h>
#include <LibWeb/DOM/Attr.h>
#include <LibWeb/DOM/Document.h>
#include <LibWeb/DOM/Element.h>
#include <LibWeb/DOM/NamedNodeMap.h>
#include <LibWeb/DOM/Text.h>
#include <LibWeb/HTML/AttributeNames.h>
#include <LibWeb/HTML/HTMLAnchorElement.h>
@ -28,6 +30,7 @@
#include <LibWeb/HTML/HTMLSelectElement.h>
#include <LibWeb/HTML/HTMLTextAreaElement.h>
#include <LibWeb/Infra/Strings.h>
#include <LibWeb/Namespace.h>
namespace Web::SelectorEngine {
@ -131,11 +134,17 @@ static inline bool matches_attribute(CSS::Selector::SimpleSelector::Attribute co
auto const& attribute_name = attribute.qualified_name.name.name;
auto const* attr = element.namespace_uri() == Namespace::HTML ? element.attributes()->get_attribute_with_lowercase_qualified_name(attribute_name)
: element.attributes()->get_attribute(attribute_name);
if (attribute.match_type == CSS::Selector::SimpleSelector::Attribute::MatchType::HasAttribute) {
// Early way out in case of an attribute existence selector.
return element.has_attribute(attribute_name);
return attr != nullptr;
}
if (!attr)
return false;
auto const case_insensitive_match = (attribute.case_type == CSS::Selector::SimpleSelector::Attribute::CaseType::CaseInsensitiveMatch);
auto const case_sensitivity = case_insensitive_match
? CaseSensitivity::CaseInsensitive
@ -144,14 +153,14 @@ static inline bool matches_attribute(CSS::Selector::SimpleSelector::Attribute co
switch (attribute.match_type) {
case CSS::Selector::SimpleSelector::Attribute::MatchType::ExactValueMatch:
return case_insensitive_match
? Infra::is_ascii_case_insensitive_match(element.attribute(attribute_name).value_or({}), attribute.value)
: element.attribute(attribute_name) == attribute.value;
? Infra::is_ascii_case_insensitive_match(attr->value(), attribute.value)
: attr->value() == attribute.value;
case CSS::Selector::SimpleSelector::Attribute::MatchType::ContainsWord: {
if (attribute.value.is_empty()) {
// This selector is always false if the match value is empty.
return false;
}
auto attribute_value = element.attribute(attribute_name).value_or({});
auto const& attribute_value = attr->value();
auto const view = attribute_value.bytes_as_string_view().split_view(' ');
auto const size = view.size();
for (size_t i = 0; i < size; ++i) {
@ -166,9 +175,9 @@ static inline bool matches_attribute(CSS::Selector::SimpleSelector::Attribute co
}
case CSS::Selector::SimpleSelector::Attribute::MatchType::ContainsString:
return !attribute.value.is_empty()
&& element.attribute(attribute_name).value_or({}).contains(attribute.value, case_sensitivity);
&& attr->value().contains(attribute.value, case_sensitivity);
case CSS::Selector::SimpleSelector::Attribute::MatchType::StartsWithSegment: {
auto const element_attr_value = element.attribute(attribute_name).value_or({});
auto const& element_attr_value = attr->value();
if (element_attr_value.is_empty()) {
// If the attribute value on element is empty, the selector is true
// if the match value is also empty and false otherwise.
@ -184,10 +193,10 @@ static inline bool matches_attribute(CSS::Selector::SimpleSelector::Attribute co
}
case CSS::Selector::SimpleSelector::Attribute::MatchType::StartsWithString:
return !attribute.value.is_empty()
&& element.attribute(attribute_name).value_or({}).bytes_as_string_view().starts_with(attribute.value, case_sensitivity);
&& attr->value().bytes_as_string_view().starts_with(attribute.value, case_sensitivity);
case CSS::Selector::SimpleSelector::Attribute::MatchType::EndsWithString:
return !attribute.value.is_empty()
&& element.attribute(attribute_name).value_or({}).bytes_as_string_view().ends_with(attribute.value, case_sensitivity);
&& attr->value().bytes_as_string_view().ends_with(attribute.value, case_sensitivity);
default:
break;
}

View file

@ -35,6 +35,7 @@ JS::NonnullGCPtr<Attr> Attr::clone(Document& document)
Attr::Attr(Document& document, QualifiedName qualified_name, String value, Element* owner_element)
: Node(document, NodeType::ATTRIBUTE_NODE)
, m_qualified_name(move(qualified_name))
, m_lowercase_name(MUST(String(m_qualified_name.as_string()).to_lowercase()))
, m_value(move(value))
, m_owner_element(owner_element)
{

View file

@ -30,6 +30,7 @@ public:
Optional<FlyString> const& prefix() const { return m_qualified_name.prefix(); }
FlyString const& local_name() const { return m_qualified_name.local_name(); }
FlyString const& name() const { return m_qualified_name.as_string(); }
// Returns the lowercased form of the qualified name, which is cached in the constructor
// so that callers can compare it against another FlyString directly.
FlyString const& lowercase_name() const { return m_lowercase_name; }
String const& value() const { return m_value; }
void set_value(String value);
@ -51,6 +52,7 @@ private:
virtual void visit_edges(Cell::Visitor&) override;
QualifiedName m_qualified_name;
// Lowercased copy of m_qualified_name.as_string(), computed once in the constructor.
FlyString m_lowercase_name;
String m_value;
JS::GCPtr<Element> m_owner_element;
};

View file

@ -174,6 +174,19 @@ Attr const* NamedNodeMap::get_attribute(FlyString const& qualified_name, size_t*
return nullptr;
}
// Returns the attribute whose cached lowercase name equals `lowercase_qualified_name`,
// or nullptr if no attribute matches. The argument is compared as-is, so callers are
// expected to pass a name that is already lowercase.
Attr const* NamedNodeMap::get_attribute_with_lowercase_qualified_name(FlyString const& lowercase_qualified_name) const
{
    // This lookup is only meant for elements in the HTML namespace.
    VERIFY(associated_element().namespace_uri() == Namespace::HTML);

    for (auto const& candidate : m_attributes) {
        bool const is_match = candidate->lowercase_name() == lowercase_qualified_name;
        if (is_match)
            return candidate;
    }

    return nullptr;
}
// https://dom.spec.whatwg.org/#concept-element-attributes-get-by-namespace
Attr* NamedNodeMap::get_attribute_ns(Optional<FlyString> const& namespace_, FlyString const& local_name, size_t* item_index)
{

View file

@ -54,6 +54,8 @@ public:
Attr const* remove_attribute(FlyString const& qualified_name);
Attr const* remove_attribute_ns(Optional<FlyString> const& namespace_, FlyString const& local_name);
// Looks up an attribute by comparing the given name against each attribute's cached
// lowercase name; returns nullptr if none matches. The associated element must be in
// the HTML namespace (this is VERIFY'd by the implementation).
Attr const* get_attribute_with_lowercase_qualified_name(FlyString const&) const;
private:
explicit NamedNodeMap(Element&);