mirror of
https://github.com/SerenityOS/serenity.git
synced 2025-01-22 09:21:57 -05:00
AK+LibWeb: Add {Fly,}String::to_ascii_{upper,lower}_case()
These don't have to worry about the input not being valid UTF-8, so they can be infallible (and can even return self if no changes are needed). We use these instead of Infra::to_ascii_{upper,lower}_case in LibWeb. (cherry picked from commit 073bcfd3866852a4c4bcca2bd131bd65ae53541f)
This commit is contained in:
parent
14b6e5b89c
commit
e1ba881587
16 changed files with 147 additions and 13 deletions
|
@ -173,6 +173,54 @@ ErrorOr<void> Formatter<FlyString>::format(FormatBuilder& builder, FlyString con
|
|||
return Formatter<StringView>::format(builder, fly_string.bytes_as_string_view());
|
||||
}
|
||||
|
||||
// Produces a FlyString in which every ASCII uppercase letter of this string has been
// replaced by its lowercase form; every other byte is carried over untouched.
// When this string contains no ASCII uppercase letter, *this is returned unchanged.
FlyString FlyString::to_ascii_lowercase() const
{
    auto const source_bytes = bytes();

    // Pass 1: determine whether any byte needs converting at all.
    bool needs_conversion = false;
    for (u8 const byte : source_bytes) {
        if (AK::is_ascii_upper_alpha(byte)) {
            needs_conversion = true;
            break;
        }
    }

    if (!needs_conversion)
        return *this;

    // Pass 2: build the converted byte sequence. The capacity is reserved up front,
    // which is what makes the unchecked appends below safe.
    Vector<u8> converted;
    converted.ensure_capacity(source_bytes.size());
    for (u8 const byte : source_bytes) {
        u8 const output_byte = AK::is_ascii_upper_alpha(byte)
            ? static_cast<u8>(AK::to_ascii_lowercase(byte))
            : byte;
        converted.unchecked_append(output_byte);
    }

    // Only ASCII letters were altered, so the result is built without re-validating it.
    return String::from_utf8_without_validation(converted);
}
|
||||
|
||||
// Produces a FlyString in which every ASCII lowercase letter of this string has been
// replaced by its uppercase form; every other byte is carried over untouched.
// When this string contains no ASCII lowercase letter, *this is returned unchanged.
FlyString FlyString::to_ascii_uppercase() const
{
    auto const source_bytes = bytes();

    // Pass 1: determine whether any byte needs converting at all.
    bool needs_conversion = false;
    for (u8 const byte : source_bytes) {
        if (AK::is_ascii_lower_alpha(byte)) {
            needs_conversion = true;
            break;
        }
    }

    if (!needs_conversion)
        return *this;

    // Pass 2: build the converted byte sequence. The capacity is reserved up front,
    // which is what makes the unchecked appends below safe.
    Vector<u8> converted;
    converted.ensure_capacity(source_bytes.size());
    for (u8 const byte : source_bytes) {
        u8 const output_byte = AK::is_ascii_lower_alpha(byte)
            ? static_cast<u8>(AK::to_ascii_uppercase(byte))
            : byte;
        converted.unchecked_append(output_byte);
    }

    // Only ASCII letters were altered, so the result is built without re-validating it.
    return String::from_utf8_without_validation(converted);
}
|
||||
|
||||
bool FlyString::equals_ignoring_ascii_case(FlyString const& other) const
|
||||
{
|
||||
if (*this == other)
|
||||
|
|
|
@ -66,6 +66,9 @@ public:
|
|||
[[nodiscard]] bool equals_ignoring_ascii_case(FlyString const&) const;
|
||||
[[nodiscard]] bool equals_ignoring_ascii_case(StringView) const;
|
||||
|
||||
// Returns this string with ASCII uppercase letters converted to lowercase; other bytes are kept as-is.
// Infallible. If there is nothing to convert, the string itself is returned.
[[nodiscard]] FlyString to_ascii_lowercase() const;
|
||||
// Returns this string with ASCII lowercase letters converted to uppercase; other bytes are kept as-is.
// Infallible. If there is nothing to convert, the string itself is returned.
[[nodiscard]] FlyString to_ascii_uppercase() const;
|
||||
|
||||
[[nodiscard]] bool starts_with_bytes(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive) const;
|
||||
|
||||
[[nodiscard]] bool ends_with_bytes(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive) const;
|
||||
|
|
|
@ -319,6 +319,59 @@ ErrorOr<String> String::from_byte_string(ByteString const& byte_string)
|
|||
return String::from_utf8(byte_string.view());
|
||||
}
|
||||
|
||||
// Produces a String in which every ASCII uppercase letter of this string has been
// replaced by its lowercase form; every other byte is carried over untouched.
// When this string contains no ASCII uppercase letter, *this is returned unchanged.
String String::to_ascii_lowercase() const
{
    auto const source_bytes = bytes();

    // Pass 1: determine whether any byte needs converting at all.
    bool needs_conversion = false;
    for (u8 const byte : source_bytes) {
        if (AK::is_ascii_upper_alpha(byte)) {
            needs_conversion = true;
            break;
        }
    }

    if (!needs_conversion)
        return *this;

    // Pass 2: build the converted byte sequence. The capacity is reserved up front,
    // which is what makes the unchecked appends below safe.
    Vector<u8> converted;
    converted.ensure_capacity(source_bytes.size());
    for (u8 const byte : source_bytes) {
        u8 const output_byte = AK::is_ascii_upper_alpha(byte)
            ? static_cast<u8>(AK::to_ascii_lowercase(byte))
            : byte;
        converted.unchecked_append(output_byte);
    }

    // Only ASCII letters were altered, so the result is built without re-validating it.
    return String::from_utf8_without_validation(converted);
}
|
||||
|
||||
// Produces a String in which every ASCII lowercase letter of this string has been
// replaced by its uppercase form; every other byte is carried over untouched.
// When this string contains no ASCII lowercase letter, *this is returned unchanged.
String String::to_ascii_uppercase() const
{
    auto const source_bytes = bytes();

    // Pass 1: determine whether any byte needs converting at all.
    bool needs_conversion = false;
    for (u8 const byte : source_bytes) {
        if (AK::is_ascii_lower_alpha(byte)) {
            needs_conversion = true;
            break;
        }
    }

    if (!needs_conversion)
        return *this;

    // Pass 2: build the converted byte sequence. The capacity is reserved up front,
    // which is what makes the unchecked appends below safe.
    Vector<u8> converted;
    converted.ensure_capacity(source_bytes.size());
    for (u8 const byte : source_bytes) {
        u8 const output_byte = AK::is_ascii_lower_alpha(byte)
            ? static_cast<u8>(AK::to_ascii_uppercase(byte))
            : byte;
        converted.unchecked_append(output_byte);
    }

    // Only ASCII letters were altered, so the result is built without re-validating it.
    return String::from_utf8_without_validation(converted);
}
|
||||
|
||||
// Compares this string with `other`, delegating to StringUtils::equals_ignoring_ascii_case
// on the byte views of both strings.
bool String::equals_ignoring_ascii_case(String const& other) const
{
    auto const own_view = bytes_as_string_view();
    auto const other_view = other.bytes_as_string_view();
    return StringUtils::equals_ignoring_ascii_case(own_view, other_view);
}
|
||||
|
||||
bool String::equals_ignoring_ascii_case(StringView other) const
|
||||
{
|
||||
return StringUtils::equals_ignoring_ascii_case(bytes_as_string_view(), other);
|
||||
|
|
|
@ -95,9 +95,13 @@ public:
|
|||
ErrorOr<String> to_titlecase(Optional<StringView> const& locale = {}, TrailingCodePointTransformation trailing_code_point_transformation = TrailingCodePointTransformation::Lowercase) const;
|
||||
ErrorOr<String> to_casefold() const;
|
||||
|
||||
// Returns this string with ASCII uppercase letters converted to lowercase; other bytes are kept as-is.
// Infallible. If there is nothing to convert, the string itself is returned.
[[nodiscard]] String to_ascii_lowercase() const;
|
||||
// Returns this string with ASCII lowercase letters converted to uppercase; other bytes are kept as-is.
// Infallible. If there is nothing to convert, the string itself is returned.
[[nodiscard]] String to_ascii_uppercase() const;
|
||||
|
||||
// Compare this String against another string with caseless matching. Using this method requires linking LibUnicode into your application.
|
||||
[[nodiscard]] bool equals_ignoring_case(String const&) const;
|
||||
|
||||
[[nodiscard]] bool equals_ignoring_ascii_case(String const&) const;
|
||||
[[nodiscard]] bool equals_ignoring_ascii_case(StringView) const;
|
||||
|
||||
[[nodiscard]] bool starts_with(u32 code_point) const;
|
||||
|
|
|
@ -1407,3 +1407,27 @@ TEST_CASE(ends_with)
|
|||
EXPECT(emoji.ends_with(0x1F643));
|
||||
EXPECT(!emoji.ends_with(0x1F600));
|
||||
}
|
||||
|
||||
TEST_CASE(to_ascii_lowercase)
{
    // Already-lowercase, mixed-case and all-uppercase inputs must all come out fully lowercased.
    auto const expected = "foobar"_string;
    EXPECT_EQ("foobar"_string.to_ascii_lowercase(), expected);
    EXPECT_EQ("FooBar"_string.to_ascii_lowercase(), expected);
    EXPECT_EQ("FOOBAR"_string.to_ascii_lowercase(), expected);

    // NOTE: When nothing needs converting, to_ascii_lowercase() is expected to hand back the same
    //       underlying string, so both objects must report the same data pointer.
    auto const unchanged_input = "this is a long string that cannot use the short string optimization"_string;
    auto const converted = unchanged_input.to_ascii_lowercase();
    EXPECT_EQ(unchanged_input.bytes().data(), converted.bytes().data());
}
|
||||
|
||||
TEST_CASE(to_ascii_uppercase)
{
    // All-lowercase, mixed-case and already-uppercase inputs must all come out fully uppercased.
    auto const expected = "FOOBAR"_string;
    EXPECT_EQ("foobar"_string.to_ascii_uppercase(), expected);
    EXPECT_EQ("FooBar"_string.to_ascii_uppercase(), expected);
    EXPECT_EQ("FOOBAR"_string.to_ascii_uppercase(), expected);

    // NOTE: When nothing needs converting, to_ascii_uppercase() is expected to hand back the same
    //       underlying string, so both objects must report the same data pointer.
    auto const unchanged_input = "THIS IS A LONG STRING THAT CANNOT USE THE SHORT STRING OPTIMIZATION"_string;
    auto const converted = unchanged_input.to_ascii_uppercase();
    EXPECT_EQ(unchanged_input.bytes().data(), converted.bytes().data());
}
|
||||
|
|
|
@ -395,7 +395,7 @@ Parser::ParseErrorOr<Selector::SimpleSelector> Parser::parse_pseudo_simple_selec
|
|||
return Selector::SimpleSelector {
|
||||
.type = Selector::SimpleSelector::Type::PseudoElement,
|
||||
// Unknown -webkit- pseudo-elements must be serialized in ASCII lowercase.
|
||||
.value = Selector::PseudoElement { Selector::PseudoElement::Type::UnknownWebKit, MUST(Infra::to_ascii_lowercase(pseudo_name.to_string())) },
|
||||
.value = Selector::PseudoElement { Selector::PseudoElement::Type::UnknownWebKit, pseudo_name.to_string().to_ascii_lowercase() },
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -3682,7 +3682,7 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<Attr>> Document::create_attribute(String co
|
|||
|
||||
// 2. If this is an HTML document, then set localName to localName in ASCII lowercase.
|
||||
// 3. Return a new attribute whose local name is localName and node document is this.
|
||||
return Attr::create(*this, is_html_document() ? MUST(Infra::to_ascii_lowercase(local_name)) : local_name);
|
||||
return Attr::create(*this, is_html_document() ? local_name.to_ascii_lowercase() : local_name);
|
||||
}
|
||||
|
||||
// https://dom.spec.whatwg.org/#dom-document-createattributens
|
||||
|
|
|
@ -184,7 +184,7 @@ WebIDL::ExceptionOr<void> Element::set_attribute(FlyString const& name, String c
|
|||
// 4. If attribute is null, create an attribute whose local name is qualifiedName, value is value, and node document
|
||||
// is this’s node document, then append this attribute to this, and then return.
|
||||
if (!attribute) {
|
||||
auto new_attribute = Attr::create(document(), insert_as_lowercase ? MUST(Infra::to_ascii_lowercase(name)) : name, value);
|
||||
auto new_attribute = Attr::create(document(), insert_as_lowercase ? name.to_ascii_lowercase() : name, value);
|
||||
m_attributes->append_attribute(new_attribute);
|
||||
|
||||
return {};
|
||||
|
@ -354,7 +354,7 @@ WebIDL::ExceptionOr<bool> Element::toggle_attribute(FlyString const& name, Optio
|
|||
// 1. If force is not given or is true, create an attribute whose local name is qualifiedName, value is the empty
|
||||
// string, and node document is this’s node document, then append this attribute to this, and then return true.
|
||||
if (!force.has_value() || force.value()) {
|
||||
auto new_attribute = Attr::create(document(), insert_as_lowercase ? MUST(Infra::to_ascii_lowercase(name)) : name.to_string(), String {});
|
||||
auto new_attribute = Attr::create(document(), insert_as_lowercase ? name.to_ascii_lowercase() : name.to_string(), String {});
|
||||
m_attributes->append_attribute(new_attribute);
|
||||
|
||||
return true;
|
||||
|
@ -891,7 +891,7 @@ void Element::make_html_uppercased_qualified_name()
|
|||
{
|
||||
// This is allowed by the spec: "User agents could optimize qualified name and HTML-uppercased qualified name by storing them in internal slots."
|
||||
if (namespace_uri() == Namespace::HTML && document().document_type() == Document::Type::HTML)
|
||||
m_html_uppercased_qualified_name = MUST(Infra::to_ascii_uppercase(qualified_name()));
|
||||
m_html_uppercased_qualified_name = qualified_name().to_ascii_uppercase();
|
||||
else
|
||||
m_html_uppercased_qualified_name = qualified_name();
|
||||
}
|
||||
|
|
|
@ -65,7 +65,7 @@ Vector<FlyString> NamedNodeMap::supported_property_names() const
|
|||
if (associated_element().namespace_uri() == Namespace::HTML) {
|
||||
// 1. Let lowercaseName be name, in ASCII lowercase.
|
||||
// 2. If lowercaseName is not equal to name, remove name from names.
|
||||
names.remove_all_matching([](auto const& name) { return name != MUST(Infra::to_ascii_lowercase(name)); });
|
||||
names.remove_all_matching([](auto const& name) { return name != name.to_ascii_lowercase(); });
|
||||
}
|
||||
|
||||
// 3. Return names.
|
||||
|
|
|
@ -139,7 +139,7 @@ JS::NonnullGCPtr<HTMLCollection> ParentNode::get_elements_by_tag_name(FlyString
|
|||
|
||||
// 2. Otherwise, if root’s node document is an HTML document, return a HTMLCollection rooted at root, whose filter matches the following descendant elements:
|
||||
if (root().document().document_type() == Document::Type::HTML) {
|
||||
FlyString qualified_name_in_ascii_lowercase = MUST(Infra::to_ascii_lowercase(qualified_name));
|
||||
FlyString qualified_name_in_ascii_lowercase = qualified_name.to_ascii_lowercase();
|
||||
return HTMLCollection::create(*this, HTMLCollection::Scope::Descendants, [qualified_name, qualified_name_in_ascii_lowercase](Element const& element) {
|
||||
// - Whose namespace is the HTML namespace and whose qualified name is qualifiedName, in ASCII lowercase.
|
||||
if (element.namespace_uri() == Namespace::HTML)
|
||||
|
|
|
@ -292,7 +292,7 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<Blob>> Blob::slice_blob(Optional<i64> start
|
|||
}
|
||||
// 2. Convert every character in relativeContentType to ASCII lowercase.
|
||||
else {
|
||||
relative_content_type = TRY_OR_THROW_OOM(vm, Infra::to_ascii_lowercase(content_type.value()));
|
||||
relative_content_type = content_type.value().to_ascii_lowercase();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -142,7 +142,7 @@ String DataTransfer::get_data(String const& format_argument) const
|
|||
return {};
|
||||
|
||||
// 3. Let format be the first argument, converted to ASCII lowercase.
|
||||
auto format = MUST(Infra::to_ascii_lowercase(format_argument));
|
||||
auto format = format_argument.to_ascii_lowercase();
|
||||
|
||||
// 4. Let convert-to-URL be false.
|
||||
[[maybe_unused]] bool convert_to_url = false;
|
||||
|
|
|
@ -76,7 +76,7 @@ WebIDL::ExceptionOr<JS::GCPtr<DataTransferItem>> DataTransferItemList::add(Strin
|
|||
// method's first argument.
|
||||
auto item = m_data_transfer->add_item({
|
||||
.kind = HTML::DragDataStoreItem::Kind::Text,
|
||||
.type_string = MUST(Infra::to_ascii_lowercase(type)),
|
||||
.type_string = type.to_ascii_lowercase(),
|
||||
.data = MUST(ByteBuffer::copy(data.bytes())),
|
||||
.file_name = {},
|
||||
});
|
||||
|
@ -100,7 +100,7 @@ JS::GCPtr<DataTransferItem> DataTransferItemList::add(JS::NonnullGCPtr<FileAPI::
|
|||
// converted to ASCII lowercase, and whose data is the same as the File's data.
|
||||
auto item = m_data_transfer->add_item({
|
||||
.kind = HTML::DragDataStoreItem::Kind::File,
|
||||
.type_string = MUST(Infra::to_ascii_lowercase(file->type())),
|
||||
.type_string = file->type().to_ascii_lowercase(),
|
||||
.data = MUST(ByteBuffer::copy(file->raw_bytes())),
|
||||
.file_name = file->name().to_byte_string(),
|
||||
});
|
||||
|
|
|
@ -1498,7 +1498,7 @@ String HTMLInputElement::value_sanitization_algorithm(String const& value) const
|
|||
// https://html.spec.whatwg.org/multipage/input.html#color-state-(type=color):value-sanitization-algorithm
|
||||
// If the value of the element is a valid simple color, then set it to the value of the element converted to ASCII lowercase;
|
||||
if (is_valid_simple_color(value))
|
||||
return MUST(Infra::to_ascii_lowercase(value));
|
||||
return value.to_ascii_lowercase();
|
||||
// otherwise, set it to the string "#000000".
|
||||
return "#000000"_string;
|
||||
}
|
||||
|
|
|
@ -136,7 +136,7 @@ void HTMLLinkElement::attribute_changed(FlyString const& name, Optional<String>
|
|||
if (name == HTML::AttributeNames::rel) {
|
||||
m_relationship = 0;
|
||||
// Keywords are always ASCII case-insensitive, and must be compared as such.
|
||||
auto lowercased_value = MUST(Infra::to_ascii_lowercase(value.value_or(String {})));
|
||||
auto lowercased_value = value.value_or(String {}).to_ascii_lowercase();
|
||||
// To determine which link types apply to a link, a, area, or form element,
|
||||
// the element's rel attribute must be split on ASCII whitespace.
|
||||
// The resulting tokens are the keywords for the link types that apply to that element.
|
||||
|
|
|
@ -3,11 +3,13 @@
|
|||
* Copyright (c) 2022, networkException <networkexception@serenityos.org>
|
||||
* Copyright (c) 2023, Kenneth Myhra <kennethmyhra@serenityos.org>
|
||||
* Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
|
||||
* Copyright (c) 2024, Andreas Kling <andreas@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/CharacterTypes.h>
|
||||
#include <AK/FlyString.h>
|
||||
#include <AK/GenericLexer.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/Utf16View.h>
|
||||
|
|
Loading…
Reference in a new issue