2024-08-14 14:46:19 -04:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
|
|
|
|
*
|
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <LibUnicode/Collator.h>
|
|
|
|
#include <LibUnicode/ICU.h>
|
|
|
|
|
|
|
|
#include <unicode/coll.h>
|
|
|
|
|
|
|
|
namespace Unicode {
|
|
|
|
|
|
|
|
// Converts a usage keyword ("sort" or "search") into the corresponding Usage value.
// Any other input is considered a caller bug and hits VERIFY_NOT_REACHED().
Usage usage_from_string(StringView usage)
{
    bool const is_sort = usage == "sort"sv;
    bool const is_search = !is_sort && usage == "search"sv;

    if (!is_sort && !is_search)
        VERIFY_NOT_REACHED();

    return is_sort ? Usage::Sort : Usage::Search;
}
|
|
|
|
|
|
|
|
// Returns the keyword ("sort" or "search") for the given Usage value.
// A value outside the known enumerators hits VERIFY_NOT_REACHED().
StringView usage_to_string(Usage usage)
{
    if (usage == Usage::Sort)
        return "sort"sv;
    if (usage == Usage::Search)
        return "search"sv;

    VERIFY_NOT_REACHED();
}
|
|
|
|
|
|
|
|
// Clones `locale` and sets the clone's "co" Unicode keyword according to `usage`:
//   - Usage::Sort   -> the caller-provided `collation` value
//   - Usage::Search -> the literal "search" (`collation` is not used)
// Returns the owned clone; the input locale is not modified. An ICU failure while
// setting the keyword trips VERIFY.
static NonnullOwnPtr<icu::Locale> apply_usage_to_locale(icu::Locale const& locale, Usage usage, StringView collation)
{
    UErrorCode status = U_ZERO_ERROR;
    auto locale_copy = adopt_own(*locale.clone());

    if (usage == Usage::Sort)
        locale_copy->setUnicodeKeywordValue("co", icu_string_piece(collation), status);
    else if (usage == Usage::Search)
        locale_copy->setUnicodeKeywordValue("co", "search", status);

    VERIFY(icu_success(status));
    return locale_copy;
}
|
|
|
|
|
|
|
|
// Converts a sensitivity keyword ("base", "accent", "case" or "variant") into the
// corresponding Sensitivity value. Any other input hits VERIFY_NOT_REACHED().
Sensitivity sensitivity_from_string(StringView sensitivity)
{
    struct Entry {
        StringView name;
        Sensitivity value;
    };

    static constexpr Entry entries[] = {
        { "base"sv, Sensitivity::Base },
        { "accent"sv, Sensitivity::Accent },
        { "case"sv, Sensitivity::Case },
        { "variant"sv, Sensitivity::Variant },
    };

    for (auto const& entry : entries) {
        if (sensitivity == entry.name)
            return entry.value;
    }

    VERIFY_NOT_REACHED();
}
|
|
|
|
|
|
|
|
// Returns the keyword ("base", "accent", "case" or "variant") for the given
// Sensitivity value. A value outside the known enumerators hits VERIFY_NOT_REACHED().
StringView sensitivity_to_string(Sensitivity sensitivity)
{
    if (sensitivity == Sensitivity::Base)
        return "base"sv;
    if (sensitivity == Sensitivity::Accent)
        return "accent"sv;
    if (sensitivity == Sensitivity::Case)
        return "case"sv;
    if (sensitivity == Sensitivity::Variant)
        return "variant"sv;

    VERIFY_NOT_REACHED();
}
|
|
|
|
|
|
|
|
// Maps a Sensitivity value to the ICU strength attribute value used for UCOL_STRENGTH.
// Base and Case both map to UCOL_PRIMARY; this function alone does not distinguish
// them (Collator::create additionally toggles UCOL_CASE_LEVEL for Sensitivity::Case).
static constexpr UColAttributeValue icu_sensitivity(Sensitivity sensitivity)
{
    if (sensitivity == Sensitivity::Base || sensitivity == Sensitivity::Case)
        return UCOL_PRIMARY;
    if (sensitivity == Sensitivity::Accent)
        return UCOL_SECONDARY;
    if (sensitivity == Sensitivity::Variant)
        return UCOL_TERTIARY;

    VERIFY_NOT_REACHED();
}
|
|
|
|
|
2024-08-14 16:18:06 -04:00
|
|
|
// Derives the Sensitivity currently configured on an ICU collator:
//   - secondary strength                      -> Accent
//   - primary strength with case level on     -> Case
//   - primary strength with case level not on -> Base
//   - any other strength                      -> Variant
// ICU errors while reading attributes trip VERIFY.
static Sensitivity sensitivity_for_collator(icu::Collator const& collator)
{
    UErrorCode status = U_ZERO_ERROR;

    auto const strength = collator.getAttribute(UCOL_STRENGTH, status);
    VERIFY(icu_success(status));

    if (strength == UCOL_SECONDARY)
        return Sensitivity::Accent;
    if (strength != UCOL_PRIMARY)
        return Sensitivity::Variant;

    // Primary strength: the case-level attribute decides between Case and Base.
    auto const case_level = collator.getAttribute(UCOL_CASE_LEVEL, status);
    VERIFY(icu_success(status));

    if (case_level == UCOL_ON)
        return Sensitivity::Case;
    return Sensitivity::Base;
}
|
|
|
|
|
2024-08-14 14:46:19 -04:00
|
|
|
// Converts a case-first keyword ("upper", "lower" or "false") into the corresponding
// CaseFirst value. Any other input hits VERIFY_NOT_REACHED().
CaseFirst case_first_from_string(StringView case_first)
{
    struct Mapping {
        StringView keyword;
        CaseFirst value;
    };

    static constexpr Mapping mappings[] = {
        { "upper"sv, CaseFirst::Upper },
        { "lower"sv, CaseFirst::Lower },
        { "false"sv, CaseFirst::False },
    };

    for (auto const& mapping : mappings) {
        if (case_first == mapping.keyword)
            return mapping.value;
    }

    VERIFY_NOT_REACHED();
}
|
|
|
|
|
|
|
|
// Returns the keyword ("upper", "lower" or "false") for the given CaseFirst value.
// A value outside the known enumerators hits VERIFY_NOT_REACHED().
StringView case_first_to_string(CaseFirst case_first)
{
    if (case_first == CaseFirst::Upper)
        return "upper"sv;
    if (case_first == CaseFirst::Lower)
        return "lower"sv;
    if (case_first == CaseFirst::False)
        return "false"sv;

    VERIFY_NOT_REACHED();
}
|
|
|
|
|
|
|
|
// Maps a CaseFirst value to the ICU attribute value used for UCOL_CASE_FIRST.
// CaseFirst::False maps to UCOL_OFF.
static constexpr UColAttributeValue icu_case_first(CaseFirst case_first)
{
    if (case_first == CaseFirst::Upper)
        return UCOL_UPPER_FIRST;
    if (case_first == CaseFirst::Lower)
        return UCOL_LOWER_FIRST;
    if (case_first == CaseFirst::False)
        return UCOL_OFF;

    VERIFY_NOT_REACHED();
}
|
|
|
|
|
2024-08-14 15:53:47 -04:00
|
|
|
// Reports whether the collator's UCOL_ALTERNATE_HANDLING attribute is UCOL_SHIFTED,
// which this file treats as "ignore punctuation". An ICU error while reading the
// attribute trips VERIFY.
static bool ignore_punctuation_for_collator(icu::Collator const& collator)
{
    UErrorCode status = U_ZERO_ERROR;

    auto const alternate_handling = collator.getAttribute(UCOL_ALTERNATE_HANDLING, status);
    VERIFY(icu_success(status));

    bool const is_shifted = alternate_handling == UCOL_SHIFTED;
    return is_shifted;
}
|
|
|
|
|
2024-08-14 14:46:19 -04:00
|
|
|
class CollatorImpl : public Collator {
|
|
|
|
public:
|
|
|
|
explicit CollatorImpl(NonnullOwnPtr<icu::Collator> collator)
|
|
|
|
: m_collator(move(collator))
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
virtual Collator::Order compare(StringView lhs, StringView rhs) const override
|
|
|
|
{
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
|
|
|
|
auto result = m_collator->compareUTF8(icu_string_piece(lhs), icu_string_piece(rhs), status);
|
|
|
|
VERIFY(icu_success(status));
|
|
|
|
|
|
|
|
switch (result) {
|
|
|
|
case UCOL_LESS:
|
|
|
|
return Order::Before;
|
|
|
|
case UCOL_EQUAL:
|
|
|
|
return Order::Equal;
|
|
|
|
case UCOL_GREATER:
|
|
|
|
return Order::After;
|
|
|
|
}
|
|
|
|
|
|
|
|
VERIFY_NOT_REACHED();
|
|
|
|
}
|
|
|
|
|
2024-08-14 16:18:06 -04:00
|
|
|
virtual Sensitivity sensitivity() const override
|
|
|
|
{
|
|
|
|
return sensitivity_for_collator(*m_collator);
|
|
|
|
}
|
|
|
|
|
2024-08-14 15:53:47 -04:00
|
|
|
virtual bool ignore_punctuation() const override
|
|
|
|
{
|
|
|
|
return ignore_punctuation_for_collator(*m_collator);
|
|
|
|
}
|
|
|
|
|
2024-08-14 14:46:19 -04:00
|
|
|
private:
|
|
|
|
NonnullOwnPtr<icu::Collator> m_collator;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Creates a Collator for `locale`, configured from the given options.
//
// Parameters:
//   locale             - locale identifier; LocaleData::for_locale must recognize it (VERIFY'd).
//   usage              - selects how the locale's "co" keyword is set (see apply_usage_to_locale).
//   collation          - collation value applied when `usage` is Usage::Sort.
//   sensitivity        - if empty, the value the freshly created ICU collator reports is used.
//   case_first         - applied as UCOL_CASE_FIRST.
//   numeric            - toggles UCOL_NUMERIC_COLLATION.
//   ignore_punctuation - if empty, the value the freshly created ICU collator reports is used.
//
// Any ICU failure along the way trips VERIFY.
NonnullOwnPtr<Collator> Collator::create(
    StringView locale,
    Usage usage,
    StringView collation,
    Optional<Sensitivity> sensitivity,
    CaseFirst case_first,
    bool numeric,
    Optional<bool> ignore_punctuation)
{
    UErrorCode status = U_ZERO_ERROR;

    auto locale_data = LocaleData::for_locale(locale);
    VERIFY(locale_data.has_value());

    auto locale_with_usage = apply_usage_to_locale(locale_data->locale(), usage, collation);

    // Check the status (and the pointer) before dereferencing: on failure ICU hands back
    // a null pointer, and dereferencing it to build the NonnullOwnPtr would be undefined
    // behavior rather than a clean VERIFY failure.
    auto* icu_collator = icu::Collator::createInstance(*locale_with_usage, status);
    VERIFY(icu_success(status));
    VERIFY(icu_collator);

    auto collator = adopt_own(*icu_collator);

    auto set_attribute = [&](UColAttribute attribute, UColAttributeValue value) {
        collator->setAttribute(attribute, value, status);
        VERIFY(icu_success(status));
    };

    // Resolve unspecified options from the collator before any attribute is overridden below.
    if (!sensitivity.has_value())
        sensitivity = sensitivity_for_collator(*collator);

    if (!ignore_punctuation.has_value())
        ignore_punctuation = ignore_punctuation_for_collator(*collator);

    set_attribute(UCOL_STRENGTH, icu_sensitivity(*sensitivity));
    // Sensitivity::Case shares primary strength with Base; the case level tells them apart.
    set_attribute(UCOL_CASE_LEVEL, sensitivity == Sensitivity::Case ? UCOL_ON : UCOL_OFF);
    set_attribute(UCOL_CASE_FIRST, icu_case_first(case_first));
    set_attribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF);
    set_attribute(UCOL_ALTERNATE_HANDLING, *ignore_punctuation ? UCOL_SHIFTED : UCOL_NON_IGNORABLE);
    set_attribute(UCOL_NORMALIZATION_MODE, UCOL_ON);

    return adopt_own(*new CollatorImpl(move(collator)));
}
|
|
|
|
|
|
|
|
}
|