From f907a7dc386e7cee729b8d96b6287dd0f1af1b72 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Mon, 30 Aug 2021 15:51:08 -0400 Subject: [PATCH] LibUnicode: Canonicalize the subtag "yes" to "true" --- Tests/LibUnicode/TestUnicodeLocale.cpp | 6 ++++++ Userland/Libraries/LibUnicode/Locale.cpp | 12 ++++++++++++ 2 files changed, 18 insertions(+) diff --git a/Tests/LibUnicode/TestUnicodeLocale.cpp b/Tests/LibUnicode/TestUnicodeLocale.cpp index 45cd4170f6e..5a7857d6bb5 100644 --- a/Tests/LibUnicode/TestUnicodeLocale.cpp +++ b/Tests/LibUnicode/TestUnicodeLocale.cpp @@ -302,6 +302,10 @@ TEST_CASE(canonicalize_unicode_locale_id) test("EN-U-CCC-BBB-2K-AAA-1K-BBB"sv, "en-u-bbb-ccc-1k-bbb-2k-aaa"sv); test("en-u-1k-true"sv, "en-u-1k"sv); test("EN-U-1K-TRUE"sv, "en-u-1k"sv); + test("en-u-kb-yes"sv, "en-u-kb"sv); + test("EN-U-KB-YES"sv, "en-u-kb"sv); + test("en-u-ka-yes"sv, "en-u-ka-yes"sv); + test("EN-U-KA-YES"sv, "en-u-ka-yes"sv); test("en-t-en"sv, "en-t-en"sv); test("EN-T-EN"sv, "en-t-en"sv); @@ -315,6 +319,8 @@ TEST_CASE(canonicalize_unicode_locale_id) test("EN-T-EN-K2-BBB-K1-AAA"sv, "en-t-en-k1-aaa-k2-bbb"sv); test("en-t-k1-true"sv, "en-t-k1-true"sv); test("EN-T-K1-TRUE"sv, "en-t-k1-true"sv); + test("en-t-k1-yes"sv, "en-t-k1-yes"sv); + test("EN-T-K1-YES"sv, "en-t-k1-yes"sv); test("en-0-aaa"sv, "en-0-aaa"sv); test("EN-0-AAA"sv, "en-0-aaa"sv); diff --git a/Userland/Libraries/LibUnicode/Locale.cpp b/Userland/Libraries/LibUnicode/Locale.cpp index 6cbeb241b1e..dea0d692edc 100644 --- a/Userland/Libraries/LibUnicode/Locale.cpp +++ b/Userland/Libraries/LibUnicode/Locale.cpp @@ -480,6 +480,17 @@ Optional<LocaleID> parse_unicode_locale_id(StringView locale) return locale_id; } +static void perform_hard_coded_key_value_substitutions(String& key, String& value) +{ + // FIXME: In the XML export of CLDR, there are some aliases defined in the following file: + // https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/collation.xml + // + // There doesn't seem to be a 
counterpart in the JSON export. Since there aren't many such + // aliases, until an XML parser is implemented, those aliases are implemented here. + if (key.is_one_of("kb"sv, "kc"sv, "kh"sv, "kk"sv, "kn"sv) && (value == "yes"sv)) + value = "true"sv; +} + static void transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id) { auto canonicalize_language = [](LanguageID& language_id, bool force_lowercase) { @@ -542,6 +553,7 @@ static void transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id) for (auto& value : raw_values) { value = value.to_lowercase(); + perform_hard_coded_key_value_substitutions(key, value); // Note: The spec says to remove "true" type and tfield values but that is believed to be a bug in the spec // because, for tvalues, that would result in invalid syntax: