mirror of
https://github.com/SerenityOS/serenity.git
synced 2025-01-23 09:51:57 -05:00
LibUnicode: Support locales-without-script aliases for ECMA-402
As noted by ECMA-402, if a supported locale contains all of a language, script, and region subtag, then the implementation must also support the locale without the script subtag. The most complicated example of this is the zh-TW locale. The list of locales in the CLDR database does not include zh-TW or its maximized zh-Hant-TW variant. Instead, it inlcudes the zh-Hant locale. However, zh-Hant-TW is listed in the default-content locale list in the cldr-core package. This defines an alias from zh-Hant-TW to zh-Hant. We must then also support the zh-Hant-TW alias without the script subtag: zh-TW. This transitively maps zh-TW to zh-Hant, which is a case quite heavily tested by test262.
This commit is contained in:
parent
4b535ce1c8
commit
93ee922027
2 changed files with 51 additions and 0 deletions
|
@ -427,6 +427,48 @@ static void parse_default_content_locales(String core_path, UnicodeLocaleData& l
|
|||
});
|
||||
}
|
||||
|
||||
static void define_aliases_without_scripts(UnicodeLocaleData& locale_data)
|
||||
{
|
||||
// From ECMA-402: https://tc39.es/ecma402/#sec-internal-slots
|
||||
//
|
||||
// For locales that include a script subtag in addition to language and region, the
|
||||
// corresponding locale without a script subtag must also be supported.
|
||||
//
|
||||
// So we define aliases for locales that contain all three subtags, but we must also take
|
||||
// care to handle when the locale itself or the locale without a script subtag are an alias
|
||||
// by way of default-content locales.
|
||||
auto find_alias = [&](auto const& locale) {
|
||||
return locale_data.locale_aliases.find_if([&](auto const& alias) { return locale == alias.alias; });
|
||||
};
|
||||
|
||||
auto append_alias_without_script = [&](auto const& locale) {
|
||||
auto parsed_locale = CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, locale);
|
||||
VERIFY(parsed_locale.has_value());
|
||||
|
||||
if ((parsed_locale->language == 0) || (parsed_locale->script == 0) || (parsed_locale->region == 0))
|
||||
return;
|
||||
|
||||
auto locale_without_script = String::formatted("{}-{}",
|
||||
locale_data.unique_strings.get(parsed_locale->language),
|
||||
locale_data.unique_strings.get(parsed_locale->region));
|
||||
|
||||
if (locale_data.locales.contains(locale_without_script))
|
||||
return;
|
||||
if (find_alias(locale_without_script) != locale_data.locale_aliases.end())
|
||||
return;
|
||||
|
||||
if (auto it = find_alias(locale); it != locale_data.locale_aliases.end())
|
||||
locale_data.locale_aliases.append({ it->name, locale_without_script });
|
||||
else
|
||||
locale_data.locale_aliases.append({ locale, locale_without_script });
|
||||
};
|
||||
|
||||
for (auto const& locale : locale_data.locales)
|
||||
append_alias_without_script(locale.key);
|
||||
for (auto const& locale : locale_data.locale_aliases)
|
||||
append_alias_without_script(locale.alias);
|
||||
}
|
||||
|
||||
static void parse_all_locales(String core_path, String locale_names_path, String misc_path, String numbers_path, UnicodeLocaleData& locale_data)
|
||||
{
|
||||
auto identity_iterator = path_to_dir_iterator(locale_names_path);
|
||||
|
@ -508,6 +550,7 @@ static void parse_all_locales(String core_path, String locale_names_path, String
|
|||
}
|
||||
|
||||
parse_default_content_locales(move(core_path), locale_data);
|
||||
define_aliases_without_scripts(locale_data);
|
||||
}
|
||||
|
||||
static String format_identifier(StringView owner, String identifier)
|
||||
|
|
|
@ -463,3 +463,11 @@ TEST_CASE(canonicalize_unicode_locale_id)
|
|||
test("zh-Hans-CN"sv, "zh-Hans-CN"sv);
|
||||
test("ZH-HANS-CN"sv, "zh-Hans-CN"sv);
|
||||
}
|
||||
|
||||
TEST_CASE(supports_locale_aliases)
|
||||
{
|
||||
EXPECT(Unicode::is_locale_available("zh"sv));
|
||||
EXPECT(Unicode::is_locale_available("zh-Hant"sv));
|
||||
EXPECT(Unicode::is_locale_available("zh-TW"sv));
|
||||
EXPECT(Unicode::is_locale_available("zh-Hant-TW"sv));
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue