LibJS+LibUnicode: Store parsed Unicode locale data as full strings

Originally, it was convenient to store the parsed Unicode locale data as
views into the original string being parsed. But implementing locale
aliases will require mutating the data that was parsed. To prepare for
that, store the parsed data as proper strings.
This commit is contained in:
Timothy Flynn 2021-08-30 14:31:48 -04:00 committed by Linus Groh
parent caf5b6fa6f
commit d13142f015
4 changed files with 22 additions and 22 deletions

View file

@ -75,7 +75,7 @@ TEST_CASE(parse_unicode_locale_id)
auto locale_id = Unicode::parse_unicode_locale_id(locale);
EXPECT(!locale_id.has_value());
};
auto pass = [](StringView locale, Optional<StringView> expected_language, Optional<StringView> expected_script, Optional<StringView> expected_region, Vector<StringView> expected_variants) {
auto pass = [](StringView locale, Optional<StringView> expected_language, Optional<StringView> expected_script, Optional<StringView> expected_region, Vector<String> expected_variants) {
auto locale_id = Unicode::parse_unicode_locale_id(locale);
VERIFY(locale_id.has_value());
@ -252,7 +252,7 @@ TEST_CASE(parse_unicode_locale_id_with_private_use_extension)
auto locale_id = Unicode::parse_unicode_locale_id(locale);
EXPECT(!locale_id.has_value());
};
auto pass = [](StringView locale, Vector<StringView> const& expected_extension) {
auto pass = [](StringView locale, Vector<String> const& expected_extension) {
auto locale_id = Unicode::parse_unicode_locale_id(locale);
VERIFY(locale_id.has_value());
EXPECT_EQ(locale_id->private_use_extensions, expected_extension);

View file

@ -19,7 +19,7 @@ namespace JS::Intl {
// 6.2.2 IsStructurallyValidLanguageTag ( locale ), https://tc39.es/ecma402/#sec-isstructurallyvalidlanguagetag
static Optional<Unicode::LocaleID> is_structurally_valid_language_tag(StringView locale)
{
auto contains_duplicate_variant = [](Vector<StringView>& variants) {
auto contains_duplicate_variant = [](auto& variants) {
if (variants.is_empty())
return false;

View file

@ -400,7 +400,7 @@ static Optional<Extension> parse_extension(GenericLexer& lexer)
return {};
}
static Vector<StringView> parse_private_use_extensions(GenericLexer& lexer)
static Vector<String> parse_private_use_extensions(GenericLexer& lexer)
{
// https://unicode.org/reports/tr35/#pu_extensions
//
@ -411,8 +411,8 @@ static Vector<StringView> parse_private_use_extensions(GenericLexer& lexer)
if (!header.has_value())
return {};
auto parse_values = [&]() -> Vector<StringView> {
Vector<StringView> extensions;
auto parse_values = [&]() -> Vector<String> {
Vector<String> extensions;
while (true) {
auto segment = consume_next_segment(lexer);
@ -491,18 +491,18 @@ Optional<String> canonicalize_unicode_locale_id(LocaleID& locale_id)
Title,
};
auto append_sep_and_string = [&](Optional<StringView> const& string, Case case_ = Case::Lower) {
auto append_sep_and_string = [&](Optional<String> const& string, Case case_ = Case::Lower) {
if (!string.has_value())
return;
switch (case_) {
case Case::Upper:
builder.appendff("-{}", string->to_uppercase_string());
builder.appendff("-{}", string->to_uppercase());
break;
case Case::Lower:
builder.appendff("-{}", string->to_lowercase_string());
builder.appendff("-{}", string->to_lowercase());
break;
case Case::Title:
builder.appendff("-{}", string->to_titlecase_string());
builder.appendff("-{}", string->to_titlecase());
break;
}
};
@ -510,7 +510,7 @@ Optional<String> canonicalize_unicode_locale_id(LocaleID& locale_id)
if (!locale_id.language_id.language.has_value())
return {};
builder.append(locale_id.language_id.language->to_lowercase_string());
builder.append(locale_id.language_id.language->to_lowercase());
append_sep_and_string(locale_id.language_id.script, Case::Title);
append_sep_and_string(locale_id.language_id.region, Case::Upper);

View file

@ -17,25 +17,25 @@ namespace Unicode {
struct LanguageID {
bool is_root { false };
Optional<StringView> language {};
Optional<StringView> script {};
Optional<StringView> region {};
Vector<StringView> variants {};
Optional<String> language {};
Optional<String> script {};
Optional<String> region {};
Vector<String> variants {};
};
struct Keyword {
StringView key {};
Vector<StringView> types {};
String key {};
Vector<String> types {};
};
struct LocaleExtension {
Vector<StringView> attributes {};
Vector<String> attributes {};
Vector<Keyword> keywords {};
};
struct TransformedField {
StringView key;
Vector<StringView> values {};
String key;
Vector<String> values {};
};
struct TransformedExtension {
@ -45,7 +45,7 @@ struct TransformedExtension {
struct OtherExtension {
char key {};
Vector<StringView> values {};
Vector<String> values {};
};
using Extension = Variant<LocaleExtension, TransformedExtension, OtherExtension>;
@ -53,7 +53,7 @@ using Extension = Variant<LocaleExtension, TransformedExtension, OtherExtension>
struct LocaleID {
LanguageID language_id {};
Vector<Extension> extensions {};
Vector<StringView> private_use_extensions {};
Vector<String> private_use_extensions {};
};
// Note: These methods only verify that the provided strings match the EBNF grammar of the