LibUnicode: Generate unique lists of calendar symbols

Each of the 374 generated calendars include 4 sets of symbols, each of
which have 3 lists of symbols (narrow, short, long). Of these 4488
lists, only 819 are unique.
This commit is contained in:
Timothy Flynn 2021-12-11 15:15:52 -05:00 committed by Brian Gianforcaro
parent af7caa97c8
commit cf8ef954e5

View file

@ -37,6 +37,9 @@ constexpr auto s_calendar_range_pattern_index_type = "u16"sv;
using CalendarFormatIndexType = u8; using CalendarFormatIndexType = u8;
constexpr auto s_calendar_format_index_type = "u8"sv; constexpr auto s_calendar_format_index_type = "u8"sv;
using SymbolListIndexType = u16;
constexpr auto s_symbol_list_index_type = "u16"sv;
struct CalendarPattern : public Unicode::CalendarPattern { struct CalendarPattern : public Unicode::CalendarPattern {
bool contains_only_date_fields() const bool contains_only_date_fields() const
{ {
@ -240,10 +243,12 @@ struct AK::Traits<CalendarFormat> : public GenericTraits<CalendarFormat> {
static unsigned hash(CalendarFormat const& c) { return c.hash(); } static unsigned hash(CalendarFormat const& c) { return c.hash(); }
}; };
using SymbolList = Vector<StringIndexType>;
struct CalendarSymbols { struct CalendarSymbols {
Vector<StringIndexType> narrow_symbols {}; SymbolListIndexType narrow_symbols { 0 };
Vector<StringIndexType> short_symbols {}; SymbolListIndexType short_symbols { 0 };
Vector<StringIndexType> long_symbols {}; SymbolListIndexType long_symbols { 0 };
}; };
struct Calendar { struct Calendar {
@ -284,6 +289,7 @@ struct UnicodeLocaleData {
UniqueStorage<CalendarPattern, CalendarPatternIndexType> unique_patterns; UniqueStorage<CalendarPattern, CalendarPatternIndexType> unique_patterns;
UniqueStorage<CalendarRangePattern, CalendarRangePatternIndexType> unique_range_patterns; UniqueStorage<CalendarRangePattern, CalendarRangePatternIndexType> unique_range_patterns;
UniqueStorage<CalendarFormat, CalendarFormatIndexType> unique_formats; UniqueStorage<CalendarFormat, CalendarFormatIndexType> unique_formats;
UniqueStorage<SymbolList, SymbolListIndexType> unique_symbol_lists;
HashMap<String, Locale> locales; HashMap<String, Locale> locales;
@ -876,26 +882,41 @@ static void generate_missing_patterns(Calendar& calendar, Vector<CalendarPattern
static void parse_calendar_symbols(Calendar& calendar, JsonObject const& calendar_object, UnicodeLocaleData& locale_data) static void parse_calendar_symbols(Calendar& calendar, JsonObject const& calendar_object, UnicodeLocaleData& locale_data)
{ {
auto ensure_symbols = [&](auto name, size_t size) -> CalendarSymbols& { auto ensure_symbols = [&](auto name) -> CalendarSymbols& {
if (!locale_data.symbols.contains_slow(name)) if (!locale_data.symbols.contains_slow(name))
locale_data.symbols.append(name); locale_data.symbols.append(name);
return calendar.symbols.ensure(name, [&]() { return calendar.symbols.ensure(name);
CalendarSymbols symbols {}; };
symbols.narrow_symbols.resize(size);
symbols.short_symbols.resize(size);
symbols.long_symbols.resize(size);
return symbols; auto create_symbol_lists = [](size_t size) {
}); SymbolList narrow_symbol_list;
SymbolList short_symbol_list;
SymbolList long_symbol_list;
narrow_symbol_list.resize(size);
short_symbol_list.resize(size);
long_symbol_list.resize(size);
return Array<SymbolList, 3> { {
move(narrow_symbol_list),
move(short_symbol_list),
move(long_symbol_list),
} };
};
auto store_symbol_lists = [&](auto name, auto symbol_lists) {
auto& symbols = ensure_symbols(name);
symbols.narrow_symbols = locale_data.unique_symbol_lists.ensure(move(symbol_lists[0]));
symbols.short_symbols = locale_data.unique_symbol_lists.ensure(move(symbol_lists[1]));
symbols.long_symbols = locale_data.unique_symbol_lists.ensure(move(symbol_lists[2]));
}; };
auto parse_era_symbols = [&](auto const& symbols_object) { auto parse_era_symbols = [&](auto const& symbols_object) {
auto const& narrow_symbols = symbols_object.get("eraNarrow"sv).as_object(); auto const& narrow_symbols = symbols_object.get("eraNarrow"sv).as_object();
auto const& short_symbols = symbols_object.get("eraAbbr"sv).as_object(); auto const& short_symbols = symbols_object.get("eraAbbr"sv).as_object();
auto const& long_symbols = symbols_object.get("eraNames"sv).as_object(); auto const& long_symbols = symbols_object.get("eraNames"sv).as_object();
auto symbol_lists = create_symbol_lists(2);
auto& symbols = ensure_symbols("era"sv, 2);
auto append_symbol = [&](auto& symbols, auto const& key, auto symbol) { auto append_symbol = [&](auto& symbols, auto const& key, auto symbol) {
if (auto key_index = key.to_uint(); key_index.has_value()) if (auto key_index = key.to_uint(); key_index.has_value())
@ -903,22 +924,23 @@ static void parse_calendar_symbols(Calendar& calendar, JsonObject const& calenda
}; };
narrow_symbols.for_each_member([&](auto const& key, JsonValue const& value) { narrow_symbols.for_each_member([&](auto const& key, JsonValue const& value) {
append_symbol(symbols.narrow_symbols, key, value.as_string()); append_symbol(symbol_lists[0], key, value.as_string());
}); });
short_symbols.for_each_member([&](auto const& key, JsonValue const& value) { short_symbols.for_each_member([&](auto const& key, JsonValue const& value) {
append_symbol(symbols.short_symbols, key, value.as_string()); append_symbol(symbol_lists[1], key, value.as_string());
}); });
long_symbols.for_each_member([&](auto const& key, JsonValue const& value) { long_symbols.for_each_member([&](auto const& key, JsonValue const& value) {
append_symbol(symbols.long_symbols, key, value.as_string()); append_symbol(symbol_lists[2], key, value.as_string());
}); });
store_symbol_lists("era"sv, move(symbol_lists));
}; };
auto parse_month_symbols = [&](auto const& symbols_object) { auto parse_month_symbols = [&](auto const& symbols_object) {
auto const& narrow_symbols = symbols_object.get("narrow"sv).as_object(); auto const& narrow_symbols = symbols_object.get("narrow"sv).as_object();
auto const& short_symbols = symbols_object.get("abbreviated"sv).as_object(); auto const& short_symbols = symbols_object.get("abbreviated"sv).as_object();
auto const& long_symbols = symbols_object.get("wide"sv).as_object(); auto const& long_symbols = symbols_object.get("wide"sv).as_object();
auto symbol_lists = create_symbol_lists(12);
auto& month_symbols = ensure_symbols("month"sv, 12);
auto append_symbol = [&](auto& symbols, auto const& key, auto symbol) { auto append_symbol = [&](auto& symbols, auto const& key, auto symbol) {
auto key_index = key.to_uint().value() - 1; auto key_index = key.to_uint().value() - 1;
@ -926,22 +948,23 @@ static void parse_calendar_symbols(Calendar& calendar, JsonObject const& calenda
}; };
narrow_symbols.for_each_member([&](auto const& key, JsonValue const& value) { narrow_symbols.for_each_member([&](auto const& key, JsonValue const& value) {
append_symbol(month_symbols.narrow_symbols, key, value.as_string()); append_symbol(symbol_lists[0], key, value.as_string());
}); });
short_symbols.for_each_member([&](auto const& key, JsonValue const& value) { short_symbols.for_each_member([&](auto const& key, JsonValue const& value) {
append_symbol(month_symbols.short_symbols, key, value.as_string()); append_symbol(symbol_lists[1], key, value.as_string());
}); });
long_symbols.for_each_member([&](auto const& key, JsonValue const& value) { long_symbols.for_each_member([&](auto const& key, JsonValue const& value) {
append_symbol(month_symbols.long_symbols, key, value.as_string()); append_symbol(symbol_lists[2], key, value.as_string());
}); });
store_symbol_lists("month"sv, move(symbol_lists));
}; };
auto parse_weekday_symbols = [&](auto const& symbols_object) { auto parse_weekday_symbols = [&](auto const& symbols_object) {
auto const& narrow_symbols = symbols_object.get("narrow"sv).as_object(); auto const& narrow_symbols = symbols_object.get("narrow"sv).as_object();
auto const& short_symbols = symbols_object.get("abbreviated"sv).as_object(); auto const& short_symbols = symbols_object.get("abbreviated"sv).as_object();
auto const& long_symbols = symbols_object.get("wide"sv).as_object(); auto const& long_symbols = symbols_object.get("wide"sv).as_object();
auto symbol_lists = create_symbol_lists(7);
auto& weekday_symbols = ensure_symbols("weekday"sv, 7);
auto append_symbol = [&](auto& symbols, auto const& key, auto symbol) { auto append_symbol = [&](auto& symbols, auto const& key, auto symbol) {
if (key == "sun"sv) if (key == "sun"sv)
@ -961,22 +984,23 @@ static void parse_calendar_symbols(Calendar& calendar, JsonObject const& calenda
}; };
narrow_symbols.for_each_member([&](auto const& key, JsonValue const& value) { narrow_symbols.for_each_member([&](auto const& key, JsonValue const& value) {
append_symbol(weekday_symbols.narrow_symbols, key, value.as_string()); append_symbol(symbol_lists[0], key, value.as_string());
}); });
short_symbols.for_each_member([&](auto const& key, JsonValue const& value) { short_symbols.for_each_member([&](auto const& key, JsonValue const& value) {
append_symbol(weekday_symbols.short_symbols, key, value.as_string()); append_symbol(symbol_lists[1], key, value.as_string());
}); });
long_symbols.for_each_member([&](auto const& key, JsonValue const& value) { long_symbols.for_each_member([&](auto const& key, JsonValue const& value) {
append_symbol(weekday_symbols.long_symbols, key, value.as_string()); append_symbol(symbol_lists[2], key, value.as_string());
}); });
store_symbol_lists("weekday"sv, move(symbol_lists));
}; };
auto parse_day_period_symbols = [&](auto const& symbols_object) { auto parse_day_period_symbols = [&](auto const& symbols_object) {
auto const& narrow_symbols = symbols_object.get("narrow"sv).as_object(); auto const& narrow_symbols = symbols_object.get("narrow"sv).as_object();
auto const& short_symbols = symbols_object.get("abbreviated"sv).as_object(); auto const& short_symbols = symbols_object.get("abbreviated"sv).as_object();
auto const& long_symbols = symbols_object.get("wide"sv).as_object(); auto const& long_symbols = symbols_object.get("wide"sv).as_object();
auto symbol_lists = create_symbol_lists(10);
auto& day_period_symbols = ensure_symbols("dayPeriod"sv, 10);
auto append_symbol = [&](auto& symbols, auto const& key, auto symbol) { auto append_symbol = [&](auto& symbols, auto const& key, auto symbol) {
if (auto day_period = day_period_from_string(key); day_period.has_value()) if (auto day_period = day_period_from_string(key); day_period.has_value())
@ -984,14 +1008,16 @@ static void parse_calendar_symbols(Calendar& calendar, JsonObject const& calenda
}; };
narrow_symbols.for_each_member([&](auto const& key, JsonValue const& value) { narrow_symbols.for_each_member([&](auto const& key, JsonValue const& value) {
append_symbol(day_period_symbols.narrow_symbols, key, value.as_string()); append_symbol(symbol_lists[0], key, value.as_string());
}); });
short_symbols.for_each_member([&](auto const& key, JsonValue const& value) { short_symbols.for_each_member([&](auto const& key, JsonValue const& value) {
append_symbol(day_period_symbols.short_symbols, key, value.as_string()); append_symbol(symbol_lists[1], key, value.as_string());
}); });
long_symbols.for_each_member([&](auto const& key, JsonValue const& value) { long_symbols.for_each_member([&](auto const& key, JsonValue const& value) {
append_symbol(day_period_symbols.long_symbols, key, value.as_string()); append_symbol(symbol_lists[2], key, value.as_string());
}); });
store_symbol_lists("dayPeriod"sv, move(symbol_lists));
}; };
parse_era_symbols(calendar_object.get("eras"sv).as_object()); parse_era_symbols(calendar_object.get("eras"sv).as_object());
@ -1319,6 +1345,7 @@ static void generate_unicode_locale_implementation(Core::File& file, UnicodeLoca
generator.set("calendar_pattern_index_type"sv, s_calendar_pattern_index_type); generator.set("calendar_pattern_index_type"sv, s_calendar_pattern_index_type);
generator.set("calendar_range_pattern_index_type"sv, s_calendar_range_pattern_index_type); generator.set("calendar_range_pattern_index_type"sv, s_calendar_range_pattern_index_type);
generator.set("calendar_format_index_type"sv, s_calendar_format_index_type); generator.set("calendar_format_index_type"sv, s_calendar_format_index_type);
generator.set("symbol_list_index_type"sv, s_symbol_list_index_type);
generator.set("calendar_symbols_size"sv, String::number(locale_data.symbols.size())); generator.set("calendar_symbols_size"sv, String::number(locale_data.symbols.size()));
generator.append(R"~~~( generator.append(R"~~~(
@ -1461,7 +1488,7 @@ struct CalendarData {
Span<@calendar_range_pattern_index_type@ const> range_formats {}; Span<@calendar_range_pattern_index_type@ const> range_formats {};
Span<@calendar_range_pattern_index_type@ const> range12_formats {}; Span<@calendar_range_pattern_index_type@ const> range12_formats {};
Array<Span<CalendarSymbols const>, @calendar_symbols_size@> symbols {}; Array<CalendarSymbols, @calendar_symbols_size@> symbols {};
}; };
struct TimeZoneData { struct TimeZoneData {
@ -1478,6 +1505,7 @@ struct DayPeriodData {
)~~~"); )~~~");
locale_data.unique_formats.generate(generator, "CalendarFormat"sv, "s_calendar_formats"sv, 10); locale_data.unique_formats.generate(generator, "CalendarFormat"sv, "s_calendar_formats"sv, 10);
locale_data.unique_symbol_lists.generate(generator, s_string_index_type, "s_symbol_lists"sv);
auto append_pattern_list = [&](auto name, auto type, auto const& formats) { auto append_pattern_list = [&](auto name, auto type, auto const& formats) {
generator.set("name", move(name)); generator.set("name", move(name));
@ -1497,41 +1525,16 @@ static constexpr Array<@type@, @size@> @name@ { {)~~~");
generator.append(" } };"); generator.append(" } };");
}; };
auto append_calendar_symbols_for_style = [&](auto name, auto style, auto const& symbols) {
name = String::formatted("{}_{}", name, style);
generator.set("name", name);
generator.set("size", String::number(symbols.size()));
generator.append(R"~~~(
static constexpr Array<@string_index_type@, @size@> @name@ { {)~~~");
bool first = true;
for (auto symbol : symbols) {
generator.append(first ? " " : ", ");
generator.append(String::number(symbol));
first = false;
}
generator.append(" } };");
return name;
};
auto append_calendar_symbols = [&](auto name, auto symbol, auto const& symbols) { auto append_calendar_symbols = [&](auto name, auto symbol, auto const& symbols) {
name = String::formatted("{}_{}", name, symbol.to_lowercase()); name = String::formatted("{}_{}", name, symbol.to_lowercase());
auto narrow_symbols = append_calendar_symbols_for_style(name, "narrow"sv, symbols.narrow_symbols); generator.set("narrow_symbols", String::number(symbols.narrow_symbols));
auto short_symbols = append_calendar_symbols_for_style(name, "short"sv, symbols.short_symbols); generator.set("short_symbols", String::number(symbols.short_symbols));
auto long_symbols = append_calendar_symbols_for_style(name, "long"sv, symbols.long_symbols); generator.set("long_symbols", String::number(symbols.long_symbols));
generator.set("narrow_symbols", move(narrow_symbols));
generator.set("short_symbols", move(short_symbols));
generator.set("long_symbols", move(long_symbols));
generator.set("name", name); generator.set("name", name);
generator.append(R"~~~( generator.append(R"~~~(
static constexpr Array<CalendarSymbols, 3> @name@ { @narrow_symbols@.span(), @short_symbols@.span(), @long_symbols@.span() }; static constexpr Array<@symbol_list_index_type@, 3> @name@ { { @narrow_symbols@, @short_symbols@, @long_symbols@ } };)~~~");
)~~~");
return name; return name;
}; };
@ -1559,6 +1562,8 @@ static constexpr Array<CalendarSymbols, 3> @name@ { @narrow_symbols@.span(), @sh
auto name = append_calendar_symbols(symbols_name, symbol_key, symbols); auto name = append_calendar_symbols(symbols_name, symbol_key, symbols);
symbols_names.append(name); symbols_names.append(name);
} }
generator.append("\n");
} }
generator.set("name", name); generator.set("name", name);
@ -1817,7 +1822,8 @@ static CalendarSymbols find_calendar_symbols(StringView locale, StringView calen
auto symbols = data->symbols.at(symbol_index); auto symbols = data->symbols.at(symbol_index);
VERIFY(style_index < symbols.size()); VERIFY(style_index < symbols.size());
return symbols.at(style_index); auto symbol_list_index = symbols.at(style_index);
return s_symbol_lists.at(symbol_list_index);
} }
return {}; return {};