LibUnicode: Parse and generate format pattern skeletons

Pattern skeletons are more or less the "key" of format patterns. Every
format pattern is assigned a skeleton. Interval patterns (which are not
yet parsed) are also assigned a skeleton - this is used to match them to
an "owning" format pattern. So we will use the skeleton generated here
to match format patterns at runtime with their available interval
patterns.

An alternative approach would be to append interval patterns directly to
their owning format pattern, but this has some drawbacks:

    1. Skeletons aren't totally unique. A skeleton may appear in both
       the "dateFormats" and "availableFormats" objects, in which case
       the same interval formats would be generated more than once.

    2. Format patterns that are otherwise identical would differ only by
       the interval patterns assigned to them. This would cause the
       UniqueStorage for the format patterns to increase in size,
       impacting both compile times and libunicode.so size.
This commit is contained in:
Timothy Flynn 2021-12-08 13:42:04 -05:00 committed by Linus Groh
parent b17c6ab661
commit fe84a365c2
2 changed files with 24 additions and 10 deletions

View file

@ -45,6 +45,7 @@ struct CalendarPattern : public Unicode::CalendarPattern {
unsigned hash() const
{
auto hash = pair_int_hash(pattern_index, pattern12_index);
hash = pair_int_hash(hash, skeleton_index);
auto hash_field = [&](auto const& field) {
if (field.has_value())
@ -70,7 +71,8 @@ struct CalendarPattern : public Unicode::CalendarPattern {
bool operator==(CalendarPattern const& other) const
{
return (pattern_index == other.pattern_index)
return (skeleton_index == other.skeleton_index)
&& (pattern_index == other.pattern_index)
&& (pattern12_index == other.pattern12_index)
&& (era == other.era)
&& (year == other.year)
@ -85,6 +87,7 @@ struct CalendarPattern : public Unicode::CalendarPattern {
&& (time_zone_name == other.time_zone_name);
}
StringIndexType skeleton_index { 0 };
StringIndexType pattern_index { 0 };
StringIndexType pattern12_index { 0 };
};
@ -100,7 +103,8 @@ struct AK::Formatter<CalendarPattern> : Formatter<FormatString> {
};
return Formatter<FormatString>::format(builder,
"{{ {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} }}",
"{{ {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} }}",
pattern.skeleton_index,
pattern.pattern_index,
pattern.pattern12_index,
field_to_i8(pattern.era),
@ -298,13 +302,16 @@ static String remove_period_from_pattern(String pattern)
return pattern;
}
static Optional<CalendarPatternIndexType> parse_date_time_pattern(String pattern, UnicodeLocaleData& locale_data)
static Optional<CalendarPatternIndexType> parse_date_time_pattern(String pattern, String skeleton, UnicodeLocaleData& locale_data)
{
// https://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
using Unicode::CalendarPatternStyle;
CalendarPattern format {};
if (!skeleton.is_empty())
format.skeleton_index = locale_data.unique_strings.ensure(move(skeleton));
GenericLexer lexer { pattern };
StringBuilder builder;
bool hour12 { false };
@ -742,10 +749,12 @@ static ErrorOr<void> parse_calendars(String locale_calendars_path, UnicodeLocale
});
};
auto parse_patterns = [&](auto& formats, auto const& patterns_object, Vector<CalendarPattern>* patterns) {
auto parse_patterns = [&](auto& formats, auto const& patterns_object, auto const& skeletons_object, Vector<CalendarPattern>* patterns) {
auto parse_pattern = [&](auto name) {
auto format = patterns_object.get(name);
auto format_index = parse_date_time_pattern(format.as_string(), locale_data).value();
auto skeleton = skeletons_object.get(name);
auto format_index = parse_date_time_pattern(format.as_string(), skeleton.as_string_or(String::empty()), locale_data).value();
if (patterns)
patterns->append(locale_data.unique_patterns.get(format_index));
@ -774,17 +783,19 @@ static ErrorOr<void> parse_calendars(String locale_calendars_path, UnicodeLocale
Vector<CalendarPattern> time_formats;
auto const& date_formats_object = value.as_object().get("dateFormats"sv);
parse_patterns(calendar.date_formats, date_formats_object.as_object(), &date_formats);
auto const& date_skeletons_object = value.as_object().get("dateSkeletons"sv);
parse_patterns(calendar.date_formats, date_formats_object.as_object(), date_skeletons_object.as_object(), &date_formats);
auto const& time_formats_object = value.as_object().get("timeFormats"sv);
parse_patterns(calendar.time_formats, time_formats_object.as_object(), &time_formats);
auto const& time_skeletons_object = value.as_object().get("timeSkeletons"sv);
parse_patterns(calendar.time_formats, time_formats_object.as_object(), time_skeletons_object.as_object(), &time_formats);
auto const& date_time_formats_object = value.as_object().get("dateTimeFormats"sv);
parse_patterns(calendar.date_time_formats, date_time_formats_object.as_object(), nullptr);
parse_patterns(calendar.date_time_formats, date_time_formats_object.as_object(), JsonObject {}, nullptr);
auto const& available_formats = date_time_formats_object.as_object().get("availableFormats"sv);
available_formats.as_object().for_each_member([&](auto const&, JsonValue const& pattern) {
auto pattern_index = parse_date_time_pattern(pattern.as_string(), locale_data);
available_formats.as_object().for_each_member([&](auto const& skeleton, JsonValue const& pattern) {
auto pattern_index = parse_date_time_pattern(pattern.as_string(), skeleton, locale_data);
if (!pattern_index.has_value())
return;
@ -992,6 +1003,7 @@ struct CalendarPattern {
Unicode::CalendarPattern to_unicode_calendar_pattern() const {
Unicode::CalendarPattern calendar_pattern {};
calendar_pattern.skeleton = s_string_list[skeleton];
calendar_pattern.pattern = s_string_list[pattern];
if (pattern12 != 0)
calendar_pattern.pattern12 = s_string_list[pattern12];
@ -1022,6 +1034,7 @@ struct CalendarPattern {
return calendar_pattern;
}
@string_index_type@ skeleton { 0 };
@string_index_type@ pattern { 0 };
@string_index_type@ pattern12 { 0 };

View file

@ -100,6 +100,7 @@ struct CalendarPattern {
callback(time_zone_name, other.time_zone_name, Field::TimeZoneName);
}
String skeleton {};
String pattern {};
Optional<String> pattern12 {};