From fe84a365c2c7a6eff742069e54d2020e08a0a98d Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Wed, 8 Dec 2021 13:42:04 -0500 Subject: [PATCH] LibUnicode: Parse and generate format pattern skeletons Pattern skeletons are more or less the "key" of format patterns. Every format pattern is assigned a skeleton. Interval patterns (which are not yet parsed) are also assigned a skeleton - this is used to match them to an "owning" format pattern. So we will use the skeleton generated here to match format patterns at runtime with their available interval patterns. An alternative approach would be to append interval patterns directly to their owning format pattern, but this has some draw backs: 1. Skeletons aren't totally unique. A skeleton may appear in both the "dateFormats" and "availableFormats" objects, in which case the same interval formats would be generated more than once. 2. Otherwise unique format patterns may only differ by the interval patterns assigned to them. This would cause the UniqueStorage for the format patterns to increase in size, impacting both compile times and libunicode.so size. --- .../GenerateUnicodeDateTimeFormat.cpp | 33 +++++++++++++------ .../Libraries/LibUnicode/DateTimeFormat.h | 1 + 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp index 8041be1cda7..34a3e042ed0 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp @@ -45,6 +45,7 @@ struct CalendarPattern : public Unicode::CalendarPattern { unsigned hash() const { auto hash = pair_int_hash(pattern_index, pattern12_index); + hash = pair_int_hash(hash, skeleton_index); auto hash_field = [&](auto const& field) { if (field.has_value()) @@ -70,7 +71,8 @@ struct CalendarPattern : public Unicode::CalendarPattern { bool operator==(CalendarPattern const& other) const { - return (pattern_index == other.pattern_index) + return (skeleton_index == other.skeleton_index) + && (pattern_index == other.pattern_index) && (pattern12_index == other.pattern12_index) && (era == other.era) && (year == other.year) @@ -85,6 +87,7 @@ struct CalendarPattern : public Unicode::CalendarPattern { && (time_zone_name == other.time_zone_name); } + StringIndexType skeleton_index { 0 }; StringIndexType pattern_index { 0 }; StringIndexType pattern12_index { 0 }; }; @@ -100,7 +103,8 @@ struct AK::Formatter : Formatter { }; return Formatter::format(builder, - "{{ {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} }}", + "{{ {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} }}", + pattern.skeleton_index, pattern.pattern_index, pattern.pattern12_index, field_to_i8(pattern.era), @@ -298,13 +302,16 @@ static String remove_period_from_pattern(String pattern) return pattern; } -static Optional parse_date_time_pattern(String pattern, UnicodeLocaleData& locale_data) +static Optional parse_date_time_pattern(String pattern, String skeleton, UnicodeLocaleData& locale_data) { // https://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table using Unicode::CalendarPatternStyle; CalendarPattern format {}; + if (!skeleton.is_empty()) + format.skeleton_index = locale_data.unique_strings.ensure(move(skeleton)); + GenericLexer lexer { pattern }; StringBuilder builder; bool hour12 { false }; @@ -742,10 +749,12 @@ static ErrorOr parse_calendars(String locale_calendars_path, UnicodeLocale }); }; - auto parse_patterns = [&](auto& formats, auto const& patterns_object, Vector* patterns) { + auto parse_patterns = [&](auto& formats, auto const& patterns_object, auto const& skeletons_object, Vector* patterns) { auto parse_pattern = [&](auto name) { auto format = patterns_object.get(name); - auto format_index = parse_date_time_pattern(format.as_string(), locale_data).value(); + auto skeleton = skeletons_object.get(name); + + auto format_index = parse_date_time_pattern(format.as_string(), skeleton.as_string_or(String::empty()), locale_data).value(); if (patterns) patterns->append(locale_data.unique_patterns.get(format_index)); @@ -774,17 +783,19 @@ static ErrorOr parse_calendars(String locale_calendars_path, UnicodeLocale Vector time_formats; auto const& date_formats_object = value.as_object().get("dateFormats"sv); - parse_patterns(calendar.date_formats, date_formats_object.as_object(), &date_formats); + auto const& date_skeletons_object = value.as_object().get("dateSkeletons"sv); + parse_patterns(calendar.date_formats, date_formats_object.as_object(), date_skeletons_object.as_object(), &date_formats); auto const& time_formats_object = value.as_object().get("timeFormats"sv); - parse_patterns(calendar.time_formats, time_formats_object.as_object(), &time_formats); + auto const& time_skeletons_object = value.as_object().get("timeSkeletons"sv); + parse_patterns(calendar.time_formats, time_formats_object.as_object(), time_skeletons_object.as_object(), &time_formats); auto const& date_time_formats_object = value.as_object().get("dateTimeFormats"sv); - parse_patterns(calendar.date_time_formats, date_time_formats_object.as_object(), nullptr); + parse_patterns(calendar.date_time_formats, date_time_formats_object.as_object(), JsonObject {}, nullptr); auto const& available_formats = date_time_formats_object.as_object().get("availableFormats"sv); - available_formats.as_object().for_each_member([&](auto const&, JsonValue const& pattern) { - auto pattern_index = parse_date_time_pattern(pattern.as_string(), locale_data); + available_formats.as_object().for_each_member([&](auto const& skeleton, JsonValue const& pattern) { + auto pattern_index = parse_date_time_pattern(pattern.as_string(), skeleton, locale_data); if (!pattern_index.has_value()) return; @@ -992,6 +1003,7 @@ struct CalendarPattern { Unicode::CalendarPattern to_unicode_calendar_pattern() const { Unicode::CalendarPattern calendar_pattern {}; + calendar_pattern.skeleton = s_string_list[skeleton]; calendar_pattern.pattern = s_string_list[pattern]; if (pattern12 != 0) calendar_pattern.pattern12 = s_string_list[pattern12]; @@ -1022,6 +1034,7 @@ struct CalendarPattern { return calendar_pattern; } + @string_index_type@ skeleton { 0 }; @string_index_type@ pattern { 0 }; @string_index_type@ pattern12 { 0 }; diff --git a/Userland/Libraries/LibUnicode/DateTimeFormat.h b/Userland/Libraries/LibUnicode/DateTimeFormat.h index e22a5ad63c8..34230b84bf3 100644 --- a/Userland/Libraries/LibUnicode/DateTimeFormat.h +++ b/Userland/Libraries/LibUnicode/DateTimeFormat.h @@ -100,6 +100,7 @@ struct CalendarPattern { callback(time_zone_name, other.time_zone_name, Field::TimeZoneName); } + String skeleton {}; String pattern {}; Optional pattern12 {};