LibUnicode: Add minimal support for generic & offset-based time zones

ECMA-402 now supports short-offset, long-offset, short-generic, and
long-generic time zone name formatting. For example, in the en-US locale
the America/Eastern time zone would be formatted as:

    short-offset: GMT-5
    long-offset: GMT-05:00
    short-generic: ET
    long-generic: Eastern Time

We currently only support the UTC time zone, however. Therefore, this
very minimal implementation does not consider GMT offset or generic
display names. Instead, the CLDR defines specific strings for UTC.
This commit is contained in:
Timothy Flynn 2022-01-02 14:23:24 -05:00 committed by Linus Groh
parent d2ac40bcd7
commit 126a3fe180
3 changed files with 109 additions and 19 deletions

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
* Copyright (c) 2021-2022, Tim Flynn <trflynn89@pm.me>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -61,6 +61,9 @@ constexpr auto s_time_zone_index_type = "u16"sv;
using TimeZoneListIndexType = u8;
constexpr auto s_time_zone_list_index_type = "u8"sv;
using TimeZoneFormatIndexType = u8;
constexpr auto s_time_zone_format_index_type = "u8"sv;
using DayPeriodIndexType = u8;
constexpr auto s_day_period_index_type = "u8"sv;
@ -407,6 +410,33 @@ struct AK::Traits<TimeZone> : public GenericTraits<TimeZone> {
static unsigned hash(TimeZone const& t) { return t.hash(); }
};
struct TimeZoneFormat {
unsigned hash() const
{
return int_hash(gmt_zero_format);
}
bool operator==(TimeZoneFormat const& other) const
{
return gmt_zero_format == other.gmt_zero_format;
}
StringIndexType gmt_zero_format { 0 };
};
template<>
struct AK::Formatter<TimeZoneFormat> : Formatter<FormatString> {
ErrorOr<void> format(FormatBuilder& builder, TimeZoneFormat const& time_zone_format)
{
return Formatter<FormatString>::format(builder, "{{ {} }}", time_zone_format.gmt_zero_format);
}
};
template<>
struct AK::Traits<TimeZoneFormat> : public GenericTraits<TimeZoneFormat> {
static unsigned hash(TimeZoneFormat const& t) { return t.hash(); }
};
struct DayPeriod {
unsigned hash() const
{
@ -459,7 +489,10 @@ struct AK::Formatter<Unicode::HourCycle> : Formatter<FormatString> {
struct Locale {
HashMap<String, CalendarIndexType> calendars;
TimeZoneListIndexType time_zones { 0 };
TimeZoneFormatIndexType time_zone_formats { 0 };
DayPeriodListIndexType day_periods { 0 };
};
@ -476,6 +509,7 @@ struct UnicodeLocaleData {
UniqueStorage<Calendar, CalendarIndexType> unique_calendars;
UniqueStorage<TimeZone, TimeZoneIndexType> unique_time_zones;
UniqueStorage<TimeZoneList, TimeZoneListIndexType> unique_time_zone_lists;
UniqueStorage<TimeZoneFormat, TimeZoneFormatIndexType> unique_time_zone_formats;
UniqueStorage<DayPeriod, DayPeriodIndexType> unique_day_periods;
UniqueStorage<DayPeriodList, DayPeriodListIndexType> unique_day_period_lists;
UniqueStorage<HourCycleList, HourCycleListIndexType> unique_hour_cycle_lists;
@ -821,13 +855,27 @@ static Optional<CalendarPattern> parse_date_time_pattern_raw(String pattern, Str
}
// Zone
else if (all_of(segment, is_any_of("zZOvVXx"))) {
else if (all_of(segment, is_any_of("zV"sv))) {
builder.append("{timeZoneName}");
if (segment.length() < 4)
format.time_zone_name = CalendarPatternStyle::Short;
else
format.time_zone_name = CalendarPatternStyle::Long;
} else if (all_of(segment, is_any_of("ZOXx"sv))) {
builder.append("{timeZoneName}");
if (segment.length() < 4)
format.time_zone_name = CalendarPatternStyle::ShortOffset;
else
format.time_zone_name = CalendarPatternStyle::LongOffset;
} else if (all_of(segment, is_char('v'))) {
builder.append("{timeZoneName}");
if (segment.length() < 4)
format.time_zone_name = CalendarPatternStyle::ShortGeneric;
else
format.time_zone_name = CalendarPatternStyle::LongGeneric;
}
// Non-patterns
@ -1327,6 +1375,7 @@ static ErrorOr<void> parse_time_zone_names(String locale_time_zone_names_path, U
auto const& dates_object = locale_object.as_object().get("dates"sv);
auto const& time_zone_names_object = dates_object.as_object().get("timeZoneNames"sv);
auto const& meta_zone_object = time_zone_names_object.as_object().get("metazone"sv);
auto const& gmt_zero_format_string = time_zone_names_object.as_object().get("gmtZeroFormat"sv);
if (meta_zone_object.is_null())
return {};
@ -1349,6 +1398,9 @@ static ErrorOr<void> parse_time_zone_names(String locale_time_zone_names_path, U
TimeZoneList time_zones;
TimeZoneFormat time_zone_formats {};
time_zone_formats.gmt_zero_format = locale_data.unique_strings.ensure(gmt_zero_format_string.as_string());
auto parse_time_zone = [&](StringView meta_zone, JsonObject const& meta_zone_object) {
auto const& golden_zone = locale_data.meta_zones.find(meta_zone)->value;
TimeZone time_zone {};
@ -1383,6 +1435,8 @@ static ErrorOr<void> parse_time_zone_names(String locale_time_zone_names_path, U
parse_time_zone("UTC"sv, utc_object.as_object());
locale.time_zones = locale_data.unique_time_zone_lists.ensure(move(time_zones));
locale.time_zone_formats = locale_data.unique_time_zone_formats.ensure(move(time_zone_formats));
return {};
}
@ -1689,6 +1743,10 @@ struct TimeZoneData {
@string_index_type@ short_name { 0 };
};
struct TimeZoneFormat {
@string_index_type@ gmt_zero_format { 0 };
};
struct DayPeriodData {
u8 day_period { 0 };
u8 begin { 0 };
@ -1703,6 +1761,7 @@ struct DayPeriodData {
locale_data.unique_calendars.generate(generator, "CalendarData"sv, "s_calendars"sv, 10);
locale_data.unique_time_zones.generate(generator, "TimeZoneData"sv, "s_time_zones"sv, 30);
locale_data.unique_time_zone_lists.generate(generator, s_time_zone_index_type, "s_time_zone_lists"sv);
locale_data.unique_time_zone_formats.generate(generator, "TimeZoneFormat"sv, "s_time_zone_formats"sv, 30);
locale_data.unique_day_periods.generate(generator, "DayPeriodData"sv, "s_day_periods"sv, 30);
locale_data.unique_day_period_lists.generate(generator, s_day_period_index_type, "s_day_period_lists"sv);
locale_data.unique_hour_cycle_lists.generate(generator, "u8"sv, "s_hour_cycle_lists"sv);
@ -1752,6 +1811,7 @@ static constexpr Array<@type@, @size@> @name@ { {)~~~");
generate_mapping(generator, locale_data.locales, s_calendar_index_type, "s_locale_calendars"sv, "s_calendars_{}", [&](auto const& name, auto const& value) { append_calendars(name, value.calendars); });
append_mapping(locales, locale_data.locales, s_time_zone_index_type, "s_locale_time_zones"sv, [](auto const& locale) { return locale.time_zones; });
append_mapping(locales, locale_data.locales, s_time_zone_format_index_type, "s_locale_time_zone_formats"sv, [](auto const& locale) { return locale.time_zone_formats; });
append_mapping(locales, locale_data.locales, s_day_period_index_type, "s_locale_day_periods"sv, [](auto const& locale) { return locale.day_periods; });
append_mapping(locale_data.hour_cycle_regions, locale_data.hour_cycles, s_hour_cycle_list_index_type, "s_hour_cycles"sv, [](auto const& hour_cycles) { return hour_cycles; });
generator.append("\n");
@ -2011,6 +2071,18 @@ Optional<StringView> get_calendar_day_period_symbol_for_hour(StringView locale,
return Detail::get_calendar_day_period_symbol(locale, calendar, style, Unicode::DayPeriod::PM);
}
static TimeZoneFormat const* find_time_zone_formats(StringView locale)
{
auto locale_value = locale_from_string(locale);
if (!locale_value.has_value())
return nullptr;
auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
auto time_zone_format_index = s_locale_time_zone_formats.at(locale_index);
return &s_time_zone_formats.at(time_zone_format_index);
}
static TimeZoneData const* find_time_zone_data(StringView locale, StringView time_zone)
{
auto locale_value = locale_from_string(locale);
@ -2034,23 +2106,21 @@ static TimeZoneData const* find_time_zone_data(StringView locale, StringView tim
Optional<StringView> get_time_zone_name(StringView locale, StringView time_zone, CalendarPatternStyle style) asm("unicode_get_time_zone_name");
Optional<StringView> get_time_zone_name(StringView locale, StringView time_zone, CalendarPatternStyle style)
{
if (auto const* data = find_time_zone_data(locale, time_zone); data != nullptr) {
@string_index_type@ time_zone_index = 0;
// FIXME: This becomes more complicated when time zones other than UTC are supported. We will need to know the GMT offset
// of each time zone (which must be parsed from the time zone database, not the CLDR). For now, assuming UTC means
// we can assume a GMT offset of 0, for which the CLDR has a specific format string for the offset styles. Further,
// we will need to parse the "generic" time zone names from timeZoneNames.json.
VERIFY(time_zone == "UTC"sv);
switch (style) {
case CalendarPatternStyle::Long:
time_zone_index = data->long_name;
break;
case CalendarPatternStyle::Short:
time_zone_index = data->short_name;
break;
default:
VERIFY_NOT_REACHED();
if ((style == CalendarPatternStyle::Long) || (style == CalendarPatternStyle::Short)) {
if (auto const* data = find_time_zone_data(locale, time_zone); data != nullptr) {
auto time_zone_index = style == CalendarPatternStyle::Long ? data->long_name : data->short_name;
if (time_zone_index != 0)
return s_string_list[time_zone_index];
}
if (time_zone_index != 0)
return s_string_list[time_zone_index];
} else {
if (auto const* formats = find_time_zone_formats(locale); formats != nullptr)
return s_string_list[formats->gmt_zero_format];
}
return {};

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
* Copyright (c) 2021-2022, Tim Flynn <trflynn89@pm.me>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -53,6 +53,14 @@ CalendarPatternStyle calendar_pattern_style_from_string(StringView style)
return CalendarPatternStyle::Numeric;
if (style == "2-digit"sv)
return CalendarPatternStyle::TwoDigit;
if (style == "shortOffset"sv)
return CalendarPatternStyle::ShortOffset;
if (style == "longOffset"sv)
return CalendarPatternStyle::LongOffset;
if (style == "shortGeneric"sv)
return CalendarPatternStyle::ShortGeneric;
if (style == "longGeneric"sv)
return CalendarPatternStyle::LongGeneric;
VERIFY_NOT_REACHED();
}
@ -69,6 +77,14 @@ StringView calendar_pattern_style_to_string(CalendarPatternStyle style)
return "numeric"sv;
case CalendarPatternStyle::TwoDigit:
return "2-digit"sv;
case CalendarPatternStyle::ShortOffset:
return "shortOffset"sv;
case CalendarPatternStyle::LongOffset:
return "longOffset"sv;
case CalendarPatternStyle::ShortGeneric:
return "shortGeneric"sv;
case CalendarPatternStyle::LongGeneric:
return "longGeneric"sv;
default:
VERIFY_NOT_REACHED();
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
* Copyright (c) 2021-2022, Tim Flynn <trflynn89@pm.me>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -71,6 +71,10 @@ enum class CalendarPatternStyle : u8 {
Long,
Numeric,
TwoDigit,
ShortOffset,
LongOffset,
ShortGeneric,
LongGeneric,
};
struct CalendarPattern {