Meta+LibUnicode+LibJS: Upgrade to ICU 76.1

This updates our local ICU overlay port to use ICU 76.1, which includes
Unicode 16 and CLDR 46.

Upstream vcpkg is not able to supply ICU versions past 74 yet due to various
dependency issues, but we are able to use ICU 76.1 ourselves. The
overlay port now includes a patch to revert ICU's dependence on autoconf
2.72 for now, as that autoconf version is not yet available on all systems.

All of the test changes were cross-referenced with Firefox to ensure
correctness.
This commit is contained in:
Timothy Flynn 2025-01-18 13:29:28 -05:00 committed by Tim Flynn
parent 0763997591
commit 6a564376fc
Notes: github-actions[bot] 2025-01-18 22:57:42 +00:00
13 changed files with 95 additions and 52 deletions

View file

@ -308,9 +308,9 @@ describe("day", () => {
describe("dayPeriod", () => {
// prettier-ignore
const data = [
{ dayPeriod: "narrow", en0: "5 in the afternoon", en1: "7 in the morning", ar0: "٥ بعد الظهر", ar1: "٧ صباحًا", as0: "অপৰাহ্ন ৫", as1: "পূৰ্বাহ্ন "},
{ dayPeriod: "short", en0: "5 in the afternoon", en1: "7 in the morning", ar0: "٥ بعد الظهر", ar1: "٧ ص", as0: "অপৰাহ্ন ৫", as1: "পূৰ্বাহ্ন "},
{ dayPeriod: "long", en0: "5 in the afternoon", en1: "7 in the morning", ar0: "٥ بعد الظهر", ar1: "٧ صباحًا", as0: "অপৰাহ্ন ৫", as1: "পূৰ্বাহ্ন "},
{ dayPeriod: "narrow", en0: "5 in the afternoon", en1: "7 in the morning", ar0: "٥ بعد الظهر", ar1: "٧ صباحًا", as0: "pm ৫", as1: "am "},
{ dayPeriod: "short", en0: "5 in the afternoon", en1: "7 in the morning", ar0: "٥ بعد الظهر", ar1: "٧ ص", as0: "PM ৫", as1: "AM "},
{ dayPeriod: "long", en0: "5 in the afternoon", en1: "7 in the morning", ar0: "٥ بعد الظهر", ar1: "٧ صباحًا", as0: "PM ৫", as1: "AM "},
];
test("all", () => {
@ -404,9 +404,9 @@ describe("dayPeriod", () => {
test("dayPeriod without time", () => {
// prettier-ignore
const data = [
{ dayPeriod: "narrow", en0: "in the afternoon", en1: "in the morning", ar0: "بعد الظهر", ar1: "صباحًا", as0: "অপৰাহ্ন", as1: "পূৰ্বাহ্ন"},
{ dayPeriod: "short", en0: "in the afternoon", en1: "in the morning", ar0: "بعد الظهر", ar1: "ص", as0: "অপৰাহ্ন", as1: "পূৰ্বাহ্ন"},
{ dayPeriod: "long", en0: "in the afternoon", en1: "in the morning", ar0: "بعد الظهر", ar1: "صباحًا", as0: "অপৰাহ্ন", as1: "পূৰ্বাহ্ন"},
{ dayPeriod: "narrow", en0: "in the afternoon", en1: "in the morning", ar0: "بعد الظهر", ar1: "صباحًا", as0: "pm", as1: "am"},
{ dayPeriod: "short", en0: "in the afternoon", en1: "in the morning", ar0: "بعد الظهر", ar1: "ص", as0: "PM", as1: "AM"},
{ dayPeriod: "long", en0: "in the afternoon", en1: "in the morning", ar0: "بعد الظهر", ar1: "صباحًا", as0: "PM", as1: "AM"},
];
data.forEach(d => {

View file

@ -124,13 +124,13 @@ describe("correct behavior", () => {
});
test("timeZone", () => {
const en = new Intl.DateTimeFormat("en", { timeZone: "EST5EDT" });
expect(en.resolvedOptions().timeZone).toBe("EST5EDT");
const en = new Intl.DateTimeFormat("en", { timeZone: "America/New_York" });
expect(en.resolvedOptions().timeZone).toBe("America/New_York");
const el = new Intl.DateTimeFormat("el", { timeZone: "UTC" });
expect(el.resolvedOptions().timeZone).toBe("UTC");
["UTC", "EST5EDT", "+01:02", "-20:30", "+00:00"].forEach(timeZone => {
["UTC", "America/New_York", "+01:02", "-20:30", "+00:00"].forEach(timeZone => {
const en = new Intl.DateTimeFormat("en", { timeZone: timeZone });
expect(en.resolvedOptions().timeZone).toBe(timeZone);

View file

@ -124,7 +124,7 @@ describe("correct behavior", () => {
expect(es419.of("Latn")).toBe("latín");
const zhHant = new Intl.DisplayNames(["zh-Hant"], { type: "script" });
expect(zhHant.of("Latn")).toBe("拉丁");
expect(zhHant.of("Latn")).toBe("拉丁字母");
expect(en.of("Aaaa")).toBe("Aaaa");
expect(es419.of("Aaaa")).toBe("Aaaa");
@ -211,14 +211,14 @@ describe("correct behavior", () => {
test("option type dateTimeField, style long", () => {
// prettier-ignore
const data = [
{ dateTimeField: "era", en: "era", es419: "era", zhHant: "年代" },
{ dateTimeField: "era", en: "era", es419: "era", zhHant: "紀元" },
{ dateTimeField: "year", en: "year", es419: "año", zhHant: "年" },
{ dateTimeField: "quarter", en: "quarter", es419: "trimestre", zhHant: "季" },
{ dateTimeField: "quarter", en: "quarter", es419: "trimestre", zhHant: "季" },
{ dateTimeField: "month", en: "month", es419: "mes", zhHant: "月" },
{ dateTimeField: "weekOfYear", en: "week", es419: "semana", zhHant: "週" },
{ dateTimeField: "weekday", en: "day of the week", es419: "día de la semana", zhHant: "週天" },
{ dateTimeField: "weekday", en: "day of the week", es419: "día de la semana", zhHant: "星期" },
{ dateTimeField: "day", en: "day", es419: "día", zhHant: "日" },
{ dateTimeField: "dayPeriod", en: "AM/PM", es419: "a.m./p.m.", zhHant: "上午/下午" },
{ dateTimeField: "dayPeriod", en: "AM/PM", es419: "a.m./p.m.", zhHant: "時段" },
{ dateTimeField: "hour", en: "hour", es419: "hora", zhHant: "小時" },
{ dateTimeField: "minute", en: "minute", es419: "minuto", zhHant: "分鐘" },
{ dateTimeField: "second", en: "second", es419: "segundo", zhHant: "秒" },
@ -239,16 +239,16 @@ describe("correct behavior", () => {
test("option type dateTimeField, style short", () => {
// prettier-ignore
const data = [
{ dateTimeField: "era", en: "era", es419: "era", zhHant: "年代" },
{ dateTimeField: "era", en: "era", es419: "era", zhHant: "紀元" },
{ dateTimeField: "year", en: "yr.", es419: "a", zhHant: "年" },
{ dateTimeField: "quarter", en: "qtr.", es419: "trim.", zhHant: "季" },
{ dateTimeField: "quarter", en: "qtr.", es419: "trim.", zhHant: "季" },
{ dateTimeField: "month", en: "mo.", es419: "m", zhHant: "月" },
{ dateTimeField: "weekOfYear", en: "wk.", es419: "sem.", zhHant: "週" },
{ dateTimeField: "weekday", en: "day of wk.", es419: "día de sem.", zhHant: "週天" },
{ dateTimeField: "weekday", en: "day of wk.", es419: "día de sem.", zhHant: "星期" },
{ dateTimeField: "day", en: "day", es419: "d", zhHant: "日" },
{ dateTimeField: "dayPeriod", en: "AM/PM", es419: "a.m./p.m.", zhHant: "上午/下午" },
{ dateTimeField: "hour", en: "hr.", es419: "h", zhHant: "時" },
{ dateTimeField: "minute", en: "min.", es419: "min", zhHant: "分" },
{ dateTimeField: "dayPeriod", en: "AM/PM", es419: "a.m./p.m.", zhHant: "時段" },
{ dateTimeField: "hour", en: "hr.", es419: "h", zhHant: "時" },
{ dateTimeField: "minute", en: "min.", es419: "min", zhHant: "分" },
{ dateTimeField: "second", en: "sec.", es419: "s", zhHant: "秒" },
{ dateTimeField: "timeZoneName", en: "zone", es419: "zona", zhHant: "時區" },
];
@ -267,16 +267,16 @@ describe("correct behavior", () => {
test("option type dateTimeField, style narrow", () => {
// prettier-ignore
const data = [
{ dateTimeField: "era", en: "era", es419: "era", zhHant: "年代" },
{ dateTimeField: "era", en: "era", es419: "era", zhHant: "紀元" },
{ dateTimeField: "year", en: "yr", es419: "a", zhHant: "年" },
{ dateTimeField: "quarter", en: "qtr", es419: "trim.", zhHant: "季" },
{ dateTimeField: "quarter", en: "qtr", es419: "trim.", zhHant: "季" },
{ dateTimeField: "month", en: "mo", es419: "m", zhHant: "月" },
{ dateTimeField: "weekOfYear", en: "wk", es419: "sem.", zhHant: "週" },
{ dateTimeField: "weekday", en: "day of wk.", es419: "día de sem.", zhHant: "週天" },
{ dateTimeField: "weekday", en: "day of wk.", es419: "día de sem.", zhHant: "星期" },
{ dateTimeField: "day", en: "day", es419: "d", zhHant: "日" },
{ dateTimeField: "dayPeriod", en: "AM/PM", es419: "a.m./p.m.", zhHant: "上午/下午" },
{ dateTimeField: "hour", en: "hr", es419: "h", zhHant: "時" },
{ dateTimeField: "minute", en: "min", es419: "min", zhHant: "分" },
{ dateTimeField: "dayPeriod", en: "AM/PM", es419: "a.m./p.m.", zhHant: "時段" },
{ dateTimeField: "hour", en: "hr", es419: "h", zhHant: "時" },
{ dateTimeField: "minute", en: "min", es419: "min", zhHant: "分" },
{ dateTimeField: "second", en: "sec", es419: "s", zhHant: "秒" },
{ dateTimeField: "timeZoneName", en: "zone", es419: "zona", zhHant: "時區" },
];

View file

@ -24,9 +24,9 @@ describe("special values", () => {
expect(en.format(undefined)).toBe("NaN");
const ar = new Intl.NumberFormat("ar-u-nu-arab");
expect(ar.format()).toBe("ليس رقم");
expect(ar.format(NaN)).toBe("ليس رقم");
expect(ar.format(undefined)).toBe("ليس رقم");
expect(ar.format()).toBe("ليس رقمًا");
expect(ar.format(NaN)).toBe("ليس رقمًا");
expect(ar.format(undefined)).toBe("ليس رقمًا");
});
test("Infinity", () => {

View file

@ -20,9 +20,9 @@ describe("special values", () => {
expect(en.formatToParts(undefined)).toEqual([{ type: "nan", value: "NaN" }]);
const ar = new Intl.NumberFormat("ar-u-nu-arab");
expect(ar.formatToParts()).toEqual([{ type: "nan", value: "ليس رقم" }]);
expect(ar.formatToParts(NaN)).toEqual([{ type: "nan", value: "ليس رقم" }]);
expect(ar.formatToParts(undefined)).toEqual([{ type: "nan", value: "ليس رقم" }]);
expect(ar.formatToParts()).toEqual([{ type: "nan", value: "ليس رقمًا" }]);
expect(ar.formatToParts(NaN)).toEqual([{ type: "nan", value: "ليس رقمًا" }]);
expect(ar.formatToParts(undefined)).toEqual([{ type: "nan", value: "ليس رقمًا" }]);
});
test("Infinity", () => {

View file

@ -19,7 +19,7 @@ describe("special values", () => {
test("NaN", () => {
expect(NaN.toLocaleString()).toBe("NaN");
expect(NaN.toLocaleString("en")).toBe("NaN");
expect(NaN.toLocaleString("ar-u-nu-arab")).toBe("ليس رقم");
expect(NaN.toLocaleString("ar-u-nu-arab")).toBe("ليس رقمًا");
});
test("Infinity", () => {

View file

@ -15,7 +15,7 @@ namespace Unicode::IDNA {
// https://www.unicode.org/reports/tr46/#ToASCII
ErrorOr<String> to_ascii(Utf8View domain_name, ToAsciiOptions const& options)
{
u32 icu_options = UIDNA_DEFAULT;
u32 icu_options = 0;
if (options.check_bidi == CheckBidi::Yes)
icu_options |= UIDNA_CHECK_BIDI;

View file

@ -138,7 +138,13 @@ Optional<TimeZoneOffset> time_zone_offset(StringView time_zone, UnixDateTime tim
i32 raw_offset = 0;
i32 dst_offset = 0;
time_zone_data->time_zone().getOffset(static_cast<UDate>(time.milliseconds_since_epoch()), 0, raw_offset, dst_offset, status);
// We must clamp the time we provide to ICU such that the result of converting milliseconds to days fits in an i32.
// Further, that conversion must still be valid after applying DST offsets to the time we provide.
static constexpr auto min_time = (static_cast<UDate>(AK::NumericLimits<i32>::min()) + U_MILLIS_PER_DAY) * U_MILLIS_PER_DAY;
static constexpr auto max_time = (static_cast<UDate>(AK::NumericLimits<i32>::max()) - U_MILLIS_PER_DAY) * U_MILLIS_PER_DAY;
auto icu_time = clamp(static_cast<UDate>(time.milliseconds_since_epoch()), min_time, max_time);
time_zone_data->time_zone().getOffset(icu_time, 0, raw_offset, dst_offset, status);
if (icu_failure(status))
return {};

View file

@ -0,0 +1,49 @@
diff --git a/source/configure.ac b/source/configure.ac
index da4f170abfa..28dbff06a4c 100644
--- a/source/configure.ac
+++ b/source/configure.ac
@@ -6,16 +6,15 @@ AC_COPYRIGHT([ Copyright (C) 2016 and later: Unicode, Inc. and others. License &
# NOTE: please use 'autoreconf' to rebuild, otherwise 'aclocal && autoconf'.
-# Check for minimum autoconf version. Use exactly this version when rebuilding
-# 'aclocal.m4' and 'configure' for merging upstream, to avoid spurious diffs.
-AC_PREREQ([2.72])
+# Check for autoconf version
+AC_PREREQ(2.69)
# Process this file with autoreconf to produce a configure script
AC_INIT([ICU],
- m4_esyscmd_s([sed -n 's/^[ ]*#[ ]*define[ ]*U_ICU_VERSION[ ]*"\([^"]*\)".*/\1/p' common/unicode/uvernum.h]),
- [https://icu.unicode.org/bugs],
- [icu4c],
- [https://icu.unicode.org/])
+ m4_esyscmd_s([sed -n 's/^[ ]*#[ ]*define[ ]*U_ICU_VERSION[ ]*"\([^"]*\)".*/\1/p' "./common/unicode/uvernum.h]"),
+ [http://icu-project.org/bugs],
+ [International Components for Unicode],
+ [http://icu-project.org])
# Instruct Python to never write any byte code to the ICU source tree.
PYTHONDONTWRITEBYTECODE=1
@@ -660,13 +659,14 @@ fi
AC_SUBST(U_HAVE_DIRENT_H)
# Check for endianness
-AC_C_BIGENDIAN([
- U_IS_BIG_ENDIAN=1
- U_ENDIAN_CHAR="b"
- ], [
- U_IS_BIG_ENDIAN=0
- U_ENDIAN_CHAR="l"
- ], [], AC_MSG_ERROR([universal endianness not supported]))
+AC_C_BIGENDIAN()
+if test $ac_cv_c_bigendian = no; then
+U_IS_BIG_ENDIAN=0
+U_ENDIAN_CHAR="l"
+else
+U_IS_BIG_ENDIAN=1
+U_ENDIAN_CHAR="b"
+fi
AC_SUBST(U_IS_BIG_ENDIAN)
# Do various POSIX related checks

View file

@ -1,12 +0,0 @@
diff --git a/source/common/putil.cpp b/source/common/putil.cpp
index ab25f3b..94782f8 100644
--- a/source/common/putil.cpp
+++ b/source/common/putil.cpp
@@ -48,7 +48,6 @@
#if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__
/* tzset isn't defined in strict ANSI on MinGW. */
-#undef __STRICT_ANSI__
#endif
/*

View file

@ -21,7 +21,7 @@ vcpkg_download_distfile(
ARCHIVE
URLS "https://github.com/unicode-org/icu/releases/download/release-${VERSION3}/icu4c-${VERSION2}-src.tgz"
FILENAME "icu4c-${VERSION2}-src.tgz"
SHA512 e6c7876c0f3d756f3a6969cad9a8909e535eeaac352f3a721338b9cbd56864bf7414469d29ec843462997815d2ca9d0dab06d38c37cdd4d8feb28ad04d8781b0
SHA512 b702ab62fb37a1574d5f4a768326d0f8fa30d9db5b015605b5f8215b5d8547f83d84880c586d3dcc7b6c76f8d47ef34e04b0f51baa55908f737024dd79a42a6c
)
vcpkg_extract_source_archive(SOURCE_PATH
@ -36,7 +36,7 @@ vcpkg_extract_source_archive(SOURCE_PATH
fix-win-build.patch
vcpkg-cross-data.patch
darwin-rpath.patch
mingw-strict-ansi.diff # backport of https://github.com/unicode-org/icu/pull/3003
ladybird-remove-autoconf272.patch # Reverts https://github.com/unicode-org/icu/commit/b542ae9d9123231d89ab0694e5809c9dfa45c8f7
)
vcpkg_find_acquire_program(PYTHON3)

View file

@ -1,7 +1,7 @@
{
"name": "icu",
"version": "74.2",
"port-version": 4,
"version": "76.1",
"port-version": 0,
"description": "Mature and widely used Unicode and localization library.",
"homepage": "https://icu.unicode.org/home",
"license": "ICU",

View file

@ -145,7 +145,7 @@
},
{
"name": "icu",
"version": "74.2#2"
"version": "76.1#0"
},
{
"name": "libjpeg-turbo",