From 6a564376fc011167c90a4bd79b408f9a52684e0c Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Sat, 18 Jan 2025 13:29:28 -0500 Subject: [PATCH] Meta+LibUnicode+LibJS: Upgrade to ICU 76.1 This updates our local ICU overlay port to use ICU 76.1. This includes Unicode 16 and CLDR 46. Upstream vcpkg is not able to supply versions past 74 yet due to various dependency issues, but we are able to use this version ourselves. The overlay port now includes a patch to revert ICU's dependence on autoconf 2.72 for now, as this version is not yet available on all systems. All of the test changes were cross-referenced with Firefox to ensure correctness. --- .../DateTimeFormat.prototype.format.js | 12 ++--- ...ateTimeFormat.prototype.resolvedOptions.js | 6 +-- .../DisplayNames/DisplayNames.prototype.of.js | 34 ++++++------- .../NumberFormat.prototype.format.js | 6 +-- .../NumberFormat.prototype.formatToParts.js | 6 +-- .../Number/Number.prototype.toLocaleString.js | 2 +- Libraries/LibUnicode/IDNA.cpp | 2 +- Libraries/LibUnicode/TimeZone.cpp | 8 ++- .../icu/ladybird-remove-autoconf272.patch | 49 +++++++++++++++++++ .../overlay-ports/icu/mingw-strict-ansi.diff | 12 ----- .../vcpkg/overlay-ports/icu/portfile.cmake | 4 +- Meta/CMake/vcpkg/overlay-ports/icu/vcpkg.json | 4 +- vcpkg.json | 2 +- 13 files changed, 95 insertions(+), 52 deletions(-) create mode 100644 Meta/CMake/vcpkg/overlay-ports/icu/ladybird-remove-autoconf272.patch delete mode 100644 Meta/CMake/vcpkg/overlay-ports/icu/mingw-strict-ansi.diff diff --git a/Libraries/LibJS/Tests/builtins/Intl/DateTimeFormat/DateTimeFormat.prototype.format.js b/Libraries/LibJS/Tests/builtins/Intl/DateTimeFormat/DateTimeFormat.prototype.format.js index 2fa055bf1c0..0300fb3090a 100644 --- a/Libraries/LibJS/Tests/builtins/Intl/DateTimeFormat/DateTimeFormat.prototype.format.js +++ b/Libraries/LibJS/Tests/builtins/Intl/DateTimeFormat/DateTimeFormat.prototype.format.js @@ -308,9 +308,9 @@ describe("day", () => { describe("dayPeriod", () => { // 
prettier-ignore const data = [ - { dayPeriod: "narrow", en0: "5 in the afternoon", en1: "7 in the morning", ar0: "٥ بعد الظهر", ar1: "٧ صباحًا", as0: "অপৰাহ্ন ৫", as1: "পূৰ্বাহ্ন ৭"}, - { dayPeriod: "short", en0: "5 in the afternoon", en1: "7 in the morning", ar0: "٥ بعد الظهر", ar1: "٧ ص", as0: "অপৰাহ্ন ৫", as1: "পূৰ্বাহ্ন ৭"}, - { dayPeriod: "long", en0: "5 in the afternoon", en1: "7 in the morning", ar0: "٥ بعد الظهر", ar1: "٧ صباحًا", as0: "অপৰাহ্ন ৫", as1: "পূৰ্বাহ্ন ৭"}, + { dayPeriod: "narrow", en0: "5 in the afternoon", en1: "7 in the morning", ar0: "٥ بعد الظهر", ar1: "٧ صباحًا", as0: "pm ৫", as1: "am ৭"}, + { dayPeriod: "short", en0: "5 in the afternoon", en1: "7 in the morning", ar0: "٥ بعد الظهر", ar1: "٧ ص", as0: "PM ৫", as1: "AM ৭"}, + { dayPeriod: "long", en0: "5 in the afternoon", en1: "7 in the morning", ar0: "٥ بعد الظهر", ar1: "٧ صباحًا", as0: "PM ৫", as1: "AM ৭"}, ]; test("all", () => { @@ -404,9 +404,9 @@ describe("dayPeriod", () => { test("dayPeriod without time", () => { // prettier-ignore const data = [ - { dayPeriod: "narrow", en0: "in the afternoon", en1: "in the morning", ar0: "بعد الظهر", ar1: "صباحًا", as0: "অপৰাহ্ন", as1: "পূৰ্বাহ্ন"}, - { dayPeriod: "short", en0: "in the afternoon", en1: "in the morning", ar0: "بعد الظهر", ar1: "ص", as0: "অপৰাহ্ন", as1: "পূৰ্বাহ্ন"}, - { dayPeriod: "long", en0: "in the afternoon", en1: "in the morning", ar0: "بعد الظهر", ar1: "صباحًا", as0: "অপৰাহ্ন", as1: "পূৰ্বাহ্ন"}, + { dayPeriod: "narrow", en0: "in the afternoon", en1: "in the morning", ar0: "بعد الظهر", ar1: "صباحًا", as0: "pm", as1: "am"}, + { dayPeriod: "short", en0: "in the afternoon", en1: "in the morning", ar0: "بعد الظهر", ar1: "ص", as0: "PM", as1: "AM"}, + { dayPeriod: "long", en0: "in the afternoon", en1: "in the morning", ar0: "بعد الظهر", ar1: "صباحًا", as0: "PM", as1: "AM"}, ]; data.forEach(d => { diff --git a/Libraries/LibJS/Tests/builtins/Intl/DateTimeFormat/DateTimeFormat.prototype.resolvedOptions.js 
b/Libraries/LibJS/Tests/builtins/Intl/DateTimeFormat/DateTimeFormat.prototype.resolvedOptions.js index 50e9f1056f7..d3b866e1811 100644 --- a/Libraries/LibJS/Tests/builtins/Intl/DateTimeFormat/DateTimeFormat.prototype.resolvedOptions.js +++ b/Libraries/LibJS/Tests/builtins/Intl/DateTimeFormat/DateTimeFormat.prototype.resolvedOptions.js @@ -124,13 +124,13 @@ describe("correct behavior", () => { }); test("timeZone", () => { - const en = new Intl.DateTimeFormat("en", { timeZone: "EST5EDT" }); - expect(en.resolvedOptions().timeZone).toBe("EST5EDT"); + const en = new Intl.DateTimeFormat("en", { timeZone: "America/New_York" }); + expect(en.resolvedOptions().timeZone).toBe("America/New_York"); const el = new Intl.DateTimeFormat("el", { timeZone: "UTC" }); expect(el.resolvedOptions().timeZone).toBe("UTC"); - ["UTC", "EST5EDT", "+01:02", "-20:30", "+00:00"].forEach(timeZone => { + ["UTC", "America/New_York", "+01:02", "-20:30", "+00:00"].forEach(timeZone => { const en = new Intl.DateTimeFormat("en", { timeZone: timeZone }); expect(en.resolvedOptions().timeZone).toBe(timeZone); diff --git a/Libraries/LibJS/Tests/builtins/Intl/DisplayNames/DisplayNames.prototype.of.js b/Libraries/LibJS/Tests/builtins/Intl/DisplayNames/DisplayNames.prototype.of.js index 47fff9594da..ee831aa8311 100644 --- a/Libraries/LibJS/Tests/builtins/Intl/DisplayNames/DisplayNames.prototype.of.js +++ b/Libraries/LibJS/Tests/builtins/Intl/DisplayNames/DisplayNames.prototype.of.js @@ -124,7 +124,7 @@ describe("correct behavior", () => { expect(es419.of("Latn")).toBe("latín"); const zhHant = new Intl.DisplayNames(["zh-Hant"], { type: "script" }); - expect(zhHant.of("Latn")).toBe("拉丁文"); + expect(zhHant.of("Latn")).toBe("拉丁字母"); expect(en.of("Aaaa")).toBe("Aaaa"); expect(es419.of("Aaaa")).toBe("Aaaa"); @@ -211,14 +211,14 @@ describe("correct behavior", () => { test("option type dateTimeField, style long", () => { // prettier-ignore const data = [ - { dateTimeField: "era", en: "era", es419: "era", zhHant: "年代" 
}, + { dateTimeField: "era", en: "era", es419: "era", zhHant: "紀元" }, { dateTimeField: "year", en: "year", es419: "año", zhHant: "年" }, - { dateTimeField: "quarter", en: "quarter", es419: "trimestre", zhHant: "季" }, + { dateTimeField: "quarter", en: "quarter", es419: "trimestre", zhHant: "季度" }, { dateTimeField: "month", en: "month", es419: "mes", zhHant: "月" }, { dateTimeField: "weekOfYear", en: "week", es419: "semana", zhHant: "週" }, - { dateTimeField: "weekday", en: "day of the week", es419: "día de la semana", zhHant: "週天" }, + { dateTimeField: "weekday", en: "day of the week", es419: "día de la semana", zhHant: "星期" }, { dateTimeField: "day", en: "day", es419: "día", zhHant: "日" }, - { dateTimeField: "dayPeriod", en: "AM/PM", es419: "a.m./p.m.", zhHant: "上午/下午" }, + { dateTimeField: "dayPeriod", en: "AM/PM", es419: "a.m./p.m.", zhHant: "時段" }, { dateTimeField: "hour", en: "hour", es419: "hora", zhHant: "小時" }, { dateTimeField: "minute", en: "minute", es419: "minuto", zhHant: "分鐘" }, { dateTimeField: "second", en: "second", es419: "segundo", zhHant: "秒" }, @@ -239,16 +239,16 @@ describe("correct behavior", () => { test("option type dateTimeField, style short", () => { // prettier-ignore const data = [ - { dateTimeField: "era", en: "era", es419: "era", zhHant: "年代" }, + { dateTimeField: "era", en: "era", es419: "era", zhHant: "紀元" }, { dateTimeField: "year", en: "yr.", es419: "a", zhHant: "年" }, - { dateTimeField: "quarter", en: "qtr.", es419: "trim.", zhHant: "季" }, + { dateTimeField: "quarter", en: "qtr.", es419: "trim.", zhHant: "季度" }, { dateTimeField: "month", en: "mo.", es419: "m", zhHant: "月" }, { dateTimeField: "weekOfYear", en: "wk.", es419: "sem.", zhHant: "週" }, - { dateTimeField: "weekday", en: "day of wk.", es419: "día de sem.", zhHant: "週天" }, + { dateTimeField: "weekday", en: "day of wk.", es419: "día de sem.", zhHant: "星期" }, { dateTimeField: "day", en: "day", es419: "d", zhHant: "日" }, - { dateTimeField: "dayPeriod", en: "AM/PM", es419: 
"a.m./p.m.", zhHant: "上午/下午" }, - { dateTimeField: "hour", en: "hr.", es419: "h", zhHant: "小時" }, - { dateTimeField: "minute", en: "min.", es419: "min", zhHant: "分鐘" }, + { dateTimeField: "dayPeriod", en: "AM/PM", es419: "a.m./p.m.", zhHant: "時段" }, + { dateTimeField: "hour", en: "hr.", es419: "h", zhHant: "時" }, + { dateTimeField: "minute", en: "min.", es419: "min", zhHant: "分" }, { dateTimeField: "second", en: "sec.", es419: "s", zhHant: "秒" }, { dateTimeField: "timeZoneName", en: "zone", es419: "zona", zhHant: "時區" }, ]; @@ -267,16 +267,16 @@ describe("correct behavior", () => { test("option type dateTimeField, style narrow", () => { // prettier-ignore const data = [ - { dateTimeField: "era", en: "era", es419: "era", zhHant: "年代" }, + { dateTimeField: "era", en: "era", es419: "era", zhHant: "紀元" }, { dateTimeField: "year", en: "yr", es419: "a", zhHant: "年" }, - { dateTimeField: "quarter", en: "qtr", es419: "trim.", zhHant: "季" }, + { dateTimeField: "quarter", en: "qtr", es419: "trim.", zhHant: "季度" }, { dateTimeField: "month", en: "mo", es419: "m", zhHant: "月" }, { dateTimeField: "weekOfYear", en: "wk", es419: "sem.", zhHant: "週" }, - { dateTimeField: "weekday", en: "day of wk.", es419: "día de sem.", zhHant: "週天" }, + { dateTimeField: "weekday", en: "day of wk.", es419: "día de sem.", zhHant: "星期" }, { dateTimeField: "day", en: "day", es419: "d", zhHant: "日" }, - { dateTimeField: "dayPeriod", en: "AM/PM", es419: "a.m./p.m.", zhHant: "上午/下午" }, - { dateTimeField: "hour", en: "hr", es419: "h", zhHant: "小時" }, - { dateTimeField: "minute", en: "min", es419: "min", zhHant: "分鐘" }, + { dateTimeField: "dayPeriod", en: "AM/PM", es419: "a.m./p.m.", zhHant: "時段" }, + { dateTimeField: "hour", en: "hr", es419: "h", zhHant: "時" }, + { dateTimeField: "minute", en: "min", es419: "min", zhHant: "分" }, { dateTimeField: "second", en: "sec", es419: "s", zhHant: "秒" }, { dateTimeField: "timeZoneName", en: "zone", es419: "zona", zhHant: "時區" }, ]; diff --git 
a/Libraries/LibJS/Tests/builtins/Intl/NumberFormat/NumberFormat.prototype.format.js b/Libraries/LibJS/Tests/builtins/Intl/NumberFormat/NumberFormat.prototype.format.js index 6775ac80c92..8897351e5d3 100644 --- a/Libraries/LibJS/Tests/builtins/Intl/NumberFormat/NumberFormat.prototype.format.js +++ b/Libraries/LibJS/Tests/builtins/Intl/NumberFormat/NumberFormat.prototype.format.js @@ -24,9 +24,9 @@ describe("special values", () => { expect(en.format(undefined)).toBe("NaN"); const ar = new Intl.NumberFormat("ar-u-nu-arab"); - expect(ar.format()).toBe("ليس رقم"); - expect(ar.format(NaN)).toBe("ليس رقم"); - expect(ar.format(undefined)).toBe("ليس رقم"); + expect(ar.format()).toBe("ليس رقمًا"); + expect(ar.format(NaN)).toBe("ليس رقمًا"); + expect(ar.format(undefined)).toBe("ليس رقمًا"); }); test("Infinity", () => { diff --git a/Libraries/LibJS/Tests/builtins/Intl/NumberFormat/NumberFormat.prototype.formatToParts.js b/Libraries/LibJS/Tests/builtins/Intl/NumberFormat/NumberFormat.prototype.formatToParts.js index 0f6b6792e16..98ac2aef314 100644 --- a/Libraries/LibJS/Tests/builtins/Intl/NumberFormat/NumberFormat.prototype.formatToParts.js +++ b/Libraries/LibJS/Tests/builtins/Intl/NumberFormat/NumberFormat.prototype.formatToParts.js @@ -20,9 +20,9 @@ describe("special values", () => { expect(en.formatToParts(undefined)).toEqual([{ type: "nan", value: "NaN" }]); const ar = new Intl.NumberFormat("ar-u-nu-arab"); - expect(ar.formatToParts()).toEqual([{ type: "nan", value: "ليس رقم" }]); - expect(ar.formatToParts(NaN)).toEqual([{ type: "nan", value: "ليس رقم" }]); - expect(ar.formatToParts(undefined)).toEqual([{ type: "nan", value: "ليس رقم" }]); + expect(ar.formatToParts()).toEqual([{ type: "nan", value: "ليس رقمًا" }]); + expect(ar.formatToParts(NaN)).toEqual([{ type: "nan", value: "ليس رقمًا" }]); + expect(ar.formatToParts(undefined)).toEqual([{ type: "nan", value: "ليس رقمًا" }]); }); test("Infinity", () => { diff --git 
a/Libraries/LibJS/Tests/builtins/Number/Number.prototype.toLocaleString.js b/Libraries/LibJS/Tests/builtins/Number/Number.prototype.toLocaleString.js index 4338b29d68c..b481dbe4c8c 100644 --- a/Libraries/LibJS/Tests/builtins/Number/Number.prototype.toLocaleString.js +++ b/Libraries/LibJS/Tests/builtins/Number/Number.prototype.toLocaleString.js @@ -19,7 +19,7 @@ describe("special values", () => { test("NaN", () => { expect(NaN.toLocaleString()).toBe("NaN"); expect(NaN.toLocaleString("en")).toBe("NaN"); - expect(NaN.toLocaleString("ar-u-nu-arab")).toBe("ليس رقم"); + expect(NaN.toLocaleString("ar-u-nu-arab")).toBe("ليس رقمًا"); }); test("Infinity", () => { diff --git a/Libraries/LibUnicode/IDNA.cpp b/Libraries/LibUnicode/IDNA.cpp index d80b77a8460..b6859f9de3e 100644 --- a/Libraries/LibUnicode/IDNA.cpp +++ b/Libraries/LibUnicode/IDNA.cpp @@ -15,7 +15,7 @@ namespace Unicode::IDNA { // https://www.unicode.org/reports/tr46/#ToASCII ErrorOr<String> to_ascii(Utf8View domain_name, ToAsciiOptions const& options) { - u32 icu_options = UIDNA_DEFAULT; + u32 icu_options = 0; if (options.check_bidi == CheckBidi::Yes) icu_options |= UIDNA_CHECK_BIDI; diff --git a/Libraries/LibUnicode/TimeZone.cpp b/Libraries/LibUnicode/TimeZone.cpp index 382f37fce0e..d1bc1fff42a 100644 --- a/Libraries/LibUnicode/TimeZone.cpp +++ b/Libraries/LibUnicode/TimeZone.cpp @@ -138,7 +138,13 @@ Optional<TimeZoneOffset> time_zone_offset(StringView time_zone, UnixDateTime tim i32 raw_offset = 0; i32 dst_offset = 0; - time_zone_data->time_zone().getOffset(static_cast<UDate>(time.milliseconds_since_epoch()), 0, raw_offset, dst_offset, status); + // We must clamp the time we provide to ICU such that the result of converting milliseconds to days fits in an i32. + // Further, that conversion must still be valid after applying DST offsets to the time we provide. 
+ static constexpr auto min_time = (static_cast<UDate>(AK::NumericLimits<i32>::min()) + U_MILLIS_PER_DAY) * U_MILLIS_PER_DAY; + static constexpr auto max_time = (static_cast<UDate>(AK::NumericLimits<i32>::max()) - U_MILLIS_PER_DAY) * U_MILLIS_PER_DAY; + auto icu_time = clamp(static_cast<UDate>(time.milliseconds_since_epoch()), min_time, max_time); + + time_zone_data->time_zone().getOffset(icu_time, 0, raw_offset, dst_offset, status); if (icu_failure(status)) return {}; diff --git a/Meta/CMake/vcpkg/overlay-ports/icu/ladybird-remove-autoconf272.patch b/Meta/CMake/vcpkg/overlay-ports/icu/ladybird-remove-autoconf272.patch new file mode 100644 index 00000000000..b9b74add6fd --- /dev/null +++ b/Meta/CMake/vcpkg/overlay-ports/icu/ladybird-remove-autoconf272.patch @@ -0,0 +1,49 @@ +diff --git a/source/configure.ac b/source/configure.ac +index da4f170abfa..28dbff06a4c 100644 +--- a/source/configure.ac ++++ b/source/configure.ac +@@ -6,16 +6,15 @@ AC_COPYRIGHT([ Copyright (C) 2016 and later: Unicode, Inc. and others. License & + + # NOTE: please use 'autoreconf' to rebuild, otherwise 'aclocal && autoconf'. + +-# Check for minimum autoconf version. Use exactly this version when rebuilding +-# 'aclocal.m4' and 'configure' for merging upstream, to avoid spurious diffs. +-AC_PREREQ([2.72]) ++# Check for autoconf version ++AC_PREREQ(2.69) + + # Process this file with autoreconf to produce a configure script + AC_INIT([ICU], +- m4_esyscmd_s([sed -n 's/^[ ]*#[ ]*define[ ]*U_ICU_VERSION[ ]*"\([^"]*\)".*/\1/p' common/unicode/uvernum.h]), +- [https://icu.unicode.org/bugs], +- [icu4c], +- [https://icu.unicode.org/]) ++ m4_esyscmd_s([sed -n 's/^[ ]*#[ ]*define[ ]*U_ICU_VERSION[ ]*"\([^"]*\)".*/\1/p' "./common/unicode/uvernum.h]"), ++ [http://icu-project.org/bugs], ++ [International Components for Unicode], ++ [http://icu-project.org]) + + # Instruct Python to never write any byte code to the ICU source tree. 
+ PYTHONDONTWRITEBYTECODE=1 +@@ -660,13 +659,14 @@ fi + AC_SUBST(U_HAVE_DIRENT_H) + + # Check for endianness +-AC_C_BIGENDIAN([ +- U_IS_BIG_ENDIAN=1 +- U_ENDIAN_CHAR="b" +- ], [ +- U_IS_BIG_ENDIAN=0 +- U_ENDIAN_CHAR="l" +- ], [], AC_MSG_ERROR([universal endianness not supported])) ++AC_C_BIGENDIAN() ++if test $ac_cv_c_bigendian = no; then ++U_IS_BIG_ENDIAN=0 ++U_ENDIAN_CHAR="l" ++else ++U_IS_BIG_ENDIAN=1 ++U_ENDIAN_CHAR="b" ++fi + AC_SUBST(U_IS_BIG_ENDIAN) + + # Do various POSIX related checks diff --git a/Meta/CMake/vcpkg/overlay-ports/icu/mingw-strict-ansi.diff b/Meta/CMake/vcpkg/overlay-ports/icu/mingw-strict-ansi.diff deleted file mode 100644 index ea6de18bdd3..00000000000 --- a/Meta/CMake/vcpkg/overlay-ports/icu/mingw-strict-ansi.diff +++ /dev/null @@ -1,12 +0,0 @@ -diff --git a/source/common/putil.cpp b/source/common/putil.cpp -index ab25f3b..94782f8 100644 ---- a/source/common/putil.cpp -+++ b/source/common/putil.cpp -@@ -48,7 +48,6 @@ - - #if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__ - /* tzset isn't defined in strict ANSI on MinGW. 
*/ --#undef __STRICT_ANSI__ - #endif - - /* diff --git a/Meta/CMake/vcpkg/overlay-ports/icu/portfile.cmake b/Meta/CMake/vcpkg/overlay-ports/icu/portfile.cmake index 26015d26585..9bde72aae66 100644 --- a/Meta/CMake/vcpkg/overlay-ports/icu/portfile.cmake +++ b/Meta/CMake/vcpkg/overlay-ports/icu/portfile.cmake @@ -21,7 +21,7 @@ vcpkg_download_distfile( ARCHIVE URLS "https://github.com/unicode-org/icu/releases/download/release-${VERSION3}/icu4c-${VERSION2}-src.tgz" FILENAME "icu4c-${VERSION2}-src.tgz" - SHA512 e6c7876c0f3d756f3a6969cad9a8909e535eeaac352f3a721338b9cbd56864bf7414469d29ec843462997815d2ca9d0dab06d38c37cdd4d8feb28ad04d8781b0 + SHA512 b702ab62fb37a1574d5f4a768326d0f8fa30d9db5b015605b5f8215b5d8547f83d84880c586d3dcc7b6c76f8d47ef34e04b0f51baa55908f737024dd79a42a6c ) vcpkg_extract_source_archive(SOURCE_PATH @@ -36,7 +36,7 @@ vcpkg_extract_source_archive(SOURCE_PATH fix-win-build.patch vcpkg-cross-data.patch darwin-rpath.patch - mingw-strict-ansi.diff # backport of https://github.com/unicode-org/icu/pull/3003 + ladybird-remove-autoconf272.patch # Reverts https://github.com/unicode-org/icu/commit/b542ae9d9123231d89ab0694e5809c9dfa45c8f7 ) vcpkg_find_acquire_program(PYTHON3) diff --git a/Meta/CMake/vcpkg/overlay-ports/icu/vcpkg.json b/Meta/CMake/vcpkg/overlay-ports/icu/vcpkg.json index 23c96f05e10..145a8f49b23 100644 --- a/Meta/CMake/vcpkg/overlay-ports/icu/vcpkg.json +++ b/Meta/CMake/vcpkg/overlay-ports/icu/vcpkg.json @@ -1,7 +1,7 @@ { "name": "icu", - "version": "74.2", - "port-version": 4, + "version": "76.1", + "port-version": 0, "description": "Mature and widely used Unicode and localization library.", "homepage": "https://icu.unicode.org/home", "license": "ICU", diff --git a/vcpkg.json b/vcpkg.json index c498a765abb..2b9ffda7c3e 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -145,7 +145,7 @@ }, { "name": "icu", - "version": "74.2#2" + "version": "76.1#0" }, { "name": "libjpeg-turbo",