LibUnicode: Stop generating large UnicodeData hash map

The data in this hash map is now available by way of much smaller arrays
and is now unused.
This commit is contained in:
Timothy Flynn 2021-10-09 19:18:46 -04:00 committed by Andreas Kling
parent d83b262e64
commit 79707d83d3

View file

@ -125,12 +125,6 @@ struct UnicodeData {
NormalizationProps normalization_props;
};
static constexpr auto s_desired_fields = Array {
"canonical_combining_class"sv,
"simple_uppercase_mapping"sv,
"simple_lowercase_mapping"sv,
};
static Vector<u32> parse_code_point_list(StringView const& list)
{
Vector<u32> code_points;
@ -457,7 +451,6 @@ static void generate_unicode_data_header(Core::File& file, UnicodeData& unicode_
StringBuilder builder;
SourceGenerator generator { builder };
generator.set("casing_transform_size", String::number(unicode_data.largest_casing_transform_size));
generator.set("special_casing_size", String::number(unicode_data.largest_special_casing_size));
auto generate_enum = [&](StringView name, StringView default_, Vector<String> values, Vector<Alias> aliases = {}) {
quick_sort(values);
@ -529,44 +522,8 @@ struct SpecialCasing {
Condition condition { Condition::None };
};
struct UnicodeData {
u32 code_point;)~~~");
auto append_field = [&](StringView type, StringView name) {
if (!s_desired_fields.span().contains_slow(name))
return;
generator.set("type", type);
generator.set("name", name);
generator.append(R"~~~(
@type@ @name@;)~~~");
};
// Note: For compile-time performance, only primitive types are used.
append_field("char const*"sv, "name"sv);
append_field("u8"sv, "canonical_combining_class"sv);
append_field("char const*"sv, "bidi_class"sv);
append_field("char const*"sv, "decomposition_type"sv);
append_field("i8"sv, "numeric_value_decimal"sv);
append_field("i8"sv, "numeric_value_digit"sv);
append_field("i8"sv, "numeric_value_numeric"sv);
append_field("bool"sv, "bidi_mirrored"sv);
append_field("char const*"sv, "unicode_1_name"sv);
append_field("char const*"sv, "iso_comment"sv);
append_field("u32"sv, "simple_uppercase_mapping"sv);
append_field("u32"sv, "simple_lowercase_mapping"sv);
append_field("u32"sv, "simple_titlecase_mapping"sv);
generator.append(R"~~~(
SpecialCasing const* special_casing[@special_casing_size@] {};
u32 special_casing_size { 0 };
};
namespace Detail {
Optional<UnicodeData> unicode_data_for_code_point(u32 code_point);
u32 canonical_combining_class(u32 code_point);
u32 simple_uppercase_mapping(u32 code_point);
@ -598,7 +555,6 @@ static void generate_unicode_data_implementation(Core::File& file, UnicodeData c
generator.set("largest_special_casing_size", String::number(unicode_data.largest_special_casing_size));
generator.set("special_casing_size", String::number(unicode_data.special_casing.size()));
generator.set("code_point_data_size", String::number(unicode_data.code_point_data.size()));
generator.append(R"~~~(
#include <AK/Array.h>
@ -653,41 +609,6 @@ static constexpr Array<SpecialCasing, @special_casing_size@> s_special_casing {
generator.append(R"~~~(
} };
static constexpr Array<UnicodeData, @code_point_data_size@> s_unicode_data { {)~~~");
auto append_field = [&](StringView name, String value) {
if (!s_desired_fields.span().contains_slow(name))
return;
generator.set("value", move(value));
generator.append(", @value@");
};
for (auto const& data : unicode_data.code_point_data) {
generator.set("code_point", String::formatted("{:#x}", data.code_point));
generator.append(R"~~~(
{ @code_point@)~~~");
append_field("name", String::formatted("\"{}\"", data.name));
append_field("canonical_combining_class", String::number(data.canonical_combining_class));
append_field("bidi_class", String::formatted("\"{}\"", data.bidi_class));
append_field("decomposition_type", String::formatted("\"{}\"", data.decomposition_type));
append_field("numeric_value_decimal", String::number(data.numeric_value_decimal.value_or(-1)));
append_field("numeric_value_digit", String::number(data.numeric_value_digit.value_or(-1)));
append_field("numeric_value_numeric", String::number(data.numeric_value_numeric.value_or(-1)));
append_field("bidi_mirrored", String::formatted("{}", data.bidi_mirrored));
append_field("unicode_1_name", String::formatted("\"{}\"", data.unicode_1_name));
append_field("iso_comment", String::formatted("\"{}\"", data.iso_comment));
append_field("simple_uppercase_mapping", String::formatted("{:#x}", data.simple_uppercase_mapping.value_or(data.code_point)));
append_field("simple_lowercase_mapping", String::formatted("{:#x}", data.simple_lowercase_mapping.value_or(data.code_point)));
append_field("simple_titlecase_mapping", String::formatted("{:#x}", data.simple_titlecase_mapping.value_or(data.code_point)));
append_list_and_size(data.special_casing_indices, "&s_special_casing[{}]"sv);
generator.append(" },");
}
generator.append(R"~~~(
} };
struct CodePointMapping {
u32 code_point { 0 };
u32 mapping { 0 };
@ -840,52 +761,8 @@ static constexpr Array<Span<CodePointRange const>, @size@> @name@ { {)~~~");
append_prop_list("s_script_extensions"sv, "s_script_extension_{}"sv, unicode_data.script_extensions);
generator.append(R"~~~(
static HashMap<u32, UnicodeData const*> const& ensure_code_point_map()
{
static HashMap<u32, UnicodeData const*> code_point_to_data_map;
code_point_to_data_map.ensure_capacity(s_unicode_data.size());
for (auto const& unicode_data : s_unicode_data)
code_point_to_data_map.set(unicode_data.code_point, &unicode_data);
return code_point_to_data_map;
}
static Optional<u32> index_of_code_point_in_range(u32 code_point)
{)~~~");
for (auto const& range : unicode_data.code_point_ranges) {
generator.set("first", String::formatted("{:#x}", range.first));
generator.set("last", String::formatted("{:#x}", range.last));
generator.append(R"~~~(
if ((code_point > @first@) && (code_point < @last@))
return @first@;)~~~");
}
generator.append(R"~~~(
return {};
}
namespace Detail {
Optional<UnicodeData> unicode_data_for_code_point(u32 code_point)
{
static auto const& code_point_to_data_map = ensure_code_point_map();
VERIFY(is_unicode(code_point));
if (auto data = code_point_to_data_map.get(code_point); data.has_value())
return *(data.value());
if (auto index = index_of_code_point_in_range(code_point); index.has_value()) {
auto data_for_range = *(code_point_to_data_map.get(*index).value());
data_for_range.simple_uppercase_mapping = code_point;
data_for_range.simple_lowercase_mapping = code_point;
return data_for_range;
}
return {};
}
)~~~");
auto append_code_point_mapping_search = [&](StringView method, StringView mappings, StringView fallback) {