mirror of
https://github.com/SerenityOS/serenity.git
synced 2025-01-23 09:51:57 -05:00
LibUnicode: Dynamically load the generated UnicodeData symbols
The generated data for libunicodedata.so is quite large, and loading it is a price paid by nearly every application by way of depending on LibRegex. In order to defer this cost until an application actually uses one of the surrounding APIs, dynamically load the generated symbols. To be able to load the symbols dynamically, the generated methods must have unmangled names. Typically, this is accomplished with `extern "C"` blocks. The clang toolchain complains about this here because the types returned from the generators are strictly C++ types. So to avoid name mangling, we use the asm() compiler directive to manually define a symbol name; the caveat is that we *must* be sure the symbols are unique. As an extra precaution, we prefix each symbol name with "unicode_". For more details, see: https://gcc.gnu.org/onlinedocs/gcc/Asm-Labels.html The symbol loader used in this implementation provides the additional benefit of removing many [[maybe_unused]] attributes from the LibUnicode methods. Internally, if ENABLE_UNICODE_DATABASE_DOWNLOAD is OFF, the loader is able to stub out the function pointers it returns. Note that as of this commit, LibUnicode is still directly linked against LibUnicodeData. This commit is just a first step towards removing that.
This commit is contained in:
parent
749d5ebd68
commit
3fd53baa25
7 changed files with 256 additions and 101 deletions
|
@ -427,6 +427,7 @@ if (BUILD_LAGOM)
|
||||||
SOURCES ${LIBUNICODE_SOURCES} ${UNICODE_DATA_SOURCES}
|
SOURCES ${LIBUNICODE_SOURCES} ${UNICODE_DATA_SOURCES}
|
||||||
)
|
)
|
||||||
target_compile_definitions(LagomUnicode PRIVATE ENABLE_UNICODE_DATA=$<BOOL:${ENABLE_UNICODE_DATABASE_DOWNLOAD}>)
|
target_compile_definitions(LagomUnicode PRIVATE ENABLE_UNICODE_DATA=$<BOOL:${ENABLE_UNICODE_DATABASE_DOWNLOAD}>)
|
||||||
|
target_link_libraries(LagomUnicode -ldl)
|
||||||
|
|
||||||
# WASM
|
# WASM
|
||||||
file(GLOB LIBWASM_SOURCES CONFIGURE_DEPENDS "../../Userland/Libraries/LibWasm/*/*.cpp")
|
file(GLOB LIBWASM_SOURCES CONFIGURE_DEPENDS "../../Userland/Libraries/LibWasm/*/*.cpp")
|
||||||
|
|
|
@ -574,8 +574,6 @@ enum class @name@ : @underlying@ {)~~~");
|
||||||
generator.append(R"~~~(
|
generator.append(R"~~~(
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <AK/Optional.h>
|
|
||||||
#include <AK/Span.h>
|
|
||||||
#include <AK/Types.h>
|
#include <AK/Types.h>
|
||||||
#include <LibUnicode/Forward.h>
|
#include <LibUnicode/Forward.h>
|
||||||
#include <LibUnicode/UnicodeLocale.h>
|
#include <LibUnicode/UnicodeLocale.h>
|
||||||
|
@ -605,28 +603,6 @@ struct SpecialCasing {
|
||||||
Condition condition { Condition::None };
|
Condition condition { Condition::None };
|
||||||
};
|
};
|
||||||
|
|
||||||
namespace Detail {
|
|
||||||
|
|
||||||
Optional<String> code_point_display_name(u32 code_point);
|
|
||||||
|
|
||||||
u32 canonical_combining_class(u32 code_point);
|
|
||||||
|
|
||||||
u32 simple_uppercase_mapping(u32 code_point);
|
|
||||||
u32 simple_lowercase_mapping(u32 code_point);
|
|
||||||
Span<SpecialCasing const* const> special_case_mapping(u32 code_point);
|
|
||||||
|
|
||||||
bool code_point_has_general_category(u32 code_point, GeneralCategory general_category);
|
|
||||||
Optional<GeneralCategory> general_category_from_string(StringView general_category);
|
|
||||||
|
|
||||||
bool code_point_has_property(u32 code_point, Property property);
|
|
||||||
Optional<Property> property_from_string(StringView property);
|
|
||||||
|
|
||||||
bool code_point_has_script(u32 code_point, Script script);
|
|
||||||
bool code_point_has_script_extension(u32 code_point, Script script);
|
|
||||||
Optional<Script> script_from_string(StringView script);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
)~~~");
|
)~~~");
|
||||||
|
|
||||||
|
@ -645,11 +621,13 @@ static void generate_unicode_data_implementation(Core::File& file, UnicodeData c
|
||||||
#include <AK/Array.h>
|
#include <AK/Array.h>
|
||||||
#include <AK/BinarySearch.h>
|
#include <AK/BinarySearch.h>
|
||||||
#include <AK/CharacterTypes.h>
|
#include <AK/CharacterTypes.h>
|
||||||
|
#include <AK/Optional.h>
|
||||||
|
#include <AK/Span.h>
|
||||||
#include <AK/String.h>
|
#include <AK/String.h>
|
||||||
#include <AK/StringView.h>
|
#include <AK/StringView.h>
|
||||||
#include <LibUnicode/UnicodeData.h>
|
#include <LibUnicode/UnicodeData.h>
|
||||||
|
|
||||||
namespace Unicode {
|
namespace Unicode::Detail {
|
||||||
)~~~");
|
)~~~");
|
||||||
|
|
||||||
auto append_list_and_size = [&](auto const& list, StringView format) {
|
auto append_list_and_size = [&](auto const& list, StringView format) {
|
||||||
|
@ -873,8 +851,7 @@ static constexpr Array<CodePointName, @code_point_display_names_size@> s_code_po
|
||||||
)~~~");
|
)~~~");
|
||||||
|
|
||||||
generator.append(R"~~~(
|
generator.append(R"~~~(
|
||||||
namespace Detail {
|
Optional<String> code_point_display_name(u32 code_point) asm("unicode_code_point_display_name");
|
||||||
|
|
||||||
Optional<String> code_point_display_name(u32 code_point)
|
Optional<String> code_point_display_name(u32 code_point)
|
||||||
{
|
{
|
||||||
if (auto const* entry = binary_search(s_code_point_display_names, code_point, nullptr, CodePointNameComparator {})) {
|
if (auto const* entry = binary_search(s_code_point_display_names, code_point, nullptr, CodePointNameComparator {})) {
|
||||||
|
@ -893,6 +870,7 @@ Optional<String> code_point_display_name(u32 code_point)
|
||||||
generator.set("mappings", mappings);
|
generator.set("mappings", mappings);
|
||||||
generator.set("fallback", fallback);
|
generator.set("fallback", fallback);
|
||||||
generator.append(R"~~~(
|
generator.append(R"~~~(
|
||||||
|
u32 @method@(u32 code_point) asm("unicode_@method@");
|
||||||
u32 @method@(u32 code_point)
|
u32 @method@(u32 code_point)
|
||||||
{
|
{
|
||||||
auto const* mapping = binary_search(@mappings@, code_point, nullptr, CodePointComparator<CodePointMapping> {});
|
auto const* mapping = binary_search(@mappings@, code_point, nullptr, CodePointComparator<CodePointMapping> {});
|
||||||
|
@ -906,6 +884,7 @@ u32 @method@(u32 code_point)
|
||||||
append_code_point_mapping_search("simple_lowercase_mapping"sv, "s_lowercase_mappings"sv, "code_point"sv);
|
append_code_point_mapping_search("simple_lowercase_mapping"sv, "s_lowercase_mappings"sv, "code_point"sv);
|
||||||
|
|
||||||
generator.append(R"~~~(
|
generator.append(R"~~~(
|
||||||
|
Span<SpecialCasing const* const> special_case_mapping(u32 code_point) asm("unicode_special_case_mapping");
|
||||||
Span<SpecialCasing const* const> special_case_mapping(u32 code_point)
|
Span<SpecialCasing const* const> special_case_mapping(u32 code_point)
|
||||||
{
|
{
|
||||||
auto const* mapping = binary_search(s_special_case_mappings, code_point, nullptr, CodePointComparator<SpecialCaseMapping> {});
|
auto const* mapping = binary_search(s_special_case_mappings, code_point, nullptr, CodePointComparator<SpecialCaseMapping> {});
|
||||||
|
@ -921,6 +900,7 @@ Span<SpecialCasing const* const> special_case_mapping(u32 code_point)
|
||||||
generator.set("enum_snake", enum_snake);
|
generator.set("enum_snake", enum_snake);
|
||||||
generator.set("collection_name", collection_name);
|
generator.set("collection_name", collection_name);
|
||||||
generator.append(R"~~~(
|
generator.append(R"~~~(
|
||||||
|
bool code_point_has_@enum_snake@(u32 code_point, @enum_title@ @enum_snake@) asm("unicode_code_point_has_@enum_snake@");
|
||||||
bool code_point_has_@enum_snake@(u32 code_point, @enum_title@ @enum_snake@)
|
bool code_point_has_@enum_snake@(u32 code_point, @enum_title@ @enum_snake@)
|
||||||
{
|
{
|
||||||
auto index = static_cast<@enum_title@UnderlyingType>(@enum_snake@);
|
auto index = static_cast<@enum_title@UnderlyingType>(@enum_snake@);
|
||||||
|
@ -941,7 +921,7 @@ bool code_point_has_@enum_snake@(u32 code_point, @enum_title@ @enum_snake@)
|
||||||
for (auto const& alias : aliases)
|
for (auto const& alias : aliases)
|
||||||
hashes.set(alias.alias.hash(), alias.alias);
|
hashes.set(alias.alias.hash(), alias.alias);
|
||||||
|
|
||||||
generate_value_from_string(generator, "{}_from_string"sv, enum_title, enum_snake, move(hashes));
|
generate_value_from_string_for_dynamic_loading(generator, "{}_from_string"sv, enum_title, enum_snake, move(hashes));
|
||||||
};
|
};
|
||||||
|
|
||||||
append_prop_search("GeneralCategory"sv, "general_category"sv, "s_general_categories"sv);
|
append_prop_search("GeneralCategory"sv, "general_category"sv, "s_general_categories"sv);
|
||||||
|
@ -956,8 +936,6 @@ bool code_point_has_@enum_snake@(u32 code_point, @enum_title@ @enum_snake@)
|
||||||
|
|
||||||
generator.append(R"~~~(
|
generator.append(R"~~~(
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
|
||||||
)~~~");
|
)~~~");
|
||||||
|
|
||||||
VERIFY(file.write(generator.as_string_view()));
|
VERIFY(file.write(generator.as_string_view()));
|
||||||
|
|
|
@ -345,6 +345,60 @@ Optional<@return_type@> @method_name@(StringView key)
|
||||||
)~~~");
|
)~~~");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This is a temporary duplicate of generate_value_from_string() until all generators support dynamic loading.
|
||||||
|
template<typename ValueType>
|
||||||
|
void generate_value_from_string_for_dynamic_loading(SourceGenerator& generator, StringView method_name_format, StringView value_type, StringView value_name, HashValueMap<ValueType> hashes, Optional<StringView> return_type = {}, StringView return_format = "{}"sv)
|
||||||
|
{
|
||||||
|
ensure_from_string_types_are_generated(generator);
|
||||||
|
|
||||||
|
generator.set("method_name", String::formatted(method_name_format, value_name));
|
||||||
|
generator.set("value_type", value_type);
|
||||||
|
generator.set("value_name", value_name);
|
||||||
|
generator.set("return_type", return_type.has_value() ? *return_type : value_type);
|
||||||
|
generator.set("size", String::number(hashes.size()));
|
||||||
|
|
||||||
|
generator.append(R"~~~(
|
||||||
|
Optional<@return_type@> @method_name@(StringView key) asm("unicode_@method_name@");
|
||||||
|
Optional<@return_type@> @method_name@(StringView key)
|
||||||
|
{
|
||||||
|
constexpr Array<HashValuePair<@value_type@>, @size@> hash_pairs { {
|
||||||
|
)~~~");
|
||||||
|
|
||||||
|
auto hash_keys = hashes.keys();
|
||||||
|
quick_sort(hash_keys);
|
||||||
|
|
||||||
|
constexpr size_t max_values_per_row = 10;
|
||||||
|
size_t values_in_current_row = 0;
|
||||||
|
|
||||||
|
for (auto hash_key : hash_keys) {
|
||||||
|
if (values_in_current_row++ > 0)
|
||||||
|
generator.append(" ");
|
||||||
|
|
||||||
|
if constexpr (IsIntegral<ValueType>)
|
||||||
|
generator.set("value"sv, String::number(hashes.get(hash_key).value()));
|
||||||
|
else
|
||||||
|
generator.set("value"sv, String::formatted("{}::{}", value_type, hashes.get(hash_key).value()));
|
||||||
|
|
||||||
|
generator.set("hash"sv, String::number(hash_key));
|
||||||
|
generator.append("{ @hash@U, @value@ },"sv);
|
||||||
|
|
||||||
|
if (values_in_current_row == max_values_per_row) {
|
||||||
|
generator.append("\n ");
|
||||||
|
values_in_current_row = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
generator.set("return_statement", String::formatted(return_format, "value->value"sv));
|
||||||
|
generator.append(R"~~~(
|
||||||
|
} };
|
||||||
|
|
||||||
|
if (auto const* value = binary_search(hash_pairs, key.hash(), nullptr, HashValueComparator<@value_type@> {}))
|
||||||
|
return @return_statement@;
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
)~~~");
|
||||||
|
}
|
||||||
|
|
||||||
template<typename IdentifierFormatter>
|
template<typename IdentifierFormatter>
|
||||||
void generate_enum(SourceGenerator& generator, IdentifierFormatter&& format_identifier, StringView name, StringView default_, Vector<String>& values, Vector<Alias> aliases = {})
|
void generate_enum(SourceGenerator& generator, IdentifierFormatter&& format_identifier, StringView name, StringView default_, Vector<String>& values, Vector<Alias> aliases = {})
|
||||||
{
|
{
|
||||||
|
|
|
@ -13,6 +13,7 @@ set(SOURCES
|
||||||
DateTimeFormat.cpp
|
DateTimeFormat.cpp
|
||||||
Locale.cpp
|
Locale.cpp
|
||||||
NumberFormat.cpp
|
NumberFormat.cpp
|
||||||
|
UnicodeSymbols.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
serenity_lib(LibUnicode unicode)
|
serenity_lib(LibUnicode unicode)
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
#include <AK/Utf8View.h>
|
#include <AK/Utf8View.h>
|
||||||
#include <LibUnicode/CharacterTypes.h>
|
#include <LibUnicode/CharacterTypes.h>
|
||||||
#include <LibUnicode/Locale.h>
|
#include <LibUnicode/Locale.h>
|
||||||
|
#include <LibUnicode/UnicodeSymbols.h>
|
||||||
|
|
||||||
#if ENABLE_UNICODE_DATA
|
#if ENABLE_UNICODE_DATA
|
||||||
# include <LibUnicode/UnicodeData.h>
|
# include <LibUnicode/UnicodeData.h>
|
||||||
|
@ -23,6 +24,18 @@ namespace Unicode {
|
||||||
|
|
||||||
#if ENABLE_UNICODE_DATA
|
#if ENABLE_UNICODE_DATA
|
||||||
|
|
||||||
|
// File-local shim: forwards to the canonical_combining_class entry of the loaded symbol table.
static u32 canonical_combining_class(u32 code_point)
{
    // The table reference is obtained on the first call and reused afterwards.
    static auto const& loaded_symbols = Detail::Symbols::ensure_loaded();

    auto const lookup = loaded_symbols.canonical_combining_class;
    return lookup(code_point);
}
|
||||||
|
|
||||||
|
// File-local shim: forwards to the special_case_mapping entry of the loaded symbol table.
static Span<Unicode::SpecialCasing const* const> special_case_mapping(u32 code_point)
{
    // The table reference is obtained on the first call and reused afterwards.
    static auto const& loaded_symbols = Detail::Symbols::ensure_loaded();

    auto const lookup = loaded_symbols.special_case_mapping;
    return lookup(code_point);
}
|
||||||
|
|
||||||
static bool is_after_uppercase_i(Utf8View const& string, size_t index)
|
static bool is_after_uppercase_i(Utf8View const& string, size_t index)
|
||||||
{
|
{
|
||||||
// There is an uppercase I before C, and there is no intervening combining character class 230 (Above) or 0.
|
// There is an uppercase I before C, and there is no intervening combining character class 230 (Above) or 0.
|
||||||
|
@ -36,11 +49,11 @@ static bool is_after_uppercase_i(Utf8View const& string, size_t index)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 canonical_combining_class = Detail::canonical_combining_class(code_point);
|
u32 combining_class = canonical_combining_class(code_point);
|
||||||
|
|
||||||
if (canonical_combining_class == 0)
|
if (combining_class == 0)
|
||||||
found_uppercase_i = false;
|
found_uppercase_i = false;
|
||||||
else if (canonical_combining_class == 230)
|
else if (combining_class == 230)
|
||||||
found_uppercase_i = false;
|
found_uppercase_i = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -60,11 +73,11 @@ static bool is_after_soft_dotted_code_point(Utf8View const& string, size_t index
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 canonical_combining_class = Detail::canonical_combining_class(code_point);
|
u32 combining_class = canonical_combining_class(code_point);
|
||||||
|
|
||||||
if (canonical_combining_class == 0)
|
if (combining_class == 0)
|
||||||
found_soft_dotted_code_point = false;
|
found_soft_dotted_code_point = false;
|
||||||
else if (canonical_combining_class == 230)
|
else if (combining_class == 230)
|
||||||
found_soft_dotted_code_point = false;
|
found_soft_dotted_code_point = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -119,11 +132,11 @@ static bool is_followed_by_combining_class_above(Utf8View const& string, size_t
|
||||||
: Utf8View {};
|
: Utf8View {};
|
||||||
|
|
||||||
for (auto code_point : following_view) {
|
for (auto code_point : following_view) {
|
||||||
u32 canonical_combining_class = Detail::canonical_combining_class(code_point);
|
u32 combining_class = canonical_combining_class(code_point);
|
||||||
|
|
||||||
if (canonical_combining_class == 0)
|
if (combining_class == 0)
|
||||||
return false;
|
return false;
|
||||||
if (canonical_combining_class == 230)
|
if (combining_class == 230)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -142,11 +155,11 @@ static bool is_followed_by_combining_dot_above(Utf8View const& string, size_t in
|
||||||
if (code_point == 0x307)
|
if (code_point == 0x307)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
u32 canonical_combining_class = Detail::canonical_combining_class(code_point);
|
u32 combining_class = canonical_combining_class(code_point);
|
||||||
|
|
||||||
if (canonical_combining_class == 0)
|
if (combining_class == 0)
|
||||||
return false;
|
return false;
|
||||||
if (canonical_combining_class == 230)
|
if (combining_class == 230)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -162,7 +175,7 @@ static SpecialCasing const* find_matching_special_case(u32 code_point, Utf8View
|
||||||
requested_locale = *maybe_locale;
|
requested_locale = *maybe_locale;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto special_casings = Detail::special_case_mapping(code_point);
|
auto special_casings = special_case_mapping(code_point);
|
||||||
|
|
||||||
for (auto const* special_casing : special_casings) {
|
for (auto const* special_casing : special_casings) {
|
||||||
if (special_casing->locale != Locale::None && special_casing->locale != requested_locale)
|
if (special_casing->locale != Locale::None && special_casing->locale != requested_locale)
|
||||||
|
@ -206,29 +219,20 @@ static SpecialCasing const* find_matching_special_case(u32 code_point, Utf8View
|
||||||
|
|
||||||
u32 to_unicode_lowercase(u32 code_point)
|
u32 to_unicode_lowercase(u32 code_point)
|
||||||
{
|
{
|
||||||
#if ENABLE_UNICODE_DATA
|
static auto const& symbols = Detail::Symbols::ensure_loaded();
|
||||||
return Detail::simple_lowercase_mapping(code_point);
|
return symbols.simple_lowercase_mapping(code_point);
|
||||||
#else
|
|
||||||
return AK::to_ascii_lowercase(code_point);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 to_unicode_uppercase(u32 code_point)
|
u32 to_unicode_uppercase(u32 code_point)
|
||||||
{
|
{
|
||||||
#if ENABLE_UNICODE_DATA
|
static auto const& symbols = Detail::Symbols::ensure_loaded();
|
||||||
return Detail::simple_uppercase_mapping(code_point);
|
return symbols.simple_uppercase_mapping(code_point);
|
||||||
#else
|
|
||||||
return AK::to_ascii_uppercase(code_point);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Optional<String> code_point_display_name([[maybe_unused]] u32 code_point)
|
Optional<String> code_point_display_name(u32 code_point)
|
||||||
{
|
{
|
||||||
#if ENABLE_UNICODE_DATA
|
static auto const& symbols = Detail::Symbols::ensure_loaded();
|
||||||
return Detail::code_point_display_name(code_point);
|
return symbols.code_point_display_name(code_point);
|
||||||
#else
|
|
||||||
return {};
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
String to_unicode_lowercase_full(StringView string, [[maybe_unused]] Optional<StringView> locale)
|
String to_unicode_lowercase_full(StringView string, [[maybe_unused]] Optional<StringView> locale)
|
||||||
|
@ -289,40 +293,28 @@ String to_unicode_uppercase_full(StringView string, [[maybe_unused]] Optional<St
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
Optional<GeneralCategory> general_category_from_string([[maybe_unused]] StringView general_category)
|
Optional<GeneralCategory> general_category_from_string(StringView general_category)
|
||||||
{
|
{
|
||||||
#if ENABLE_UNICODE_DATA
|
static auto const& symbols = Detail::Symbols::ensure_loaded();
|
||||||
return Detail::general_category_from_string(general_category);
|
return symbols.general_category_from_string(general_category);
|
||||||
#else
|
|
||||||
return {};
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool code_point_has_general_category([[maybe_unused]] u32 code_point, [[maybe_unused]] GeneralCategory general_category)
|
bool code_point_has_general_category(u32 code_point, GeneralCategory general_category)
|
||||||
{
|
{
|
||||||
#if ENABLE_UNICODE_DATA
|
static auto const& symbols = Detail::Symbols::ensure_loaded();
|
||||||
return Detail::code_point_has_general_category(code_point, general_category);
|
return symbols.code_point_has_general_category(code_point, general_category);
|
||||||
#else
|
|
||||||
return {};
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Optional<Property> property_from_string([[maybe_unused]] StringView property)
|
Optional<Property> property_from_string(StringView property)
|
||||||
{
|
{
|
||||||
#if ENABLE_UNICODE_DATA
|
static auto const& symbols = Detail::Symbols::ensure_loaded();
|
||||||
return Detail::property_from_string(property);
|
return symbols.property_from_string(property);
|
||||||
#else
|
|
||||||
return {};
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool code_point_has_property([[maybe_unused]] u32 code_point, [[maybe_unused]] Property property)
|
bool code_point_has_property(u32 code_point, Property property)
|
||||||
{
|
{
|
||||||
#if ENABLE_UNICODE_DATA
|
static auto const& symbols = Detail::Symbols::ensure_loaded();
|
||||||
return Detail::code_point_has_property(code_point, property);
|
return symbols.code_point_has_property(code_point, property);
|
||||||
#else
|
|
||||||
return false;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_ecma262_property([[maybe_unused]] Property property)
|
bool is_ecma262_property([[maybe_unused]] Property property)
|
||||||
|
@ -392,31 +384,22 @@ bool is_ecma262_property([[maybe_unused]] Property property)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
Optional<Script> script_from_string([[maybe_unused]] StringView script)
|
Optional<Script> script_from_string(StringView script)
|
||||||
{
|
{
|
||||||
#if ENABLE_UNICODE_DATA
|
static auto const& symbols = Detail::Symbols::ensure_loaded();
|
||||||
return Detail::script_from_string(script);
|
return symbols.script_from_string(script);
|
||||||
#else
|
|
||||||
return {};
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool code_point_has_script([[maybe_unused]] u32 code_point, [[maybe_unused]] Script script)
|
bool code_point_has_script(u32 code_point, Script script)
|
||||||
{
|
{
|
||||||
#if ENABLE_UNICODE_DATA
|
static auto const& symbols = Detail::Symbols::ensure_loaded();
|
||||||
return Detail::code_point_has_script(code_point, script);
|
return symbols.code_point_has_script(code_point, script);
|
||||||
#else
|
|
||||||
return false;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool code_point_has_script_extension([[maybe_unused]] u32 code_point, [[maybe_unused]] Script script)
|
bool code_point_has_script_extension(u32 code_point, Script script)
|
||||||
{
|
{
|
||||||
#if ENABLE_UNICODE_DATA
|
static auto const& symbols = Detail::Symbols::ensure_loaded();
|
||||||
return Detail::code_point_has_script_extension(code_point, script);
|
return symbols.code_point_has_script_extension(code_point, script);
|
||||||
#else
|
|
||||||
return false;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
94
Userland/Libraries/LibUnicode/UnicodeSymbols.cpp
Normal file
94
Userland/Libraries/LibUnicode/UnicodeSymbols.cpp
Normal file
|
@ -0,0 +1,94 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <LibUnicode/UnicodeSymbols.h>
|
||||||
|
|
||||||
|
#if ENABLE_UNICODE_DATA
|
||||||
|
# if defined(__serenity__)
|
||||||
|
# include <LibDl/dlfcn.h>
|
||||||
|
# include <LibDl/dlfcn_integration.h>
|
||||||
|
# else
|
||||||
|
# include <dlfcn.h>
|
||||||
|
# endif
|
||||||
|
#else
|
||||||
|
# include <AK/Function.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace Unicode::Detail {
|
||||||
|
|
||||||
|
#if !ENABLE_UNICODE_DATA
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
struct FunctionStub;
|
||||||
|
|
||||||
|
template<typename ReturnType, typename... ParameterTypes>
|
||||||
|
struct FunctionStub<Function<ReturnType(ParameterTypes...)>> {
|
||||||
|
static constexpr auto make_stub()
|
||||||
|
{
|
||||||
|
return [](ParameterTypes...) -> ReturnType { return {}; };
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// This loader supports 3 modes:
|
||||||
|
//
|
||||||
|
// 1. When the Unicode data generators are enabled, and the target is Serenity, the symbols are
|
||||||
|
// dynamically loaded from the shared library containing them.
|
||||||
|
//
|
||||||
|
// 2. When the Unicode data generators are enabled, and the target is Lagom, the symbols are
|
||||||
|
// dynamically loaded from the main program.
|
||||||
|
//
|
||||||
|
// 3. When the Unicode data generators are disabled, the symbols are stubbed out to empty lambdas.
|
||||||
|
// This allows callers to remain agnostic as to whether the generators are enabled.
|
||||||
|
Symbols const& Symbols::ensure_loaded()
|
||||||
|
{
|
||||||
|
static Symbols symbols {};
|
||||||
|
|
||||||
|
static bool initialized = false;
|
||||||
|
if (initialized)
|
||||||
|
return symbols;
|
||||||
|
|
||||||
|
#if ENABLE_UNICODE_DATA
|
||||||
|
# if defined(__serenity__)
|
||||||
|
static void* libunicodedata = MUST(__dlopen("libunicodedata.so.serenity", RTLD_NOW));
|
||||||
|
|
||||||
|
auto load_symbol = [&]<typename T>(T& dest, char const* name) {
|
||||||
|
dest = reinterpret_cast<T>(MUST(__dlsym(libunicodedata, name)));
|
||||||
|
};
|
||||||
|
# else
|
||||||
|
static void* libunicodedata = dlopen(nullptr, RTLD_NOW);
|
||||||
|
VERIFY(libunicodedata);
|
||||||
|
|
||||||
|
auto load_symbol = [&]<typename T>(T& dest, char const* name) {
|
||||||
|
dest = reinterpret_cast<T>(dlsym(libunicodedata, name));
|
||||||
|
VERIFY(dest);
|
||||||
|
};
|
||||||
|
# endif
|
||||||
|
#else
|
||||||
|
auto load_symbol = []<typename T>(T& dest, char const*) {
|
||||||
|
dest = +FunctionStub<Function<RemovePointer<T>>>::make_stub();
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
load_symbol(symbols.code_point_display_name, "unicode_code_point_display_name");
|
||||||
|
load_symbol(symbols.canonical_combining_class, "unicode_canonical_combining_class");
|
||||||
|
load_symbol(symbols.simple_uppercase_mapping, "unicode_simple_uppercase_mapping");
|
||||||
|
load_symbol(symbols.simple_lowercase_mapping, "unicode_simple_lowercase_mapping");
|
||||||
|
load_symbol(symbols.special_case_mapping, "unicode_special_case_mapping");
|
||||||
|
load_symbol(symbols.general_category_from_string, "unicode_general_category_from_string");
|
||||||
|
load_symbol(symbols.code_point_has_general_category, "unicode_code_point_has_general_category");
|
||||||
|
load_symbol(symbols.property_from_string, "unicode_property_from_string");
|
||||||
|
load_symbol(symbols.code_point_has_property, "unicode_code_point_has_property");
|
||||||
|
load_symbol(symbols.script_from_string, "unicode_script_from_string");
|
||||||
|
load_symbol(symbols.code_point_has_script, "unicode_code_point_has_script");
|
||||||
|
load_symbol(symbols.code_point_has_script_extension, "unicode_code_point_has_script_extension");
|
||||||
|
|
||||||
|
initialized = true;
|
||||||
|
return symbols;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
44
Userland/Libraries/LibUnicode/UnicodeSymbols.h
Normal file
44
Userland/Libraries/LibUnicode/UnicodeSymbols.h
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <AK/Optional.h>
|
||||||
|
#include <AK/String.h>
|
||||||
|
#include <AK/StringView.h>
|
||||||
|
#include <AK/Types.h>
|
||||||
|
#include <LibUnicode/Forward.h>
|
||||||
|
|
||||||
|
namespace Unicode::Detail {
|
||||||
|
|
||||||
|
struct Symbols {
|
||||||
|
static Symbols const& ensure_loaded();
|
||||||
|
|
||||||
|
// Loaded from UnicodeData.cpp:
|
||||||
|
|
||||||
|
Optional<String> (*code_point_display_name)(u32) { nullptr };
|
||||||
|
|
||||||
|
u32 (*canonical_combining_class)(u32 code_point) { nullptr };
|
||||||
|
|
||||||
|
u32 (*simple_uppercase_mapping)(u32) { nullptr };
|
||||||
|
u32 (*simple_lowercase_mapping)(u32) { nullptr };
|
||||||
|
Span<SpecialCasing const* const> (*special_case_mapping)(u32 code_point) { nullptr };
|
||||||
|
|
||||||
|
Optional<GeneralCategory> (*general_category_from_string)(StringView) { nullptr };
|
||||||
|
bool (*code_point_has_general_category)(u32, GeneralCategory) { nullptr };
|
||||||
|
|
||||||
|
Optional<Property> (*property_from_string)(StringView) { nullptr };
|
||||||
|
bool (*code_point_has_property)(u32, Property) { nullptr };
|
||||||
|
|
||||||
|
Optional<Script> (*script_from_string)(StringView) { nullptr };
|
||||||
|
bool (*code_point_has_script)(u32, Script) { nullptr };
|
||||||
|
bool (*code_point_has_script_extension)(u32, Script) { nullptr };
|
||||||
|
|
||||||
|
private:
|
||||||
|
Symbols() = default;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in a new issue