mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-01-22 17:24:48 -05:00
LibTextCodec: Implement euc-kr encoder
Implements the `euc-kr` encoder, as specified by https://encoding.spec.whatwg.org/#euc-kr-encoder.
This commit is contained in:
parent
72d0e3284b
commit
826292536c
Notes:
github-actions[bot]
2024-08-08 16:51:31 +00:00
Author: https://github.com/BenJilks Commit: https://github.com/LadybirdBrowser/ladybird/commit/826292536c0 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/975 Reviewed-by: https://github.com/shannonbooth Reviewed-by: https://github.com/skyrising Reviewed-by: https://github.com/tcl3 ✅
4 changed files with 62 additions and 1 deletions
|
@ -270,7 +270,7 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
|
|||
{ "big5"sv, prepare_table(data.get("big5"sv)->as_array(), GenerateAccessor::Yes) },
|
||||
{ "jis0208"sv, prepare_table(data.get("jis0208"sv)->as_array(), GenerateAccessor::Yes, GenerateInverseAccessor::Yes) },
|
||||
{ "jis0212"sv, prepare_table(data.get("jis0212"sv)->as_array(), GenerateAccessor::Yes) },
|
||||
{ "euc_kr"sv, prepare_table(data.get("euc-kr"sv)->as_array(), GenerateAccessor::Yes) },
|
||||
{ "euc_kr"sv, prepare_table(data.get("euc-kr"sv)->as_array(), GenerateAccessor::Yes, GenerateInverseAccessor::Yes) },
|
||||
{ "ibm866"sv, prepare_table(data.get("ibm866"sv)->as_array()) },
|
||||
{ "iso_8859_2"sv, prepare_table(data.get("iso-8859-2"sv)->as_array()) },
|
||||
{ "iso_8859_3"sv, prepare_table(data.get("iso-8859-3"sv)->as_array()) },
|
||||
|
|
|
@ -43,3 +43,21 @@ TEST_CASE(test_euc_jp_encoder)
|
|||
EXPECT(processed_bytes[3] == 0xA5);
|
||||
EXPECT(processed_bytes[4] == 0xC4);
|
||||
}
|
||||
|
||||
TEST_CASE(test_euc_kr_encoder)
{
    // Feed two non-ASCII code points through the EUC-KR encoder and check
    // that each one comes out as the expected lead/trail byte pair.
    TextCodec::EUCKREncoder euc_kr_encoder;

    // Input code points, in order:
    //   U+B29F Hangul Syllable Neulh
    //   U+7C97 CJK Unified Ideograph-7C97
    auto input_text = "\U0000B29F\U00007C97"sv;

    // Collect every byte the encoder hands to the callback.
    Vector<u8> encoded;
    MUST(euc_kr_encoder.process(Utf8View(input_text), [&](u8 encoded_byte) {
        return encoded.try_append(encoded_byte);
    }));

    // Two code points, two bytes each.
    EXPECT(encoded.size() == 4);

    // U+B29F
    EXPECT(encoded[0] == 0x88);
    EXPECT(encoded[1] == 0x6B);

    // U+7C97
    EXPECT(encoded[2] == 0xF0);
    EXPECT(encoded[3] == 0xD8);
}
|
||||
|
|
|
@ -15,6 +15,7 @@ namespace TextCodec {
|
|||
namespace {
|
||||
UTF8Encoder s_utf8_encoder;
|
||||
EUCJPEncoder s_euc_jp_encoder;
|
||||
EUCKREncoder s_euc_kr_encoder;
|
||||
}
|
||||
|
||||
Optional<Encoder&> encoder_for_exact_name(StringView encoding)
|
||||
|
@ -23,6 +24,8 @@ Optional<Encoder&> encoder_for_exact_name(StringView encoding)
|
|||
return s_utf8_encoder;
|
||||
if (encoding.equals_ignoring_ascii_case("euc-jp"sv))
|
||||
return s_euc_jp_encoder;
|
||||
if (encoding.equals_ignoring_ascii_case("euc-kr"sv))
|
||||
return s_euc_kr_encoder;
|
||||
dbgln("TextCodec: No encoder implemented for encoding '{}'", encoding);
|
||||
return {};
|
||||
}
|
||||
|
@ -100,4 +103,39 @@ ErrorOr<void> EUCJPEncoder::process(Utf8View input, Function<ErrorOr<void>(u8)>
|
|||
return {};
|
||||
}
|
||||
|
||||
// https://encoding.spec.whatwg.org/#euc-kr-encoder
|
||||
ErrorOr<void> EUCKREncoder::process(Utf8View input, Function<ErrorOr<void>(u8)> on_byte)
|
||||
{
|
||||
for (u32 item : input) {
|
||||
// 1. If code point is end-of-queue, return finished.
|
||||
|
||||
// 2. If code point is an ASCII code point, return a byte whose value is code point.
|
||||
if (is_ascii(item)) {
|
||||
TRY(on_byte(static_cast<u8>(item)));
|
||||
continue;
|
||||
}
|
||||
|
||||
// 3. Let pointer be the index pointer for code point in index EUC-KR.
|
||||
auto pointer = code_point_euc_kr_index(item);
|
||||
|
||||
// 4. If pointer is null, return error with code point.
|
||||
if (!pointer.has_value()) {
|
||||
// TODO: Report error.
|
||||
continue;
|
||||
}
|
||||
|
||||
// 5. Let lead be pointer / 190 + 0x81.
|
||||
auto lead = *pointer / 190 + 0x81;
|
||||
|
||||
// 6. Let trail be pointer % 190 + 0x41.
|
||||
auto trail = *pointer % 190 + 0x41;
|
||||
|
||||
// 7. Return two bytes whose values are lead and trail.
|
||||
TRY(on_byte(static_cast<u8>(lead)));
|
||||
TRY(on_byte(static_cast<u8>(trail)));
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -29,6 +29,11 @@ public:
|
|||
virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte) override;
|
||||
};
|
||||
|
||||
class EUCKREncoder final : public Encoder {
|
||||
public:
|
||||
virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte) override;
|
||||
};
|
||||
|
||||
Optional<Encoder&> encoder_for_exact_name(StringView encoding);
|
||||
Optional<Encoder&> encoder_for(StringView label);
|
||||
|
||||
|
|
Loading…
Reference in a new issue