2021-05-20 12:56:38 +02:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
|
|
|
|
*
|
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
|
|
*/
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
2023-01-07 14:48:20 -05:00
|
|
|
#include <AK/Concepts.h>
|
|
|
|
#include <AK/Error.h>
|
2021-05-20 12:56:38 +02:00
|
|
|
#include <AK/Forward.h>
|
|
|
|
|
|
|
|
namespace AK::UnicodeUtils {
|
|
|
|
|
2023-10-28 17:06:51 -04:00
|
|
|
// Returns the number of bytes (1 to 4) that the UTF-8 encoding of `code_point`
// occupies, or 0 when `code_point` is greater than 0x10ffff.
constexpr int bytes_to_store_code_point_in_utf8(u32 code_point)
{
    // Work downwards from the largest range so each test only needs a lower bound.
    if (code_point > 0x10ffff)
        return 0;
    if (code_point > 0xffff)
        return 4;
    if (code_point > 0x7ff)
        return 3;
    return code_point > 0x7f ? 2 : 1;
}
|
|
|
|
|
2021-10-04 16:59:13 +02:00
|
|
|
// Encodes `code_point` as UTF-8 and passes each resulting byte, as a `char`, to
// `callback` in output order (lead byte first).
//
// Returns the number of bytes emitted (1 to 4). If `code_point` is greater than
// 0x10ffff, returns -1 and `callback` is never invoked.
template<typename Callback>
[[nodiscard]] constexpr int code_point_to_utf8(u32 code_point, Callback callback)
{
    if (code_point > 0x10ffff)
        return -1;

    // Single-byte form: the value is emitted unchanged.
    if (code_point <= 0x7f) {
        callback(static_cast<char>(code_point));
        return 1;
    }

    // Multi-byte forms differ only in the lead byte's marker bits, how many
    // payload bits the lead byte carries, and the number of continuation bytes.
    u32 lead_marker = 0xc0;
    u32 lead_mask = 0x1f;
    int continuation_count = 1;
    if (code_point > 0xffff) {
        lead_marker = 0xf0;
        lead_mask = 0x07;
        continuation_count = 3;
    } else if (code_point > 0x7ff) {
        lead_marker = 0xe0;
        lead_mask = 0x0f;
        continuation_count = 2;
    }

    // The lead byte holds the bits above all the 6-bit continuation groups.
    callback(static_cast<char>(((code_point >> (6 * continuation_count)) & lead_mask) | lead_marker));

    // Continuation bytes: 0x80 marker plus six payload bits, most significant group first.
    for (int remaining = continuation_count; remaining > 0; --remaining)
        callback(static_cast<char>(((code_point >> (6 * (remaining - 1))) & 0x3f) | 0x80));

    return continuation_count + 1;
}
|
|
|
|
|
2023-01-07 14:48:20 -05:00
|
|
|
template<FallibleFunction<char> Callback>
|
|
|
|
[[nodiscard]] ErrorOr<int> try_code_point_to_utf8(u32 code_point, Callback&& callback)
|
|
|
|
{
|
|
|
|
if (code_point <= 0x7f) {
|
|
|
|
TRY(callback(static_cast<char>(code_point)));
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
if (code_point <= 0x07ff) {
|
|
|
|
TRY(callback(static_cast<char>((((code_point >> 6) & 0x1f) | 0xc0))));
|
|
|
|
TRY(callback(static_cast<char>((((code_point >> 0) & 0x3f) | 0x80))));
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
if (code_point <= 0xffff) {
|
|
|
|
TRY(callback(static_cast<char>((((code_point >> 12) & 0x0f) | 0xe0))));
|
|
|
|
TRY(callback(static_cast<char>((((code_point >> 6) & 0x3f) | 0x80))));
|
|
|
|
TRY(callback(static_cast<char>((((code_point >> 0) & 0x3f) | 0x80))));
|
|
|
|
return 3;
|
|
|
|
}
|
|
|
|
if (code_point <= 0x10ffff) {
|
|
|
|
TRY(callback(static_cast<char>((((code_point >> 18) & 0x07) | 0xf0))));
|
|
|
|
TRY(callback(static_cast<char>((((code_point >> 12) & 0x3f) | 0x80))));
|
|
|
|
TRY(callback(static_cast<char>((((code_point >> 6) & 0x3f) | 0x80))));
|
|
|
|
TRY(callback(static_cast<char>((((code_point >> 0) & 0x3f) | 0x80))));
|
|
|
|
return 4;
|
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2021-05-20 12:56:38 +02:00
|
|
|
}
|