Protect against bad RCT2 strings

This commit is contained in:
Ted John 2018-04-19 22:07:12 +01:00
parent a7e0f30212
commit 6d76d76f98
2 changed files with 25 additions and 6 deletions

View file

@ -29,21 +29,31 @@ static std::wstring DecodeToWideChar(const std::string_view& src)
{
std::wstring decoded;
decoded.reserve(src.size());
for (auto it = src.begin(); it != src.end(); it++)
for (auto it = src.begin(); it != src.end(); )
{
uint8_t c = *it;
uint8_t c = *it++;
if (c == 255)
{
// Push next two characters
uint8 a = 0;
uint8 b = 0;
if (++it != src.end())
if (it != src.end())
{
a = *it;
if (++it != src.end())
a = *it++;
if (it != src.end())
{
b = *it;
b = *it++;
}
else
{
// 2nd byte for double byte character is missing
break;
}
}
else
{
// 1st byte for double byte character is missing
break;
}
wchar_t cp = (a << 8) | b;

View file

@ -42,6 +42,15 @@ TEST_F(Localisation, RCT2_to_UTF8_PL)
ASSERT_EQ(expected, actual);
}
TEST_F(Localisation, RCT2_to_UTF8_ZH_TW_PREMATURE_END)
{
    // This string can be found in BATFL.DAT. Its final double byte character
    // (ff c2) is truncated: the second byte is missing, so only the eleven
    // complete characters before it are expected in the converted output.
    auto expectedUtf8 = u8"小型車輛在鐵道上振動搖";
    auto rct2Bytes = StringFromHex("ffa470ffabacffa8aeffbdf8ffa662ffc54bffb944ffa457ffaeb6ffb0caffb76effc2");
    auto convertedUtf8 = rct2_to_utf8(rct2Bytes, RCT2_LANGUAGE_ID_CHINESE_TRADITIONAL);
    ASSERT_EQ(expectedUtf8, convertedUtf8);
}
///////////////////////////////////////////////////////////////////////////////
// Tests for utf8_to_rct2
///////////////////////////////////////////////////////////////////////////////