LibC: Fix up mbtowc

One more proper implementation and one less FIXME.
This commit is contained in:
Tim Schumacher 2021-10-22 00:04:39 +02:00 committed by Brian Gianforcaro
parent 1bc4a0d822
commit 8df6955838
2 changed files with 56 additions and 8 deletions

View file

@ -539,3 +539,44 @@ TEST_CASE(mbrlen)
EXPECT_EQ(errno, EILSEQ);
state = {};
}
// Exercises mbtowc(): ASCII input, multi-byte UTF-8 input, the null character,
// incomplete and invalid sequences, the state reset/shift-state query, and
// conversion without a destination pointer.
TEST_CASE(mbtowc)
{
    int ret = 0;
    wchar_t wc = 0;

    // Ensure that we can parse normal ASCII characters.
    ret = mbtowc(&wc, "Hello", 5);
    EXPECT_EQ(ret, 1);
    EXPECT_EQ(wc, 'H');

    // Try two three-byte codepoints (™™), only one of which should be consumed.
    ret = mbtowc(&wc, "\xe2\x84\xa2\xe2\x84\xa2", 6);
    EXPECT_EQ(ret, 3);
    EXPECT_EQ(wc, 0x2122);

    // Try a null character, which should return 0.
    ret = mbtowc(&wc, "\x00\x00", 2);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(wc, 0);

    // Try an incomplete multibyte character.
    // Clear errno first so that a stale EILSEQ left behind by an earlier call
    // cannot satisfy the check below.
    errno = 0;
    ret = mbtowc(&wc, "\xe2\x84", 2);
    EXPECT_EQ(ret, -1);
    EXPECT_EQ(errno, EILSEQ);

    // Ask if we support shift states and reset the internal state in the process.
    ret = mbtowc(nullptr, nullptr, 2);
    EXPECT_EQ(ret, 0); // We don't support shift states.
    ret = mbtowc(nullptr, "\x00", 1);
    EXPECT_EQ(ret, 0); // No error likely means that the state is working again.

    // Try an invalid multibyte sequence.
    // errno still holds EILSEQ from the incomplete-character check, so clear it again.
    errno = 0;
    ret = mbtowc(&wc, "\xff", 1);
    EXPECT_EQ(ret, -1);
    EXPECT_EQ(errno, EILSEQ);

    // Try a successful conversion, but without target address.
    ret = mbtowc(nullptr, "\xe2\x84\xa2\xe2\x84\xa2", 6);
    EXPECT_EQ(ret, 3);
}

View file

@ -884,19 +884,26 @@ size_t mbstowcs(wchar_t* pwcs, const char* s, size_t n)
return mbsrtowcs(pwcs, &s, n, &state);
}
// Converts the multibyte character at the start of `s`, examining at most `n`
// bytes, by delegating to mbrtowc() with a function-private conversion state.
// `pwc` is passed through to mbrtowc() unchanged.
//
// Returns whatever mbrtowc() reports for a successful conversion, or -1 on
// failure. An incomplete character is reported as -1 with errno set to EILSEQ.
// Passing a null `s` resets the private conversion state and returns 0, which
// tells the caller that there are no shift states.
int mbtowc(wchar_t* pwc, const char* s, size_t n)
{
    static mbstate_t internal_state = {};

    // Reset the internal state and ask whether we have shift states.
    if (s == nullptr) {
        internal_state = {};
        return 0;
    }

    size_t ret = mbrtowc(pwc, s, n, &internal_state);

    // This interface has no way to ask the caller for the remaining bytes, so an
    // incomplete character is folded into the illegal-sequence error.
    if (ret == static_cast<size_t>(-2)) {
        errno = EILSEQ;
        ret = static_cast<size_t>(-1);
    }

    // After any failure, drop whatever partial input the state has accumulated.
    // Otherwise the next, unrelated call would be decoded as the continuation of
    // the sequence that was just rejected.
    if (ret == static_cast<size_t>(-1)) {
        internal_state = {};
        return -1;
    }

    return static_cast<int>(ret);
}
int wctomb(char* s, wchar_t wc)