Here is a C implementation that uses both WideCharToMultiByte and MultiByteToWideChar.
In both cases I make sure to append a null
character to the end of the destination buffer, for the following reasons.
MultiByteToWideChar does not null-terminate an output string if the input string length is explicitly specified without a terminating null character.
Likewise, WideCharToMultiByte does not null-terminate an output string if the input string length is explicitly specified without a terminating null character.
Even if someone specifies -1 and passes in a null-terminated string, I still allocate enough space for an additional null character; the extra character was not an issue for my use case.
/*
 * Convert a UTF-8 byte sequence into a newly allocated wide-character string.
 *
 * str    - UTF-8 input. It only has to be null-terminated when nbytes is -1.
 * nbytes - number of bytes to convert, or -1 to let MultiByteToWideChar
 *          process a null-terminated string (the terminator is then part
 *          of the converted output as well).
 *
 * Returns a malloc/calloc-allocated buffer that is always null-terminated
 * and must be released by the caller with free(). Returns NULL if str is
 * NULL, if the input is rejected by MultiByteToWideChar (the call uses
 * MB_ERR_INVALID_CHARS), or if the allocation fails.
 */
wchar_t* utf8_decode( const char* str, int nbytes ) {
    if ( str == NULL ) {
        return NULL;
    }

    /* MultiByteToWideChar fails when asked to convert zero bytes, which
       would make an empty string indistinguishable from an error. Return
       an empty, null-terminated string instead. */
    if ( nbytes == 0 ) {
        return calloc( 1, sizeof( wchar_t ) );
    }

    /* First pass: no output buffer, just ask for the required length. */
    const int nchars = MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS,
                                            str, nbytes, NULL, 0 );
    if ( nchars <= 0 ) {
        return NULL;
    }

    /* One extra element: the API does not terminate the output when an
       explicit input length is given. */
    wchar_t* wstr = malloc( ( ( size_t )nchars + 1 ) * sizeof *wstr );
    if ( wstr == NULL ) {
        return NULL;
    }

    /* Second pass: the buffer size parameter is an int, so nchars is
       passed as-is. */
    if ( MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS,
                              str, nbytes, wstr, nchars ) == 0 ) {
        free( wstr );
        return NULL;
    }

    wstr[ nchars ] = L'\0';
    return wstr;
}
/*
 * Convert a wide-character string into a newly allocated UTF-8 string.
 *
 * wstr   - wide-character input. It only has to be null-terminated when
 *          nchars is -1.
 * nchars - number of wide characters to convert, or -1 to let
 *          WideCharToMultiByte process a null-terminated string (the
 *          terminator is then part of the converted output as well).
 *
 * Returns a malloc/calloc-allocated buffer that is always null-terminated
 * and must be released by the caller with free(). Returns NULL if wstr is
 * NULL, if the input is rejected by WideCharToMultiByte (the call uses
 * WC_ERR_INVALID_CHARS), or if the allocation fails.
 */
char* utf8_encode( const wchar_t* wstr, int nchars ) {
    if ( wstr == NULL ) {
        return NULL;
    }

    /* WideCharToMultiByte fails when asked to convert zero characters,
       which would make an empty string indistinguishable from an error.
       Return an empty, null-terminated string instead. */
    if ( nchars == 0 ) {
        return calloc( 1, sizeof( char ) );
    }

    /* First pass: no output buffer, just ask for the required size. */
    const int nbytes = WideCharToMultiByte( CP_UTF8, WC_ERR_INVALID_CHARS,
                                            wstr, nchars, NULL, 0, NULL, NULL );
    if ( nbytes <= 0 ) {
        return NULL;
    }

    /* One extra byte: the API does not terminate the output when an
       explicit input length is given. */
    char* str = malloc( ( size_t )nbytes + 1 );
    if ( str == NULL ) {
        return NULL;
    }

    /* Second pass: perform the actual conversion into the buffer. */
    if ( WideCharToMultiByte( CP_UTF8, WC_ERR_INVALID_CHARS,
                              wstr, nchars, str, nbytes, NULL, NULL ) == 0 ) {
        free( str );
        return NULL;
    }

    str[ nbytes ] = '\0';
    return str;
}