From 7b628ea4d53c5b85954088570105764f20b6ff02 Mon Sep 17 00:00:00 2001 From: Dimitriy Ryazantcev Date: Mon, 27 Nov 2023 16:15:24 +0200 Subject: [PATCH] Win32: Simplify Unicode text input code --- src/video/windows/SDL_windowsevents.c | 77 ++++++--------------------- 1 file changed, 17 insertions(+), 60 deletions(-) diff --git a/src/video/windows/SDL_windowsevents.c b/src/video/windows/SDL_windowsevents.c index d5381dd77..dc56b0002 100644 --- a/src/video/windows/SDL_windowsevents.c +++ b/src/video/windows/SDL_windowsevents.c @@ -98,12 +98,6 @@ #ifndef IS_HIGH_SURROGATE #define IS_HIGH_SURROGATE(x) (((x) >= 0xd800) && ((x) <= 0xdbff)) #endif -#ifndef IS_LOW_SURROGATE -#define IS_LOW_SURROGATE(x) (((x) >= 0xdc00) && ((x) <= 0xdfff)) -#endif -#ifndef IS_SURROGATE_PAIR -#define IS_SURROGATE_PAIR(h, l) (IS_HIGH_SURROGATE(h) && IS_LOW_SURROGATE(l)) -#endif #ifndef USER_TIMER_MINIMUM #define USER_TIMER_MINIMUM 0x0000000A @@ -394,39 +388,6 @@ static void WIN_UpdateFocus(SDL_Window *window, SDL_bool expect_focus) } #endif /*!defined(__XBOXONE__) && !defined(__XBOXSERIES__)*/ -static BOOL WIN_ConvertUTF32toUTF8(UINT32 codepoint, char *text) -{ - if (codepoint <= 0x7F) { - text[0] = (char)codepoint; - text[1] = '\0'; - } else if (codepoint <= 0x7FF) { - text[0] = 0xC0 | (char)((codepoint >> 6) & 0x1F); - text[1] = 0x80 | (char)(codepoint & 0x3F); - text[2] = '\0'; - } else if (codepoint <= 0xFFFF) { - text[0] = 0xE0 | (char)((codepoint >> 12) & 0x0F); - text[1] = 0x80 | (char)((codepoint >> 6) & 0x3F); - text[2] = 0x80 | (char)(codepoint & 0x3F); - text[3] = '\0'; - } else if (codepoint <= 0x10FFFF) { - text[0] = 0xF0 | (char)((codepoint >> 18) & 0x0F); - text[1] = 0x80 | (char)((codepoint >> 12) & 0x3F); - text[2] = 0x80 | (char)((codepoint >> 6) & 0x3F); - text[3] = 0x80 | (char)(codepoint & 0x3F); - text[4] = '\0'; - } else { - return SDL_FALSE; - } - return SDL_TRUE; -} - -static BOOL WIN_ConvertUTF16toUTF8(UINT32 high_surrogate, UINT32 low_surrogate, char *text) -{ - const UINT32 SURROGATE_OFFSET = 0x10000U - (0xD800 << 10) - 0xDC00; - const UINT32 codepoint = (high_surrogate << 10) + low_surrogate + SURROGATE_OFFSET; - return WIN_ConvertUTF32toUTF8(codepoint, text); -} - static SDL_bool ShouldGenerateWindowCloseOnAltF4(void) { return !SDL_GetHintBoolean(SDL_HINT_WINDOWS_NO_CLOSE_ON_ALT_F4, SDL_FALSE); @@ -885,7 +846,7 @@ WIN_WindowProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam) returnCode = 1; } else { char text[5]; - if (WIN_ConvertUTF32toUTF8((UINT32)wParam, text)) { + if (SDL_UCS4ToUTF8((Uint32)wParam, text) != text) { SDL_SendKeyboardText(text); } returnCode = 0; @@ -893,31 +854,27 @@ WIN_WindowProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam) break; case WM_CHAR: - /* When a user enters a Unicode code point defined in the Basic Multilingual Plane, Windows sends a WM_CHAR - message with the code point encoded as UTF-16. When a user enters a Unicode code point from a Supplementary - Plane, Windows sends the code point in two separate WM_CHAR messages: The first message includes the UTF-16 - High Surrogate and the second the UTF-16 Low Surrogate. The High and Low Surrogates cannot be individually - converted to valid UTF-8, therefore, we must save the High Surrogate from the first WM_CHAR message and - concatenate it with the Low Surrogate from the second WM_CHAR message. At that point, we have a valid - UTF-16 surrogate pair ready to re-encode as UTF-8. */ + /* Characters outside Unicode Basic Multilingual Plane (BMP) + * are coded as so called "surrogate pair" in two separate character events. + * Cache high surrogate until next character event. */ if (IS_HIGH_SURROGATE(wParam)) { data->high_surrogate = (WCHAR)wParam; - } else if (IS_SURROGATE_PAIR(data->high_surrogate, wParam)) { - /* The code point is in a Supplementary Plane. - Here wParam is the Low Surrogate. */ - char text[5]; - if (WIN_ConvertUTF16toUTF8((UINT32)data->high_surrogate, (UINT32)wParam, text)) { - SDL_SendKeyboardText(text); - } - data->high_surrogate = 0; } else { - /* The code point is in the Basic Multilingual Plane. - It's numerically equal to UTF-32. */ - char text[5]; - if (WIN_ConvertUTF32toUTF8((UINT32)wParam, text)) { - SDL_SendKeyboardText(text); + WCHAR utf16[] = { + data->high_surrogate ? data->high_surrogate : (WCHAR)wParam, + data->high_surrogate ? (WCHAR)wParam : L'\0', + L'\0' + }; + + char utf8[5]; + int result = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, utf16, -1, utf8, sizeof(utf8), NULL, NULL); + if (result > 0) { + SDL_SendKeyboardText(utf8); } + + data->high_surrogate = L'\0'; } + returnCode = 0; break;