stdlib: SDL_utf8strlen and SDL_utf8strnlen now use SDL_StepUTF8 internally.

Otherwise, callers might find that strings with malformed UTF-8 sequences produce a different number of codepoints than the count returned here, overflowing buffers that might be allocated based on the results.
2024-04-01 22:43:19 -04:00 · 2024-04-01 22:43:19 -04:00 · 58529c1827
parent 17d4f8d699
commit 58529c1827
1 changed file with 7 additions and 16 deletions
--- a/src/stdlib/SDL_string.c
+++ b/src/stdlib/SDL_string.c
@@ -814,30 +814,21 @@ size_t SDL_utf8strlcpy(SDL_OUT_Z_CAP(dst_bytes) char *dst, const char *src, size
 size_t SDL_utf8strlen(const char *str)
 {
    size_t retval = 0;
-    const char *p = str;
+    while (SDL_StepUTF8(&str, 4)) {
-    unsigned char ch;
+        retval++;
    while ((ch = *(p++)) != 0) {
        /* if top two bits are 1 and 0, it's a continuation byte. */
        if ((ch & 0xc0) != 0x80) {
            retval++;
        }
    }
    return retval;
 }
 size_t SDL_utf8strnlen(const char *str, size_t bytes)
 {
    size_t retval = 0;
-    const char *p = str;
+    const char *strstart = str;
    unsigned char ch;
-    while ((ch = *(p++)) != 0 && bytes-- > 0) {
+    while (SDL_StepUTF8(&str, bytes)) {
-        /* if top two bits are 1 and 0, it's a continuation byte. */
+        bytes -= (size_t) (str - strstart);
-        if ((ch & 0xc0) != 0x80) {
+        strstart = str;
-            retval++;
+        retval++;
        }
    }
    return retval;